mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-06-13 23:36:45 +03:00
fix(s3): list empty directories as directory markers (#9615)
* fix(s3): list empty directories as directory markers A real but empty directory created out of band (mount, mkdir, filer API) carries no MIME, so it was hidden from S3 listings. hadoop-aws getFileStatus probes LIST prefix=dir/ &delimiter=/ and reads an empty result as a missing path, which breaks Spark's eventLog.dir when it points at an empty directory. Surface such directories as directory markers, matching directories created via PutObject with a trailing "/". Emptiness comes from the recursion result, and the marker MIME is set only on the in-memory listing entry, so empty directories stay eligible for empty-folder cleanup. * fix(s3): only surface empty directory markers for explicit dir probes Restrict the empty-directory marker to a trailing-slash prefix probe (prefix=dir/), the pattern hadoop-aws getFileStatus uses. Plain listings are left as before, so an empty directory left behind by deleted objects (e.g. after lifecycle expiration) is no longer shown as a phantom key.
This commit is contained in:
@@ -677,22 +677,41 @@ func (s3a *S3ApiServer) doListFilerEntries(client filer_pb.SeaweedFilerClient, d
|
|||||||
}
|
}
|
||||||
|
|
||||||
if delimiter != "/" || cursor.prefixEndsOnDelimiter {
|
if delimiter != "/" || cursor.prefixEndsOnDelimiter {
|
||||||
|
// A trailing-slash prefix (e.g. "logs/") names one directory and asks
|
||||||
|
// whether it exists, so a real but empty directory must be reported for
|
||||||
|
// that probe.
|
||||||
|
explicitDirProbe := cursor.prefixEndsOnDelimiter
|
||||||
if cursor.prefixEndsOnDelimiter {
|
if cursor.prefixEndsOnDelimiter {
|
||||||
cursor.prefixEndsOnDelimiter = false
|
cursor.prefixEndsOnDelimiter = false
|
||||||
if entry.IsDirectoryKeyObject() {
|
}
|
||||||
eachEntryFn(dir, entry)
|
isKeyObject := entry.IsDirectoryKeyObject()
|
||||||
}
|
if isKeyObject {
|
||||||
} else if entry.IsDirectoryKeyObject() {
|
|
||||||
// Directory key objects (created via PutObject with trailing "/")
|
// Directory key objects (created via PutObject with trailing "/")
|
||||||
// must appear as regular keys in recursive listing mode.
|
// must appear as regular keys in recursive listing mode.
|
||||||
eachEntryFn(dir, entry)
|
eachEntryFn(dir, entry)
|
||||||
}
|
}
|
||||||
// Recurse into subdirectory to list any children
|
// Recurse into subdirectory to list any children, noting whether the
|
||||||
subNextMarker, subErr := s3a.doListFilerEntries(client, dir+"/"+entry.Name, "", cursor, "", delimiter, false, bucket, eachEntryFn)
|
// subtree produced any entries.
|
||||||
|
childEmitted := false
|
||||||
|
subNextMarker, subErr := s3a.doListFilerEntries(client, dir+"/"+entry.Name, "", cursor, "", delimiter, false, bucket, func(d string, e *filer_pb.Entry) {
|
||||||
|
childEmitted = true
|
||||||
|
eachEntryFn(d, e)
|
||||||
|
})
|
||||||
if subErr != nil {
|
if subErr != nil {
|
||||||
err = fmt.Errorf("doListFilerEntries2: %w", subErr)
|
err = fmt.Errorf("doListFilerEntries2: %w", subErr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
// A real but empty directory (created out of band via mount, mkdir or
|
||||||
|
// the filer API, so it carries no MIME) is otherwise invisible to S3
|
||||||
|
// clients that detect directories by listing it under its own "<dir>/"
|
||||||
|
// prefix. Surface it as a directory marker for that explicit probe,
|
||||||
|
// identical to a directory created via PutObject with a trailing "/", so
|
||||||
|
// tools like hadoop-aws can find it. Plain listings are left untouched, so
|
||||||
|
// empty directories left behind by deleted objects are not shown as keys.
|
||||||
|
if explicitDirProbe && !isKeyObject && !childEmitted && !cursor.isTruncated && entry.Attributes != nil {
|
||||||
|
entry.Attributes.Mime = s3_constants.FolderMimeType
|
||||||
|
eachEntryFn(dir, entry)
|
||||||
|
}
|
||||||
// println("doListFilerEntries2 dir", dir+"/"+entry.Name, "subNextMarker", subNextMarker)
|
// println("doListFilerEntries2 dir", dir+"/"+entry.Name, "subNextMarker", subNextMarker)
|
||||||
nextMarker = entry.Name + "/" + subNextMarker
|
nextMarker = entry.Name + "/" + subNextMarker
|
||||||
if cursor.isTruncated {
|
if cursor.isTruncated {
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -61,3 +62,122 @@ func TestDirectoryListedAsCommonPrefix(t *testing.T) {
|
|||||||
"Directory should be passed to callback for CommonPrefix processing with delimiter=/")
|
"Directory should be passed to callback for CommonPrefix processing with delimiter=/")
|
||||||
assert.Empty(t, seenFiles, "No files should be seen, only the directory")
|
assert.Empty(t, seenFiles, "No files should be seen, only the directory")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestEmptyDirectorySurfacedAsMarker reproduces the hadoop-aws / Spark case where a
|
||||||
|
// real but empty directory (created via mount, mkdir or the filer API, so it has no
|
||||||
|
// MIME) must be visible to S3 clients that detect directories by listing under the
|
||||||
|
// "<dir>/" prefix. Such a directory is surfaced as a directory marker, identical to
|
||||||
|
// one created via PutObject with a trailing "/".
|
||||||
|
func TestEmptyDirectorySurfacedAsMarker(t *testing.T) {
|
||||||
|
s3a := &S3ApiServer{
|
||||||
|
option: &S3ApiServerOption{BucketsPath: "/buckets"},
|
||||||
|
}
|
||||||
|
|
||||||
|
emptyDir := &filer_pb.Entry{
|
||||||
|
Name: "logs",
|
||||||
|
IsDirectory: true,
|
||||||
|
Attributes: &filer_pb.FuseAttributes{Mime: ""},
|
||||||
|
}
|
||||||
|
client := &testFilerClient{
|
||||||
|
entriesByDir: map[string][]*filer_pb.Entry{
|
||||||
|
"/buckets/test": {emptyDir},
|
||||||
|
"/buckets/test/logs": {}, // empty directory has no children
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mirrors the getFileStatus probe: prefix "logs/" with delimiter "/".
|
||||||
|
cursor := &ListingCursor{maxKeys: 1000, prefixEndsOnDelimiter: true}
|
||||||
|
var seen []*filer_pb.Entry
|
||||||
|
_, err := s3a.doListFilerEntries(client, "/buckets/test", "logs", cursor, "", "/", false, "test",
|
||||||
|
func(dir string, entry *filer_pb.Entry) {
|
||||||
|
seen = append(seen, entry)
|
||||||
|
})
|
||||||
|
|
||||||
|
assert.NoError(t, err)
|
||||||
|
if assert.Len(t, seen, 1, "empty directory must be surfaced under the logs/ prefix") {
|
||||||
|
assert.True(t, seen[0].IsDirectoryKeyObject(),
|
||||||
|
"empty directory must be marked as a directory key object so it lists as logs/")
|
||||||
|
assert.Equal(t, s3_constants.FolderMimeType, seen[0].Attributes.Mime)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNonEmptyDirectoryGetsNoPhantomMarker ensures the empty-directory fix does not add
|
||||||
|
// a spurious marker for directories that already have children; the children represent
|
||||||
|
// the directory.
|
||||||
|
func TestNonEmptyDirectoryGetsNoPhantomMarker(t *testing.T) {
|
||||||
|
s3a := &S3ApiServer{
|
||||||
|
option: &S3ApiServerOption{BucketsPath: "/buckets"},
|
||||||
|
}
|
||||||
|
|
||||||
|
dir := &filer_pb.Entry{
|
||||||
|
Name: "data",
|
||||||
|
IsDirectory: true,
|
||||||
|
Attributes: &filer_pb.FuseAttributes{Mime: ""},
|
||||||
|
}
|
||||||
|
child := &filer_pb.Entry{
|
||||||
|
Name: "a.txt",
|
||||||
|
IsDirectory: false,
|
||||||
|
Attributes: &filer_pb.FuseAttributes{},
|
||||||
|
}
|
||||||
|
client := &testFilerClient{
|
||||||
|
entriesByDir: map[string][]*filer_pb.Entry{
|
||||||
|
"/buckets/test": {dir},
|
||||||
|
"/buckets/test/data": {child},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursive listing under the "data/" prefix.
|
||||||
|
cursor := &ListingCursor{maxKeys: 1000, prefixEndsOnDelimiter: true}
|
||||||
|
var seen []string
|
||||||
|
_, err := s3a.doListFilerEntries(client, "/buckets/test", "data", cursor, "", "", false, "test",
|
||||||
|
func(dir string, entry *filer_pb.Entry) {
|
||||||
|
seen = append(seen, entry.Name)
|
||||||
|
})
|
||||||
|
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, []string{"a.txt"}, seen, "non-empty directory must be represented by its child only")
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEmptyDirectoryHiddenInFlatListing ensures the marker is only surfaced for an
|
||||||
|
// explicit "<dir>/" probe, not in a plain (no prefix, no delimiter) listing. An empty
|
||||||
|
// directory left behind by deleted objects (e.g. after lifecycle expiration) must not
|
||||||
|
// appear as a phantom key, matching AWS S3.
|
||||||
|
func TestEmptyDirectoryHiddenInFlatListing(t *testing.T) {
|
||||||
|
s3a := &S3ApiServer{
|
||||||
|
option: &S3ApiServerOption{BucketsPath: "/buckets"},
|
||||||
|
}
|
||||||
|
|
||||||
|
emptyDir := &filer_pb.Entry{
|
||||||
|
Name: "expire1",
|
||||||
|
IsDirectory: true,
|
||||||
|
Attributes: &filer_pb.FuseAttributes{Mime: ""},
|
||||||
|
}
|
||||||
|
keepDir := &filer_pb.Entry{
|
||||||
|
Name: "keep2",
|
||||||
|
IsDirectory: true,
|
||||||
|
Attributes: &filer_pb.FuseAttributes{Mime: ""},
|
||||||
|
}
|
||||||
|
keepObj := &filer_pb.Entry{
|
||||||
|
Name: "foo",
|
||||||
|
IsDirectory: false,
|
||||||
|
Attributes: &filer_pb.FuseAttributes{},
|
||||||
|
}
|
||||||
|
client := &testFilerClient{
|
||||||
|
entriesByDir: map[string][]*filer_pb.Entry{
|
||||||
|
"/buckets/test": {emptyDir, keepDir},
|
||||||
|
"/buckets/test/expire1": {},
|
||||||
|
"/buckets/test/keep2": {keepObj},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Plain flat listing: no prefix, no delimiter.
|
||||||
|
cursor := &ListingCursor{maxKeys: 1000}
|
||||||
|
var seen []string
|
||||||
|
_, err := s3a.doListFilerEntries(client, "/buckets/test", "", cursor, "", "", false, "test",
|
||||||
|
func(dir string, entry *filer_pb.Entry) {
|
||||||
|
seen = append(seen, entry.Name)
|
||||||
|
})
|
||||||
|
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, []string{"foo"}, seen, "flat listing must hide the empty directory and return only real objects")
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user