mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-06-13 23:36:45 +03:00
8cc10460b4
* fix(remote): reject short reads when caching remote objects A short read from the remote (stale listing size, truncated or flaky response) was silently zero-padded: the S3 and Azure clients pre-size the buffer and discard the downloaded byte count, and the chunk is recorded with the requested size. The cached file then matched the expected size but its tail was NULL, and the entry was marked cached so it never re-fetched. Check the byte count against the requested size in both clients, and add a backend-agnostic guard in FetchAndWriteNeedle. The cache now fails loudly and the entry stays remote-only for a later retry. * fix(remote): match S3 default modes when syncing remote metadata Remote object listings carry no POSIX mode, so synced entries were created with a hardcoded 0644. Against a SeaweedFS remote, whose S3 layer writes objects as 0660 and auto-creates directories as 0771 (0660|0111), the mounted copy ended up 0644/0755 and the permissions visibly diverged from the source. Default to the S3 modes instead (files 0660, directories 0771). The filer derives parent-dir modes from the child as fileMode|0111, so fixing the file default also brings the directories into line. Directory mtimes still reflect sync time: S3 listings don't enumerate directories, so the remote's directory timestamps aren't available.
209 lines
6.8 KiB
Go
209 lines
6.8 KiB
Go
package shell
|
|
|
|
import (
|
|
"context"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/filer"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/remote_storage"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
)
|
|
|
|
func init() {
|
|
Commands = append(Commands, &commandRemoteMetaSync{})
|
|
}
|
|
|
|
type commandRemoteMetaSync struct {
|
|
}
|
|
|
|
func (c *commandRemoteMetaSync) Name() string {
|
|
return "remote.meta.sync"
|
|
}
|
|
|
|
func (c *commandRemoteMetaSync) Help() string {
|
|
return `synchronize the local file meta data with the remote file metadata
|
|
|
|
# assume a remote storage is configured to name "cloud1"
|
|
remote.configure -name=cloud1 -type=s3 -s3.access_key=xxx -s3.secret_key=yyy
|
|
# mount and pull one bucket
|
|
remote.mount -dir=/xxx -remote=cloud1/bucket
|
|
|
|
After mount, if the remote file can be changed,
|
|
run this command to synchronize the metadata of the mounted folder or any sub folder
|
|
|
|
remote.meta.sync -dir=/xxx
|
|
remote.meta.sync -dir=/xxx/some/subdir
|
|
|
|
This is designed to run regularly. So you can add it to some cronjob.
|
|
|
|
If there are no other operations changing remote files, this operation is not needed.
|
|
|
|
`
|
|
}
|
|
|
|
func (c *commandRemoteMetaSync) HasTag(CommandTag) bool {
|
|
return false
|
|
}
|
|
|
|
func (c *commandRemoteMetaSync) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
|
|
|
|
remoteMetaSyncCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
|
|
|
|
dir := remoteMetaSyncCommand.String("dir", "", "a directory in filer")
|
|
|
|
if err = remoteMetaSyncCommand.Parse(args); err != nil {
|
|
return nil
|
|
}
|
|
|
|
mappings, localMountedDir, remoteStorageMountedLocation, remoteStorageConf, detectErr := detectMountInfo(commandEnv, writer, *dir)
|
|
if detectErr != nil {
|
|
jsonPrintln(writer, mappings)
|
|
return detectErr
|
|
}
|
|
|
|
// pull metadata from remote
|
|
if err = pullMetadata(commandEnv, writer, util.FullPath(localMountedDir), remoteStorageMountedLocation, util.FullPath(*dir), remoteStorageConf); err != nil {
|
|
return fmt.Errorf("cache meta data: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func detectMountInfo(commandEnv *CommandEnv, writer io.Writer, dir string) (*remote_pb.RemoteStorageMapping, string, *remote_pb.RemoteStorageLocation, *remote_pb.RemoteConf, error) {
|
|
return filer.DetectMountInfo(commandEnv.option.GrpcDialOption, commandEnv.option.FilerAddress, dir)
|
|
}
|
|
|
|
/*
|
|
This function update entry.RemoteEntry if the remote has any changes.
|
|
|
|
To pull remote updates, or created for the first time, the criteria is:
|
|
|
|
entry == nil or (entry.RemoteEntry != nil and (entry.RemoteEntry.RemoteTag != remote.RemoteTag or entry.RemoteEntry.RemoteMTime < remote.RemoteMTime ))
|
|
|
|
After the meta pull, the entry.RemoteEntry will have:
|
|
|
|
remoteEntry.LastLocalSyncTsNs == 0
|
|
Attributes.FileSize = uint64(remoteEntry.RemoteSize)
|
|
Attributes.Mtime = remoteEntry.RemoteMtime
|
|
remoteEntry.RemoteTag = actual remote tag
|
|
chunks = nil
|
|
|
|
When reading the file content or pulling the file content in "remote.cache", the criteria is:
|
|
|
|
Attributes.FileSize > 0 and len(chunks) == 0
|
|
|
|
After caching the file content, the entry.RemoteEntry will be
|
|
|
|
remoteEntry.LastLocalSyncTsNs == time.Now.UnixNano()
|
|
Attributes.FileSize = uint64(remoteEntry.RemoteSize)
|
|
Attributes.Mtime = remoteEntry.RemoteMtime
|
|
chunks = non-empty
|
|
|
|
When "weed filer.remote.sync" to upload local changes to remote, the criteria is:
|
|
|
|
Attributes.Mtime > remoteEntry.RemoteMtime
|
|
|
|
Right after "weed filer.remote.sync", the entry.RemoteEntry will be
|
|
|
|
remoteEntry.LastLocalSyncTsNs = time.Now.UnixNano()
|
|
remoteEntry.RemoteSize = actual remote size, which should equal to entry.Attributes.FileSize
|
|
remoteEntry.RemoteMtime = actual remote mtime, which should be a little greater than entry.Attributes.Mtime
|
|
remoteEntry.RemoteTag = actual remote tag
|
|
|
|
If entry does not exist, need to pull meta
|
|
If entry.RemoteEntry == nil, this is a new local change and should not be overwritten
|
|
|
|
If entry.RemoteEntry.RemoteTag != remoteEntry.RemoteTag {
|
|
the remote version is updated, need to pull meta
|
|
}
|
|
*/
|
|
|
|
// remoteEntryFileMode returns the POSIX mode for an entry synced from a remote
|
|
// whose listing carries no mode. It matches what SeaweedFS S3 assigns native
|
|
// objects (files 0660) so a mounted bucket matches the source, deriving the
|
|
// directory mode with the same 0111 traversal mask the filer uses for
|
|
// auto-created parents (0660 -> 0771).
|
|
func remoteEntryFileMode(isDirectory bool) uint32 {
|
|
mode := uint32(0660)
|
|
if isDirectory {
|
|
mode = uint32(os.ModeDir) | mode | 0111
|
|
}
|
|
return mode
|
|
}
|
|
|
|
func pullMetadata(commandEnv *CommandEnv, writer io.Writer, localMountedDir util.FullPath, remoteMountedLocation *remote_pb.RemoteStorageLocation, dirToCache util.FullPath, remoteConf *remote_pb.RemoteConf) error {
|
|
|
|
// visit remote storage
|
|
remoteStorage, err := remote_storage.GetRemoteStorage(remoteConf)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
remote := filer.MapFullPathToRemoteStorageLocation(localMountedDir, remoteMountedLocation, dirToCache)
|
|
|
|
err = commandEnv.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
|
|
ctx := context.Background()
|
|
err = remoteStorage.Traverse(remote, func(remoteDir, name string, isDirectory bool, remoteEntry *filer_pb.RemoteEntry) error {
|
|
localDir := filer.MapRemoteStorageLocationPathToFullPath(localMountedDir, remoteMountedLocation, remoteDir)
|
|
fmt.Fprint(writer, localDir.Child(name))
|
|
|
|
lookupResponse, lookupErr := filer_pb.LookupEntry(context.Background(), client, &filer_pb.LookupDirectoryEntryRequest{
|
|
Directory: string(localDir),
|
|
Name: name,
|
|
})
|
|
var existingEntry *filer_pb.Entry
|
|
if lookupErr != nil {
|
|
if lookupErr != filer_pb.ErrNotFound {
|
|
return lookupErr
|
|
}
|
|
} else {
|
|
existingEntry = lookupResponse.Entry
|
|
}
|
|
|
|
if existingEntry == nil {
|
|
_, createErr := client.CreateEntry(ctx, &filer_pb.CreateEntryRequest{
|
|
Directory: string(localDir),
|
|
Entry: &filer_pb.Entry{
|
|
Name: name,
|
|
IsDirectory: isDirectory,
|
|
Attributes: &filer_pb.FuseAttributes{
|
|
FileSize: uint64(remoteEntry.RemoteSize),
|
|
Mtime: remoteEntry.RemoteMtime,
|
|
FileMode: remoteEntryFileMode(isDirectory),
|
|
TtlSec: 0, // Remote entries should not have TTL
|
|
},
|
|
RemoteEntry: remoteEntry,
|
|
},
|
|
})
|
|
fmt.Fprintln(writer, " (create)")
|
|
return createErr
|
|
} else {
|
|
if existingEntry.RemoteEntry == nil {
|
|
// this is a new local change and should not be overwritten
|
|
fmt.Fprintln(writer, " (skip)")
|
|
return nil
|
|
}
|
|
if existingEntry.RemoteEntry.RemoteETag != remoteEntry.RemoteETag || existingEntry.RemoteEntry.RemoteMtime < remoteEntry.RemoteMtime {
|
|
// the remote version is updated, need to pull meta
|
|
fmt.Fprintln(writer, " (update)")
|
|
return doSaveRemoteEntry(client, string(localDir), existingEntry, remoteEntry)
|
|
}
|
|
}
|
|
fmt.Fprintln(writer, " (skip)")
|
|
return nil
|
|
})
|
|
return err
|
|
})
|
|
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|