Files
seaweedfs/weed/replication/replicator.go
T
Chris Lu e12052ee6b fix(filer.sync): replicate a rename as an atomic move, not a no-op update (#9895)
* fix(filer.sync): replicate a rename as create-then-delete, not an in-place update

A rename arrives as a single metadata event carrying both the old and new
entry. The filer sink was routed to UpdateEntry, which looks up the old
path but issues the update against the new parent without changing the
name — and the filer UpdateEntry RPC cannot move an entry. So the rename
was dropped: the old path lingered and the new path never appeared
(same-dir renames rewrote the old name in place).

Route a real move (the sink path changed) through CreateEntry(new) then
DeleteEntry(old) in both the replicator and the filer.sync/backup driver,
the way the other sinks already handle it; reach UpdateEntry only for true
in-place updates. Create before delete so a crash between the two leaves
the entry visible rather than lost.

* fix(filer.sync): derive the rename delete key like the create key, guard the watched root

The rename delete leg rebuilt the old key with a raw util.Join, bypassing the
sink-side key normalization the create leg gets from buildKey — so a rename
could create the new entry and then fail to delete the old one under a
transformed key. Build the old key through buildKey too, and skip the delete
when the moved entry is the watched root itself (where the old key would
resolve to the target root and recursively delete the whole sink tree).

* test(filer.sync): cover the in-place update delete-then-create fallback order

The recording sinks always reported foundExisting, so the fallback that an
in-place update takes when the entry is missing on the sink was never run.
Make it configurable and assert the fallback deletes before it recreates the
same key, in both the replicator and the filer.sync drivers.

* feat(filer.sync): move filer-sink renames natively via AtomicRenameEntry

create-then-delete is unsafe for the filer sink: CreateEntry returns nil
without creating on a transient chunk-copy error, so the paired delete could
remove the only valid destination copy; a directory rename also deleted the
old subtree before descendants were recreated, and left old chunks behind.

Add an optional EntryMover sink capability and implement it on the filer sink
via AtomicRenameEntry — one atomic, metadata-only move that relocates a whole
subtree in a single transaction. Renames prefer it; sinks without a native
move keep create-then-delete. When the old path is already gone (a descendant
the parent rename moved, or one never replicated) MoveEntry creates the new
path instead, re-checking existence with a lookup so a rolled-back move that
left the old entry intact is retried rather than mistaken for gone.

* docs(filer.sync): note entryMissing's gRPC not-found string fallback is deliberate
2026-06-09 12:54:28 -07:00

187 lines
6.3 KiB
Go

package replication
import (
"context"
"fmt"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/replication/repl_util"
"github.com/seaweedfs/seaweedfs/weed/replication/sink"
"github.com/seaweedfs/seaweedfs/weed/replication/source"
"github.com/seaweedfs/seaweedfs/weed/util"
"google.golang.org/grpc"
)
// Replicator forwards filer metadata events from a source filer to a
// replication sink.
type Replicator struct {
// sink is the destination that receives the create, update, delete and move calls.
sink sink.ReplicationSink
// source is the filer being replicated from; its Dir bounds which paths are replicated.
source *source.FilerSource
// excludeDirs lists source directories whose entries (and everything under them) are skipped.
excludeDirs []string
}
// NewReplicator builds a Replicator that reads from the filer described by
// sourceConfig (using keys under configPrefix) and writes to dataSink.
//
// A failure to initialize SSE support is only logged as a warning; the
// Replicator is still returned. The sink is handed the source filer through
// SetSourceFiler, and the excluded directories are read from the
// configPrefix+"excludeDirectories" setting.
func NewReplicator(sourceConfig util.Configuration, configPrefix string, dataSink sink.ReplicationSink) *Replicator {
	// Named filerSource rather than source so the imported package is not shadowed.
	filerSource := &source.FilerSource{}
	filerSource.Initialize(sourceConfig, configPrefix)

	if sseErr := repl_util.InitializeSSEForReplication(filerSource); sseErr != nil {
		glog.Warningf("SSE initialization failed: %v (encrypted objects may fail to replicate)", sseErr)
	}

	dataSink.SetSourceFiler(filerSource)

	excluded := sourceConfig.GetStringSlice(configPrefix + "excludeDirectories")
	replicator := &Replicator{
		sink:        dataSink,
		source:      filerSource,
		excludeDirs: excluded,
	}
	return replicator
}
// Replicate applies one filer metadata event to the configured sink.
//
// key is the full source path the event refers to (the old path for a rename).
// The event is classified by which of message.OldEntry / message.NewEntry is set:
//
//   - old only: DeleteEntry on the sink
//   - new only: CreateEntry on the sink
//   - both, sink path changed: a move, done natively through sink.EntryMover
//     when the sink implements it, otherwise as create-then-delete
//   - both, same sink path: an in-place UpdateEntry, falling back to
//     delete-then-create when the sink reports no existing entry
//   - neither: logged and ignored
//
// Events whose path is outside r.source.Dir or under an excluded directory
// are skipped; a rename that crosses that boundary is downgraded to a plain
// create or delete. ctx is currently unused.
//
// It returns the error of the sink operation that was attempted, or nil when
// the event was skipped.
func (r *Replicator) Replicate(ctx context.Context, key string, message *filer_pb.EventNotification) error {
	// NOTE(review): presumably this prevents replication loops between
	// clusters when the sink is itself a filer — confirm.
	if message.IsFromOtherCluster && r.sink.GetName() == "filer" {
		return nil
	}

	oldEntry := message.OldEntry
	newEntry := message.NewEntry
	newParentPath := message.NewParentPath

	oldInSource := util.IsEqualOrUnder(key, r.source.Dir) && !r.isExcluded(key)

	// For rename events (both old and new entry present), check both paths
	// against the source directory. Convert cross-boundary renames to
	// create or delete so the sink stays consistent.
	if oldEntry != nil && newEntry != nil {
		newFullPath, targetParent := metadataEventTarget(key, newEntry, newParentPath)
		newInSource := util.IsEqualOrUnder(newFullPath, r.source.Dir) && !r.isExcluded(newFullPath)
		if !oldInSource && !newInSource {
			return nil
		}
		if !oldInSource {
			// Rename into watched directory: treat as create
			oldEntry = nil
			key = newFullPath
			newParentPath = targetParent
		} else if !newInSource {
			// Rename out of watched directory: treat as delete
			newEntry = nil
			newParentPath = ""
		}
	} else if !oldInSource {
		glog.V(4).Infof("skipping %v outside of %v", key, r.source.Dir)
		return nil
	}

	// Incremental sinks get a per-day directory component derived from the
	// entry's modification time.
	var dateKey string
	if r.sink.IsIncremental() {
		// Use the nil-safe generated getters: an entry that carries no
		// Attributes yields mTime 0 instead of a nil-pointer panic.
		var mTime int64
		if newEntry != nil {
			mTime = newEntry.GetAttributes().GetMtime()
		} else if oldEntry != nil {
			mTime = oldEntry.GetAttributes().GetMtime()
		}
		dateKey = time.Unix(mTime, 0).Format("2006-01-02")
	}

	oldSinkKey := r.sourceToSinkKey(key, dateKey)
	glog.V(3).Infof("replicate %s => %s", key, oldSinkKey)

	// By default the new sink key equals the old one; only a rename that
	// stays inside the watched tree gets a distinct target key.
	newSinkKey := oldSinkKey
	newSinkParentPath := newParentPath
	if oldEntry != nil && newEntry != nil {
		targetSourceKey, targetSourceParent := metadataEventTarget(key, newEntry, newParentPath)
		newSinkKey = r.sourceToSinkKey(targetSourceKey, dateKey)
		newSinkParentPath = r.sourceToSinkPath(targetSourceParent, dateKey)
	} else if newParentPath != "" && util.IsEqualOrUnder(newParentPath, r.source.Dir) {
		// Guarded by IsEqualOrUnder so sourceToSinkPath never slices a path
		// shorter than r.source.Dir.
		newSinkParentPath = r.sourceToSinkPath(newParentPath, dateKey)
	}

	switch {
	case oldEntry != nil && newEntry == nil:
		glog.V(4).Infof("deleting %v", oldSinkKey)
		return r.sink.DeleteEntry(oldSinkKey, oldEntry.IsDirectory, message.DeleteChunks, message.Signatures)
	case oldEntry == nil && newEntry != nil:
		glog.V(4).Infof("creating %v", oldSinkKey)
		return r.sink.CreateEntry(oldSinkKey, newEntry, message.Signatures)
	case oldEntry == nil && newEntry == nil:
		glog.V(0).Infof("weird message %+v", message)
		return nil
	}

	// From here on both oldEntry and newEntry are non-nil.
	if oldSinkKey != newSinkKey {
		// A real move: the path changed. UpdateEntry cannot move an entry.
		if mover, ok := r.sink.(sink.EntryMover); ok {
			glog.V(4).Infof("moving %v => %v", oldSinkKey, newSinkKey)
			return mover.MoveEntry(oldSinkKey, newSinkKey, newEntry, message.Signatures)
		}
		// Sinks without a native move: create at the new key first, then delete the
		// old, so a crash between the two leaves the entry visible under both names
		// rather than gone.
		glog.V(4).Infof("creating renamed %v", newSinkKey)
		if err := r.sink.CreateEntry(newSinkKey, newEntry, message.Signatures); err != nil {
			return fmt.Errorf("create renamed entry %v: %w", newSinkKey, err)
		}
		return r.sink.DeleteEntry(oldSinkKey, oldEntry.IsDirectory, false, message.Signatures)
	}

	// oldSinkKey == newSinkKey: pure in-place update (same path, content/attrs changed).
	foundExisting, err := r.sink.UpdateEntry(oldSinkKey, oldEntry, newSinkParentPath, newEntry, message.DeleteChunks, message.Signatures)
	if foundExisting {
		glog.V(4).Infof("updated %v", oldSinkKey)
		return err
	}

	// The sink reported no existing entry. Any error from UpdateEntry is
	// deliberately not returned here; instead fall back to delete-then-create
	// of the same key.
	err = r.sink.DeleteEntry(oldSinkKey, oldEntry.IsDirectory, false, message.Signatures)
	if err != nil {
		return fmt.Errorf("delete old entry %v: %w", oldSinkKey, err)
	}

	glog.V(4).Infof("creating missing %v", newSinkKey)
	return r.sink.CreateEntry(newSinkKey, newEntry, message.Signatures)
}
// isExcluded reports whether path equals, or lies under, any of the
// configured excluded directories.
func (r *Replicator) isExcluded(path string) bool {
	for i := range r.excludeDirs {
		if !util.IsEqualOrUnder(path, r.excludeDirs[i]) {
			continue
		}
		return true
	}
	return false
}
// sourceToSinkKey maps a source key to its key on the sink: the leading
// len(r.source.Dir) bytes of sourceKey are dropped and the remainder is joined
// under the sink's target directory and dateKey.
//
// sourceKey must be at least as long as r.source.Dir; a shorter key makes the
// slice expression panic.
func (r *Replicator) sourceToSinkKey(sourceKey, dateKey string) string {
	sinkRoot := r.sink.GetSinkToDirectory()
	relative := sourceKey[len(r.source.Dir):]
	return util.Join(sinkRoot, dateKey, relative)
}
// sourceToSinkPath maps a source directory path to the matching path on the
// sink. The mapping is the same one sourceToSinkKey applies to entry keys
// (drop the r.source.Dir prefix, join under the sink directory and dateKey),
// so it simply delegates.
func (r *Replicator) sourceToSinkPath(sourcePath, dateKey string) string {
	return r.sourceToSinkKey(sourcePath, dateKey)
}
// metadataEventTarget computes where an event's new entry lives on the source.
//
// targetParent is newParentPath when that is non-empty, otherwise the
// directory part of key. targetKey is targetParent joined with newEntry.Name.
// Both results are empty when newEntry is nil.
func metadataEventTarget(key string, newEntry *filer_pb.Entry, newParentPath string) (targetKey, targetParent string) {
	if newEntry == nil {
		return "", ""
	}

	if targetParent = newParentPath; targetParent == "" {
		// No explicit new parent: the entry stays in key's directory.
		dir, _ := util.FullPath(key).DirAndName()
		targetParent = dir
	}

	targetKey = util.Join(targetParent, newEntry.Name)
	return targetKey, targetParent
}
// ReadFilerSignature asks the given filer for its configuration and returns
// the Signature field of the response.
//
// It returns 0 and a non-nil error when pb.WithFilerClient reports a failure.
// An error from the GetFilerConfiguration call itself is wrapped with %w, so
// callers can inspect the underlying cause with errors.Is / errors.As.
func ReadFilerSignature(grpcDialOption grpc.DialOption, filer pb.ServerAddress) (filerSignature int32, readErr error) {
	readErr = pb.WithFilerClient(false, 0, filer, grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
		resp, err := client.GetFilerConfiguration(context.Background(), &filer_pb.GetFilerConfigurationRequest{})
		if err != nil {
			return fmt.Errorf("GetFilerConfiguration %s: %w", filer, err)
		}
		filerSignature = resp.Signature
		return nil
	})
	if readErr != nil {
		// Never hand back a partially populated signature alongside an error.
		return 0, readErr
	}
	return filerSignature, nil
}