Files
seaweedfs/weed/command/filer_backup.go
T
Jaehoon Kim 1b5f1c1f3b feat(filer.backup): -initialSnapshot re-seeds a reinitialized destination (#9828)
* feat(filer.backup): add -resetCheckpoint to force a fresh sync

filer.backup resumes from a per-sink offset persisted in the source filer's KV.
There was no first-class way to discard that checkpoint and re-run from the
beginning short of guessing a large -timeAgo, which also skips -initialSnapshot.

Add -resetCheckpoint: before reading the offset, write 0 for this sink so
getOffset returns 0, isFreshSync stays true, and -initialSnapshot re-runs a full
walk. Effective only when -timeAgo is 0.

The flag is cleared after the first successful reset: runFilerBackup retries
doFilerBackup forever on error, so leaving it set would re-zero the checkpoint
on every retry and never make forward progress after a transient failure. Later
retries resume from the persisted checkpoint instead.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* fix(filer.backup): keep fresh-sync intent when offset read fails after reset

After -resetCheckpoint writes offset 0, a transient getOffset read-back error
flipped isFreshSync to false, which skipped the -initialSnapshot walk the reset
explicitly requested. Track that the reset happened this iteration and, on a
getOffset error, preserve isFreshSync=true in that case (the non-reset path
keeps treating a read error as "not fresh" to avoid re-walking on transients).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* refactor(filer.backup): skip offset read-back on reset instead of tracking a flag

Replace the didReset bool by branching: on -resetCheckpoint, clear the offset and
start fresh without reading it back (we just wrote 0, so the state is known);
otherwise read the offset as before. This drops the redundant getOffset RPC after
a reset and removes the read-back error case entirely, so no separate flag is
needed to preserve isFreshSync.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* filer.backup: -initialSnapshot re-seeds on every start; drop -resetCheckpoint

-initialSnapshot now walks the live tree whenever -timeAgo is 0, seeds the
destination, and overwrites the saved checkpoint, rather than running only on a
fresh sync. That re-seeds a reinitialized destination on its own, so the
separate -resetCheckpoint flag is gone.

The walk runs once per process: the in-memory flag is cleared after the
watermark is persisted, so the retry loop resumes from the persisted checkpoint
instead of re-walking on every transient error. A process restart re-walks, so
remove the flag once the backup is caught up.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: Chris Lu <chris.lu@gmail.com>
2026-06-07 23:35:53 -07:00

437 lines
19 KiB
Go

package command
import (
"context"
"errors"
"fmt"
nethttp "net/http"
"regexp"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/replication/repl_util"
"github.com/seaweedfs/seaweedfs/weed/replication/sink"
"github.com/seaweedfs/seaweedfs/weed/replication/source"
"github.com/seaweedfs/seaweedfs/weed/security"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/http"
"github.com/seaweedfs/seaweedfs/weed/util/wildcard"
"google.golang.org/grpc"
)
type FilerBackupOptions struct {
isActivePassive *bool
filer *string
path *string
excludePaths *string
excludeFileName *string // deprecated: use excludeFileNames
excludeFileNames *string
excludePathPatterns *string
debug *bool
proxyByFiler *bool
doDeleteFiles *bool
disableErrorRetry *bool
ignore404Error *bool
timeAgo *time.Duration
retentionDays *int
initialSnapshot *bool
}
var (
filerBackupOptions FilerBackupOptions
ignorable404ErrString = fmt.Sprintf("%d %s: %s", nethttp.StatusNotFound, nethttp.StatusText(nethttp.StatusNotFound), http.ErrNotFound.Error())
)
func init() {
cmdFilerBackup.Run = runFilerBackup // break init cycle
filerBackupOptions.filer = cmdFilerBackup.Flag.String("filer", "localhost:8888", "filer of one SeaweedFS cluster")
filerBackupOptions.path = cmdFilerBackup.Flag.String("filerPath", "/", "directory to sync on filer")
filerBackupOptions.excludePaths = cmdFilerBackup.Flag.String("filerExcludePaths", "", "exclude directories to sync on filer")
filerBackupOptions.excludeFileName = cmdFilerBackup.Flag.String("filerExcludeFileName", "", "[DEPRECATED: use -filerExcludeFileNames] exclude file names that match the regexp")
filerBackupOptions.excludeFileNames = cmdFilerBackup.Flag.String("filerExcludeFileNames", "", "comma-separated wildcard patterns to exclude file names, e.g., \"*.tmp,._*\"")
filerBackupOptions.excludePathPatterns = cmdFilerBackup.Flag.String("filerExcludePathPatterns", "", "comma-separated wildcard patterns to exclude paths where any component matches, e.g., \".snapshot,temp*\"")
filerBackupOptions.proxyByFiler = cmdFilerBackup.Flag.Bool("filerProxy", false, "read and write file chunks by filer instead of volume servers")
filerBackupOptions.doDeleteFiles = cmdFilerBackup.Flag.Bool("doDeleteFiles", false, "delete files on the destination")
filerBackupOptions.debug = cmdFilerBackup.Flag.Bool("debug", false, "debug mode to print out received files")
filerBackupOptions.timeAgo = cmdFilerBackup.Flag.Duration("timeAgo", 0, "start time before now. \"300ms\", \"1.5h\" or \"2h45m\". Valid time units are \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\"")
filerBackupOptions.retentionDays = cmdFilerBackup.Flag.Int("retentionDays", 0, "incremental backup retention days")
filerBackupOptions.disableErrorRetry = cmdFilerBackup.Flag.Bool("disableErrorRetry", false, "disables errors retry, only logs will print")
filerBackupOptions.ignore404Error = cmdFilerBackup.Flag.Bool("ignore404Error", true, "ignore 404 errors from filer")
filerBackupOptions.initialSnapshot = cmdFilerBackup.Flag.Bool("initialSnapshot", false, "before subscribing to metadata updates, walk the live filer tree under -filerPath and seed the destination, then subscribe from the walk-start timestamp so concurrent changes are still captured. Runs on every start when -timeAgo is 0 and overwrites the saved checkpoint, so it also re-seeds a reinitialized destination. Remove it once the backup is caught up, otherwise it re-walks the whole tree on each restart.")
}
var cmdFilerBackup = &Command{
UsageLine: "filer.backup -filer=<filerHost>:<filerPort> ",
Short: "resume-able continuously replicate files from a SeaweedFS cluster to another location defined in replication.toml",
Long: `resume-able continuously replicate files from a SeaweedFS cluster to another location defined in replication.toml
filer.backup listens on filer notifications. If any file is updated, it will fetch the updated content,
and write to the destination. This is to replace filer.replicate command since additional message queue is not needed.
If restarted and "-timeAgo" is not set, the synchronization will resume from the previous checkpoints, persisted every minute.
A fresh sync will start from the earliest metadata logs.
On a fresh sync the metadata event log only re-materializes files that still exist on the source; entries that were
created and later deleted are replayed as a create-then-delete pair and therefore never appear on the destination.
Pass "-initialSnapshot" to walk the live filer tree first and seed the destination with the current tree, then
subscribe from the walk-start timestamp. This also re-seeds a reinitialized destination: it overwrites the saved
checkpoint and re-walks on every start, so remove it once the backup is caught up.
`,
}
func runFilerBackup(cmd *Command, args []string) bool {
util.LoadSecurityConfiguration()
util.LoadConfiguration("replication", true)
// Compile exclude patterns once before the retry loop — these are
// configuration errors and must not be retried.
reExcludeFileName, err := compileExcludePattern(*filerBackupOptions.excludeFileName, "exclude file name")
if err != nil {
glog.Fatalf("invalid -filerExcludeFileName: %v", err)
}
excludeFileNames := wildcard.CompileWildcardMatchers(*filerBackupOptions.excludeFileNames)
excludePathPatterns := wildcard.CompileWildcardMatchers(*filerBackupOptions.excludePathPatterns)
grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
clientId := util.RandomInt32()
var clientEpoch int32
for {
clientEpoch++
err := doFilerBackup(grpcDialOption, &filerBackupOptions, reExcludeFileName, excludeFileNames, excludePathPatterns, clientId, clientEpoch)
if err != nil {
glog.Errorf("backup from %s: %v", *filerBackupOptions.filer, err)
time.Sleep(1747 * time.Millisecond)
}
}
}
const (
BackupKeyPrefix = "backup."
)
func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, reExcludeFileName *regexp.Regexp, excludeFileNames []*wildcard.WildcardMatcher, excludePathPatterns []*wildcard.WildcardMatcher, clientId int32, clientEpoch int32) error {
// find data sink
dataSink := findSink(util.GetViper())
if dataSink == nil {
return fmt.Errorf("no data sink configured in replication.toml")
}
sourceFiler := pb.ServerAddress(*backupOption.filer)
sourcePath := *backupOption.path
excludePaths := util.StringSplit(*backupOption.excludePaths, ",")
timeAgo := *backupOption.timeAgo
targetPath := dataSink.GetSinkToDirectory()
debug := *backupOption.debug
// get start time for the data sink
startFrom := time.Unix(0, 0)
sinkId := util.HashStringToLong(dataSink.GetName() + dataSink.GetSinkToDirectory())
runSnapshot := *backupOption.initialSnapshot && timeAgo == 0
if timeAgo == 0 {
if runSnapshot {
// snapshot below sets the start point; no checkpoint read needed
glog.V(0).Infof("initialSnapshot requested — walking live tree before subscribing")
} else {
lastOffsetTsNs, err := getOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, int32(sinkId))
if err != nil {
glog.V(0).Infof("starting from %v (offset read failed: %v)", startFrom, err)
} else if lastOffsetTsNs > 0 {
startFrom = time.Unix(0, lastOffsetTsNs)
glog.V(0).Infof("resuming from %v", startFrom)
} else {
glog.V(0).Infof("starting from %v (no prior checkpoint)", startFrom)
}
}
} else {
startFrom = time.Now().Add(-timeAgo)
glog.V(0).Infof("start time is set to %v", startFrom)
}
// create filer sink
filerSource := &source.FilerSource{}
if err := filerSource.DoInitialize(
sourceFiler.ToHttpAddress(),
sourceFiler.ToGrpcAddress(),
sourcePath,
*backupOption.proxyByFiler); err != nil {
return fmt.Errorf("filersource initialization failed: %v", err)
}
if err := repl_util.InitializeSSEForReplication(filerSource); err != nil {
return fmt.Errorf("SSE initialization failed: %v", err)
}
dataSink.SetSourceFiler(filerSource)
// Walk and seed the live tree, then subscribe from the walk-start watermark:
// replaying the event log alone misses entries created-then-deleted before
// the walk. The watermark overwrites any stale checkpoint, re-seeding a wiped
// destination.
if runSnapshot {
snapshotTsNs, err := runInitialSnapshot(sourceFiler.ToGrpcAddress(), filerSource, sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.ignore404Error)
if err != nil {
return fmt.Errorf("initial snapshot: %w", err)
}
// The walk can take hours on large trees; retry the tiny KV write a
// handful of times before giving up so a flaky filer KV doesn't force
// the whole walk to repeat on the next retry loop iteration.
if err := persistSnapshotOffset(grpcDialOption, sourceFiler, int32(sinkId), snapshotTsNs); err != nil {
glog.Errorf("initialSnapshot: FAILED to persist offset %d for sinkId %d after retries: %v — the next retry will redo the full walk", snapshotTsNs, sinkId, err)
return fmt.Errorf("persist initial snapshot offset: %w", err)
}
startFrom = time.Unix(0, snapshotTsNs)
// walk once per process; retries resume from the persisted checkpoint
*backupOption.initialSnapshot = false
glog.V(0).Infof("initialSnapshot done; subscribing from %v", startFrom)
}
var processEventFn func(*filer_pb.SubscribeMetadataResponse) error
if *backupOption.ignore404Error {
processEventFnGenerated := genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.doDeleteFiles, debug)
processEventFn = func(resp *filer_pb.SubscribeMetadataResponse) error {
err := processEventFnGenerated(resp)
if err == nil {
return nil
}
if isIgnorable404(err) {
glog.V(0).Infof("got 404 error for %s, ignore it: %s", getSourceKey(resp), err.Error())
return nil
}
return err
}
} else {
processEventFn = genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.doDeleteFiles, debug)
}
processEventFnWithOffset := pb.AddOffsetFunc(processEventFn, 3*time.Second, func(counter int64, lastTsNs int64) error {
glog.V(0).Infof("backup %s progressed to %v %0.2f/sec", sourceFiler, time.Unix(0, lastTsNs), float64(counter)/float64(3))
return setOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, int32(sinkId), lastTsNs)
})
if dataSink.IsIncremental() && *filerBackupOptions.retentionDays > 0 {
go func() {
for {
now := time.Now()
time.Sleep(time.Hour * 24)
key := util.Join(targetPath, now.Add(-1*time.Hour*24*time.Duration(*filerBackupOptions.retentionDays)).Format("2006-01-02"))
_ = dataSink.DeleteEntry(util.Join(targetPath, key), true, true, nil)
glog.V(0).Infof("incremental backup delete directory:%s", key)
}
}()
}
prefix := sourcePath
if !strings.HasSuffix(prefix, "/") {
prefix = prefix + "/"
}
eventErrorType := pb.RetryForeverOnError
if *backupOption.disableErrorRetry {
eventErrorType = pb.TrivialOnError
}
metadataFollowOption := &pb.MetadataFollowOption{
ClientName: "backup_" + dataSink.GetName(),
ClientId: clientId,
ClientEpoch: clientEpoch,
SelfSignature: 0,
PathPrefix: prefix,
AdditionalPathPrefixes: nil,
DirectoriesToWatch: nil,
StartTsNs: startFrom.UnixNano(),
StopTsNs: 0,
EventErrorType: eventErrorType,
}
return pb.FollowMetadata(sourceFiler, grpcDialOption, metadataFollowOption, processEventFnWithOffset)
}
func getSourceKey(resp *filer_pb.SubscribeMetadataResponse) string {
if resp == nil || resp.EventNotification == nil {
return ""
}
message := resp.EventNotification
if message.NewEntry != nil {
return string(util.FullPath(message.NewParentPath).Child(message.NewEntry.Name))
}
if message.OldEntry != nil {
return string(util.FullPath(resp.Directory).Child(message.OldEntry.Name))
}
return ""
}
// isIgnorable404 returns true if the error represents a 404/not-found condition
// that should be silently ignored during backup. This covers:
// - errors wrapping http.ErrNotFound (direct volume server 404 via non-S3 sinks)
// - errors containing the "404 Not Found: not found" status string (S3 sink path
// where AWS SDK breaks the errors.Is unwrap chain)
// - LookupFileId or volume-id-not-found errors from the volume id map
func isIgnorable404(err error) bool {
if errors.Is(err, http.ErrNotFound) {
return true
}
errStr := err.Error()
return strings.Contains(errStr, ignorable404ErrString) ||
strings.Contains(errStr, "LookupFileId") ||
(strings.Contains(errStr, "volume id") && strings.Contains(errStr, "not found"))
}
// persistSnapshotOffset writes the snapshot high-water mark to the source
// filer's KV, retrying a few times with exponential backoff on transient
// errors. Losing this write forces a full re-walk on the next retry loop
// iteration, so a small retry budget here is far cheaper than paying the
// walk cost again on a large tree.
func persistSnapshotOffset(grpcDialOption grpc.DialOption, sourceFiler pb.ServerAddress, sinkId int32, tsNs int64) error {
const attempts = 4
backoff := 500 * time.Millisecond
var lastErr error
for i := 1; i <= attempts; i++ {
if err := setOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, sinkId, tsNs); err == nil {
return nil
} else {
lastErr = err
if i == attempts {
break
}
glog.V(0).Infof("initialSnapshot: setOffset attempt %d/%d failed: %v (retrying in %v)", i, attempts, err, backoff)
time.Sleep(backoff)
backoff *= 2
}
}
return lastErr
}
// runInitialSnapshot walks the live filer tree under sourcePath and seeds the
// destination via the sink. The snapshot timestamp is captured before the walk
// starts so any create/update/delete that races with the walk is still caught
// by the subscription that runs afterward (sink CreateEntry is idempotent for
// all builtin sinks, so replaying a concurrent create from the subscription
// over an entry already written by the walk is safe).
//
// Note on excludes: TraverseBfs enumerates every directory and enqueues its
// children unconditionally before the callback runs, so the exclude filters
// below only prevent *processing* of excluded entries — they do not prune the
// listing RPCs for excluded subtrees. For small system excludes (SystemLogDir)
// that is fine; for large user-supplied excludes (say an archive directory
// with millions of files), the listing cost can dominate the snapshot. A
// proper prune signal through TraverseBfs is a separate change.
func runInitialSnapshot(
grpcAddress string,
filerSource *source.FilerSource,
sourcePath string,
targetPath string,
excludePaths []string,
reExcludeFileName *regexp.Regexp,
excludeFileNames []*wildcard.WildcardMatcher,
excludePathPatterns []*wildcard.WildcardMatcher,
dataSink sink.ReplicationSink,
ignore404Error bool,
) (int64, error) {
// Metadata events are stamped server-side, so the backup host clock may be
// ahead of the filer's. Take `now - 1min` as the subscription watermark so
// a fast client clock can't skip events that fire during/right-after the
// walk. meta_aggregator.go uses the same 1-minute margin on initial peer
// traversal; see the note there on why duplicate replay is harmless.
snapshotTsNs := time.Now().Add(-time.Minute).UnixNano()
glog.V(0).Infof("initialSnapshot: walking %s on %s -> %s (snapshotTsNs=%d, -1m skew margin applied)", sourcePath, grpcAddress, targetPath, snapshotTsNs)
// TraverseBfs fans the callback out across 5 worker goroutines, so counter
// updates and the progress-log gate need to be safe under concurrent access.
var entryCount, byteCount atomic.Int64
start := time.Now()
var logMu sync.Mutex
lastLog := start
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
err := filer_pb.TraverseBfs(ctx, filerSource, util.FullPath(sourcePath), func(parentPath util.FullPath, entry *filer_pb.Entry) error {
parent := string(parentPath)
sourceKey := parentPath.Child(entry.Name)
source := string(sourceKey)
// Check exclusion on the entry's own path too — otherwise a walked
// directory whose full path is SystemLogDir or a user-excluded path
// still gets seeded (only its children would be skipped by the parent
// check).
if parent == filer.SystemLogDir || strings.HasPrefix(parent, filer.SystemLogDir+"/") ||
source == filer.SystemLogDir || strings.HasPrefix(source, filer.SystemLogDir+"/") {
return nil
}
if matchesExcludePath(parent, excludePaths) || matchesExcludePath(source, excludePaths) {
return nil
}
if isEntryExcluded(parent, entry, reExcludeFileName, excludeFileNames, excludePathPatterns) {
return nil
}
if !util.IsEqualOrUnder(source, sourcePath) {
return nil
}
targetKey := initialSnapshotTargetKey(dataSink, targetPath, sourcePath, sourceKey, entry)
if err := dataSink.CreateEntry(targetKey, entry, nil); err != nil {
// A file can be listed by TraverseBfs and then deleted before
// CreateEntry reads its chunks. The follow phase ignores 404s when
// -ignore404Error is set; apply the same policy here so the walk
// doesn't abort (and trigger a full re-walk on retry) just because
// a single entry disappeared mid-snapshot.
if ignore404Error && isIgnorable404(err) {
glog.V(0).Infof("initialSnapshot: source entry %s disappeared, ignore it: %s", sourceKey, err.Error())
return nil
}
return fmt.Errorf("seed %s: %w", targetKey, err)
}
curEntries := entryCount.Add(1)
var curBytes int64
if entry.Attributes != nil {
curBytes = byteCount.Add(int64(entry.Attributes.FileSize))
} else {
curBytes = byteCount.Load()
}
now := time.Now()
logMu.Lock()
shouldLog := now.Sub(lastLog) >= 5*time.Second
if shouldLog {
lastLog = now
}
logMu.Unlock()
if shouldLog {
elapsed := now.Sub(start).Seconds()
if elapsed == 0 {
elapsed = 1
}
glog.V(0).Infof("initialSnapshot: %d entries / %d bytes seeded (%.1f/sec)", curEntries, curBytes, float64(curEntries)/elapsed)
}
return nil
})
if err != nil {
return 0, err
}
glog.V(0).Infof("initialSnapshot: done — %d entries / %d bytes in %v", entryCount.Load(), byteCount.Load(), time.Since(start))
return snapshotTsNs, nil
}
// initialSnapshotTargetKey pulls the entry mtime and delegates to the shared
// destKey helper in filer_sync.go so the walk and the event-log path produce
// the same destination key for the same source entry.
func initialSnapshotTargetKey(dataSink sink.ReplicationSink, targetPath, sourcePath string, sourceKey util.FullPath, entry *filer_pb.Entry) string {
var mTime int64
if entry.Attributes != nil {
mTime = entry.Attributes.Mtime
}
return destKey(dataSink, targetPath, sourcePath, sourceKey, mTime)
}