mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-06-13 23:36:45 +03:00
1b5f1c1f3b
* feat(filer.backup): add -resetCheckpoint to force a fresh sync filer.backup resumes from a per-sink offset persisted in the source filer's KV. There was no first-class way to discard that checkpoint and re-run from the beginning short of guessing a large -timeAgo, which also skips -initialSnapshot. Add -resetCheckpoint: before reading the offset, write 0 for this sink so getOffset returns 0, isFreshSync stays true, and -initialSnapshot re-runs a full walk. Effective only when -timeAgo is 0. The flag is cleared after the first successful reset: runFilerBackup retries doFilerBackup forever on error, so leaving it set would re-zero the checkpoint on every retry and never make forward progress after a transient failure. Later retries resume from the persisted checkpoint instead. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * fix(filer.backup): keep fresh-sync intent when offset read fails after reset After -resetCheckpoint writes offset 0, a transient getOffset read-back error flipped isFreshSync to false, which skipped the -initialSnapshot walk the reset explicitly requested. Track that the reset happened this iteration and, on a getOffset error, preserve isFreshSync=true in that case (the non-reset path keeps treating a read error as "not fresh" to avoid re-walking on transients). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * refactor(filer.backup): skip offset read-back on reset instead of tracking a flag Replace the didReset bool by branching: on -resetCheckpoint, clear the offset and start fresh without reading it back (we just wrote 0, so the state is known); otherwise read the offset as before. This drops the redundant getOffset RPC after a reset and removes the read-back error case entirely, so no separate flag is needed to preserve isFreshSync. 
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * filer.backup: -initialSnapshot re-seeds on every start; drop -resetCheckpoint -initialSnapshot now walks the live tree whenever -timeAgo is 0, seeds the destination, and overwrites the saved checkpoint, rather than running only on a fresh sync. That re-seeds a reinitialized destination on its own, so the separate -resetCheckpoint flag is gone. The walk runs once per process: the in-memory flag is cleared after the watermark is persisted, so the retry loop resumes from the persisted checkpoint instead of re-walking on every transient error. A process restart re-walks, so remove the flag once the backup is caught up. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Co-authored-by: Chris Lu <chris.lu@gmail.com>
437 lines
19 KiB
Go
437 lines
19 KiB
Go
package command
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
nethttp "net/http"
|
|
"regexp"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/filer"
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/replication/repl_util"
|
|
"github.com/seaweedfs/seaweedfs/weed/replication/sink"
|
|
"github.com/seaweedfs/seaweedfs/weed/replication/source"
|
|
"github.com/seaweedfs/seaweedfs/weed/security"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
"github.com/seaweedfs/seaweedfs/weed/util/http"
|
|
"github.com/seaweedfs/seaweedfs/weed/util/wildcard"
|
|
"google.golang.org/grpc"
|
|
)
|
|
|
|
// FilerBackupOptions groups the command-line options of "weed filer.backup".
// Every field is a pointer because the values are bound to flags in init and
// are only meaningful after flag parsing.
type FilerBackupOptions struct {
	// isActivePassive is not bound to any flag in this file's init.
	// NOTE(review): looks unused here — confirm before relying on it.
	isActivePassive *bool
	// filer is the address of the source filer (-filer).
	filer *string
	// path is the directory on the filer to back up (-filerPath).
	path *string
	// excludePaths lists directories to skip (-filerExcludePaths); it is split on commas.
	excludePaths *string
	// excludeFileName is a regexp for file names to skip (-filerExcludeFileName).
	// Deprecated: use excludeFileNames.
	excludeFileName *string
	// excludeFileNames holds comma-separated wildcard patterns for file names to skip (-filerExcludeFileNames).
	excludeFileNames *string
	// excludePathPatterns holds comma-separated wildcard patterns matched against path components (-filerExcludePathPatterns).
	excludePathPatterns *string
	// debug enables printing of received files (-debug).
	debug *bool
	// proxyByFiler reads and writes chunks through the filer instead of volume servers (-filerProxy).
	proxyByFiler *bool
	// doDeleteFiles propagates deletions to the destination (-doDeleteFiles).
	doDeleteFiles *bool
	// disableErrorRetry turns off retrying of failed events (-disableErrorRetry).
	disableErrorRetry *bool
	// ignore404Error makes not-found errors from the source non-fatal (-ignore404Error).
	ignore404Error *bool
	// timeAgo moves the subscription start point back from now (-timeAgo).
	timeAgo *time.Duration
	// retentionDays is the retention window for incremental backups (-retentionDays).
	retentionDays *int
	// initialSnapshot requests a walk of the live tree before subscribing (-initialSnapshot).
	initialSnapshot *bool
}
|
|
|
|
var (
|
|
filerBackupOptions FilerBackupOptions
|
|
ignorable404ErrString = fmt.Sprintf("%d %s: %s", nethttp.StatusNotFound, nethttp.StatusText(nethttp.StatusNotFound), http.ErrNotFound.Error())
|
|
)
|
|
|
|
func init() {
|
|
cmdFilerBackup.Run = runFilerBackup // break init cycle
|
|
filerBackupOptions.filer = cmdFilerBackup.Flag.String("filer", "localhost:8888", "filer of one SeaweedFS cluster")
|
|
filerBackupOptions.path = cmdFilerBackup.Flag.String("filerPath", "/", "directory to sync on filer")
|
|
filerBackupOptions.excludePaths = cmdFilerBackup.Flag.String("filerExcludePaths", "", "exclude directories to sync on filer")
|
|
filerBackupOptions.excludeFileName = cmdFilerBackup.Flag.String("filerExcludeFileName", "", "[DEPRECATED: use -filerExcludeFileNames] exclude file names that match the regexp")
|
|
filerBackupOptions.excludeFileNames = cmdFilerBackup.Flag.String("filerExcludeFileNames", "", "comma-separated wildcard patterns to exclude file names, e.g., \"*.tmp,._*\"")
|
|
filerBackupOptions.excludePathPatterns = cmdFilerBackup.Flag.String("filerExcludePathPatterns", "", "comma-separated wildcard patterns to exclude paths where any component matches, e.g., \".snapshot,temp*\"")
|
|
filerBackupOptions.proxyByFiler = cmdFilerBackup.Flag.Bool("filerProxy", false, "read and write file chunks by filer instead of volume servers")
|
|
filerBackupOptions.doDeleteFiles = cmdFilerBackup.Flag.Bool("doDeleteFiles", false, "delete files on the destination")
|
|
filerBackupOptions.debug = cmdFilerBackup.Flag.Bool("debug", false, "debug mode to print out received files")
|
|
filerBackupOptions.timeAgo = cmdFilerBackup.Flag.Duration("timeAgo", 0, "start time before now. \"300ms\", \"1.5h\" or \"2h45m\". Valid time units are \"ns\", \"us\" (or \"µs\"), \"ms\", \"s\", \"m\", \"h\"")
|
|
filerBackupOptions.retentionDays = cmdFilerBackup.Flag.Int("retentionDays", 0, "incremental backup retention days")
|
|
filerBackupOptions.disableErrorRetry = cmdFilerBackup.Flag.Bool("disableErrorRetry", false, "disables errors retry, only logs will print")
|
|
filerBackupOptions.ignore404Error = cmdFilerBackup.Flag.Bool("ignore404Error", true, "ignore 404 errors from filer")
|
|
filerBackupOptions.initialSnapshot = cmdFilerBackup.Flag.Bool("initialSnapshot", false, "before subscribing to metadata updates, walk the live filer tree under -filerPath and seed the destination, then subscribe from the walk-start timestamp so concurrent changes are still captured. Runs on every start when -timeAgo is 0 and overwrites the saved checkpoint, so it also re-seeds a reinitialized destination. Remove it once the backup is caught up, otherwise it re-walks the whole tree on each restart.")
|
|
}
|
|
|
|
var cmdFilerBackup = &Command{
|
|
UsageLine: "filer.backup -filer=<filerHost>:<filerPort> ",
|
|
Short: "resume-able continuously replicate files from a SeaweedFS cluster to another location defined in replication.toml",
|
|
Long: `resume-able continuously replicate files from a SeaweedFS cluster to another location defined in replication.toml
|
|
|
|
filer.backup listens on filer notifications. If any file is updated, it will fetch the updated content,
|
|
and write to the destination. This is to replace filer.replicate command since additional message queue is not needed.
|
|
|
|
If restarted and "-timeAgo" is not set, the synchronization will resume from the previous checkpoints, persisted every minute.
|
|
A fresh sync will start from the earliest metadata logs.
|
|
|
|
On a fresh sync the metadata event log only re-materializes files that still exist on the source; entries that were
|
|
created and later deleted are replayed as a create-then-delete pair and therefore never appear on the destination.
|
|
Pass "-initialSnapshot" to walk the live filer tree first and seed the destination with the current tree, then
|
|
subscribe from the walk-start timestamp. This also re-seeds a reinitialized destination: it overwrites the saved
|
|
checkpoint and re-walks on every start, so remove it once the backup is caught up.
|
|
|
|
`,
|
|
}
|
|
|
|
func runFilerBackup(cmd *Command, args []string) bool {
|
|
|
|
util.LoadSecurityConfiguration()
|
|
util.LoadConfiguration("replication", true)
|
|
|
|
// Compile exclude patterns once before the retry loop — these are
|
|
// configuration errors and must not be retried.
|
|
reExcludeFileName, err := compileExcludePattern(*filerBackupOptions.excludeFileName, "exclude file name")
|
|
if err != nil {
|
|
glog.Fatalf("invalid -filerExcludeFileName: %v", err)
|
|
}
|
|
excludeFileNames := wildcard.CompileWildcardMatchers(*filerBackupOptions.excludeFileNames)
|
|
excludePathPatterns := wildcard.CompileWildcardMatchers(*filerBackupOptions.excludePathPatterns)
|
|
|
|
grpcDialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
|
|
|
|
clientId := util.RandomInt32()
|
|
var clientEpoch int32
|
|
|
|
for {
|
|
clientEpoch++
|
|
err := doFilerBackup(grpcDialOption, &filerBackupOptions, reExcludeFileName, excludeFileNames, excludePathPatterns, clientId, clientEpoch)
|
|
if err != nil {
|
|
glog.Errorf("backup from %s: %v", *filerBackupOptions.filer, err)
|
|
time.Sleep(1747 * time.Millisecond)
|
|
}
|
|
}
|
|
}
|
|
|
|
// BackupKeyPrefix is the key prefix passed to getOffset/setOffset when the
// backup checkpoint of a sink is read from or written to the source filer.
const BackupKeyPrefix = "backup."
|
|
|
|
func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, reExcludeFileName *regexp.Regexp, excludeFileNames []*wildcard.WildcardMatcher, excludePathPatterns []*wildcard.WildcardMatcher, clientId int32, clientEpoch int32) error {
|
|
|
|
// find data sink
|
|
dataSink := findSink(util.GetViper())
|
|
if dataSink == nil {
|
|
return fmt.Errorf("no data sink configured in replication.toml")
|
|
}
|
|
|
|
sourceFiler := pb.ServerAddress(*backupOption.filer)
|
|
sourcePath := *backupOption.path
|
|
excludePaths := util.StringSplit(*backupOption.excludePaths, ",")
|
|
timeAgo := *backupOption.timeAgo
|
|
targetPath := dataSink.GetSinkToDirectory()
|
|
debug := *backupOption.debug
|
|
|
|
// get start time for the data sink
|
|
startFrom := time.Unix(0, 0)
|
|
sinkId := util.HashStringToLong(dataSink.GetName() + dataSink.GetSinkToDirectory())
|
|
runSnapshot := *backupOption.initialSnapshot && timeAgo == 0
|
|
if timeAgo == 0 {
|
|
if runSnapshot {
|
|
// snapshot below sets the start point; no checkpoint read needed
|
|
glog.V(0).Infof("initialSnapshot requested — walking live tree before subscribing")
|
|
} else {
|
|
lastOffsetTsNs, err := getOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, int32(sinkId))
|
|
if err != nil {
|
|
glog.V(0).Infof("starting from %v (offset read failed: %v)", startFrom, err)
|
|
} else if lastOffsetTsNs > 0 {
|
|
startFrom = time.Unix(0, lastOffsetTsNs)
|
|
glog.V(0).Infof("resuming from %v", startFrom)
|
|
} else {
|
|
glog.V(0).Infof("starting from %v (no prior checkpoint)", startFrom)
|
|
}
|
|
}
|
|
} else {
|
|
startFrom = time.Now().Add(-timeAgo)
|
|
glog.V(0).Infof("start time is set to %v", startFrom)
|
|
}
|
|
|
|
// create filer sink
|
|
filerSource := &source.FilerSource{}
|
|
if err := filerSource.DoInitialize(
|
|
sourceFiler.ToHttpAddress(),
|
|
sourceFiler.ToGrpcAddress(),
|
|
sourcePath,
|
|
*backupOption.proxyByFiler); err != nil {
|
|
return fmt.Errorf("filersource initialization failed: %v", err)
|
|
}
|
|
|
|
if err := repl_util.InitializeSSEForReplication(filerSource); err != nil {
|
|
return fmt.Errorf("SSE initialization failed: %v", err)
|
|
}
|
|
dataSink.SetSourceFiler(filerSource)
|
|
|
|
// Walk and seed the live tree, then subscribe from the walk-start watermark:
|
|
// replaying the event log alone misses entries created-then-deleted before
|
|
// the walk. The watermark overwrites any stale checkpoint, re-seeding a wiped
|
|
// destination.
|
|
if runSnapshot {
|
|
snapshotTsNs, err := runInitialSnapshot(sourceFiler.ToGrpcAddress(), filerSource, sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.ignore404Error)
|
|
if err != nil {
|
|
return fmt.Errorf("initial snapshot: %w", err)
|
|
}
|
|
// The walk can take hours on large trees; retry the tiny KV write a
|
|
// handful of times before giving up so a flaky filer KV doesn't force
|
|
// the whole walk to repeat on the next retry loop iteration.
|
|
if err := persistSnapshotOffset(grpcDialOption, sourceFiler, int32(sinkId), snapshotTsNs); err != nil {
|
|
glog.Errorf("initialSnapshot: FAILED to persist offset %d for sinkId %d after retries: %v — the next retry will redo the full walk", snapshotTsNs, sinkId, err)
|
|
return fmt.Errorf("persist initial snapshot offset: %w", err)
|
|
}
|
|
startFrom = time.Unix(0, snapshotTsNs)
|
|
// walk once per process; retries resume from the persisted checkpoint
|
|
*backupOption.initialSnapshot = false
|
|
glog.V(0).Infof("initialSnapshot done; subscribing from %v", startFrom)
|
|
}
|
|
|
|
var processEventFn func(*filer_pb.SubscribeMetadataResponse) error
|
|
if *backupOption.ignore404Error {
|
|
processEventFnGenerated := genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.doDeleteFiles, debug)
|
|
processEventFn = func(resp *filer_pb.SubscribeMetadataResponse) error {
|
|
err := processEventFnGenerated(resp)
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
if isIgnorable404(err) {
|
|
glog.V(0).Infof("got 404 error for %s, ignore it: %s", getSourceKey(resp), err.Error())
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
} else {
|
|
processEventFn = genProcessFunction(sourcePath, targetPath, excludePaths, reExcludeFileName, excludeFileNames, excludePathPatterns, dataSink, *backupOption.doDeleteFiles, debug)
|
|
}
|
|
|
|
processEventFnWithOffset := pb.AddOffsetFunc(processEventFn, 3*time.Second, func(counter int64, lastTsNs int64) error {
|
|
glog.V(0).Infof("backup %s progressed to %v %0.2f/sec", sourceFiler, time.Unix(0, lastTsNs), float64(counter)/float64(3))
|
|
return setOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, int32(sinkId), lastTsNs)
|
|
})
|
|
|
|
if dataSink.IsIncremental() && *filerBackupOptions.retentionDays > 0 {
|
|
go func() {
|
|
for {
|
|
now := time.Now()
|
|
time.Sleep(time.Hour * 24)
|
|
key := util.Join(targetPath, now.Add(-1*time.Hour*24*time.Duration(*filerBackupOptions.retentionDays)).Format("2006-01-02"))
|
|
_ = dataSink.DeleteEntry(util.Join(targetPath, key), true, true, nil)
|
|
glog.V(0).Infof("incremental backup delete directory:%s", key)
|
|
}
|
|
}()
|
|
}
|
|
|
|
prefix := sourcePath
|
|
if !strings.HasSuffix(prefix, "/") {
|
|
prefix = prefix + "/"
|
|
}
|
|
|
|
eventErrorType := pb.RetryForeverOnError
|
|
if *backupOption.disableErrorRetry {
|
|
eventErrorType = pb.TrivialOnError
|
|
}
|
|
|
|
metadataFollowOption := &pb.MetadataFollowOption{
|
|
ClientName: "backup_" + dataSink.GetName(),
|
|
ClientId: clientId,
|
|
ClientEpoch: clientEpoch,
|
|
SelfSignature: 0,
|
|
PathPrefix: prefix,
|
|
AdditionalPathPrefixes: nil,
|
|
DirectoriesToWatch: nil,
|
|
StartTsNs: startFrom.UnixNano(),
|
|
StopTsNs: 0,
|
|
EventErrorType: eventErrorType,
|
|
}
|
|
|
|
return pb.FollowMetadata(sourceFiler, grpcDialOption, metadataFollowOption, processEventFnWithOffset)
|
|
|
|
}
|
|
|
|
func getSourceKey(resp *filer_pb.SubscribeMetadataResponse) string {
|
|
if resp == nil || resp.EventNotification == nil {
|
|
return ""
|
|
}
|
|
message := resp.EventNotification
|
|
if message.NewEntry != nil {
|
|
return string(util.FullPath(message.NewParentPath).Child(message.NewEntry.Name))
|
|
}
|
|
if message.OldEntry != nil {
|
|
return string(util.FullPath(resp.Directory).Child(message.OldEntry.Name))
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// isIgnorable404 returns true if the error represents a 404/not-found condition
|
|
// that should be silently ignored during backup. This covers:
|
|
// - errors wrapping http.ErrNotFound (direct volume server 404 via non-S3 sinks)
|
|
// - errors containing the "404 Not Found: not found" status string (S3 sink path
|
|
// where AWS SDK breaks the errors.Is unwrap chain)
|
|
// - LookupFileId or volume-id-not-found errors from the volume id map
|
|
func isIgnorable404(err error) bool {
|
|
if errors.Is(err, http.ErrNotFound) {
|
|
return true
|
|
}
|
|
errStr := err.Error()
|
|
return strings.Contains(errStr, ignorable404ErrString) ||
|
|
strings.Contains(errStr, "LookupFileId") ||
|
|
(strings.Contains(errStr, "volume id") && strings.Contains(errStr, "not found"))
|
|
}
|
|
|
|
// persistSnapshotOffset writes the snapshot high-water mark to the source
|
|
// filer's KV, retrying a few times with exponential backoff on transient
|
|
// errors. Losing this write forces a full re-walk on the next retry loop
|
|
// iteration, so a small retry budget here is far cheaper than paying the
|
|
// walk cost again on a large tree.
|
|
func persistSnapshotOffset(grpcDialOption grpc.DialOption, sourceFiler pb.ServerAddress, sinkId int32, tsNs int64) error {
|
|
const attempts = 4
|
|
backoff := 500 * time.Millisecond
|
|
var lastErr error
|
|
for i := 1; i <= attempts; i++ {
|
|
if err := setOffset(grpcDialOption, sourceFiler, BackupKeyPrefix, sinkId, tsNs); err == nil {
|
|
return nil
|
|
} else {
|
|
lastErr = err
|
|
if i == attempts {
|
|
break
|
|
}
|
|
glog.V(0).Infof("initialSnapshot: setOffset attempt %d/%d failed: %v (retrying in %v)", i, attempts, err, backoff)
|
|
time.Sleep(backoff)
|
|
backoff *= 2
|
|
}
|
|
}
|
|
return lastErr
|
|
}
|
|
|
|
// runInitialSnapshot walks the live filer tree under sourcePath and seeds the
|
|
// destination via the sink. The snapshot timestamp is captured before the walk
|
|
// starts so any create/update/delete that races with the walk is still caught
|
|
// by the subscription that runs afterward (sink CreateEntry is idempotent for
|
|
// all builtin sinks, so replaying a concurrent create from the subscription
|
|
// over an entry already written by the walk is safe).
|
|
//
|
|
// Note on excludes: TraverseBfs enumerates every directory and enqueues its
|
|
// children unconditionally before the callback runs, so the exclude filters
|
|
// below only prevent *processing* of excluded entries — they do not prune the
|
|
// listing RPCs for excluded subtrees. For small system excludes (SystemLogDir)
|
|
// that is fine; for large user-supplied excludes (say an archive directory
|
|
// with millions of files), the listing cost can dominate the snapshot. A
|
|
// proper prune signal through TraverseBfs is a separate change.
|
|
func runInitialSnapshot(
|
|
grpcAddress string,
|
|
filerSource *source.FilerSource,
|
|
sourcePath string,
|
|
targetPath string,
|
|
excludePaths []string,
|
|
reExcludeFileName *regexp.Regexp,
|
|
excludeFileNames []*wildcard.WildcardMatcher,
|
|
excludePathPatterns []*wildcard.WildcardMatcher,
|
|
dataSink sink.ReplicationSink,
|
|
ignore404Error bool,
|
|
) (int64, error) {
|
|
// Metadata events are stamped server-side, so the backup host clock may be
|
|
// ahead of the filer's. Take `now - 1min` as the subscription watermark so
|
|
// a fast client clock can't skip events that fire during/right-after the
|
|
// walk. meta_aggregator.go uses the same 1-minute margin on initial peer
|
|
// traversal; see the note there on why duplicate replay is harmless.
|
|
snapshotTsNs := time.Now().Add(-time.Minute).UnixNano()
|
|
glog.V(0).Infof("initialSnapshot: walking %s on %s -> %s (snapshotTsNs=%d, -1m skew margin applied)", sourcePath, grpcAddress, targetPath, snapshotTsNs)
|
|
|
|
// TraverseBfs fans the callback out across 5 worker goroutines, so counter
|
|
// updates and the progress-log gate need to be safe under concurrent access.
|
|
var entryCount, byteCount atomic.Int64
|
|
start := time.Now()
|
|
var logMu sync.Mutex
|
|
lastLog := start
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
err := filer_pb.TraverseBfs(ctx, filerSource, util.FullPath(sourcePath), func(parentPath util.FullPath, entry *filer_pb.Entry) error {
|
|
parent := string(parentPath)
|
|
sourceKey := parentPath.Child(entry.Name)
|
|
source := string(sourceKey)
|
|
// Check exclusion on the entry's own path too — otherwise a walked
|
|
// directory whose full path is SystemLogDir or a user-excluded path
|
|
// still gets seeded (only its children would be skipped by the parent
|
|
// check).
|
|
if parent == filer.SystemLogDir || strings.HasPrefix(parent, filer.SystemLogDir+"/") ||
|
|
source == filer.SystemLogDir || strings.HasPrefix(source, filer.SystemLogDir+"/") {
|
|
return nil
|
|
}
|
|
if matchesExcludePath(parent, excludePaths) || matchesExcludePath(source, excludePaths) {
|
|
return nil
|
|
}
|
|
if isEntryExcluded(parent, entry, reExcludeFileName, excludeFileNames, excludePathPatterns) {
|
|
return nil
|
|
}
|
|
|
|
if !util.IsEqualOrUnder(source, sourcePath) {
|
|
return nil
|
|
}
|
|
|
|
targetKey := initialSnapshotTargetKey(dataSink, targetPath, sourcePath, sourceKey, entry)
|
|
if err := dataSink.CreateEntry(targetKey, entry, nil); err != nil {
|
|
// A file can be listed by TraverseBfs and then deleted before
|
|
// CreateEntry reads its chunks. The follow phase ignores 404s when
|
|
// -ignore404Error is set; apply the same policy here so the walk
|
|
// doesn't abort (and trigger a full re-walk on retry) just because
|
|
// a single entry disappeared mid-snapshot.
|
|
if ignore404Error && isIgnorable404(err) {
|
|
glog.V(0).Infof("initialSnapshot: source entry %s disappeared, ignore it: %s", sourceKey, err.Error())
|
|
return nil
|
|
}
|
|
return fmt.Errorf("seed %s: %w", targetKey, err)
|
|
}
|
|
|
|
curEntries := entryCount.Add(1)
|
|
var curBytes int64
|
|
if entry.Attributes != nil {
|
|
curBytes = byteCount.Add(int64(entry.Attributes.FileSize))
|
|
} else {
|
|
curBytes = byteCount.Load()
|
|
}
|
|
|
|
now := time.Now()
|
|
logMu.Lock()
|
|
shouldLog := now.Sub(lastLog) >= 5*time.Second
|
|
if shouldLog {
|
|
lastLog = now
|
|
}
|
|
logMu.Unlock()
|
|
if shouldLog {
|
|
elapsed := now.Sub(start).Seconds()
|
|
if elapsed == 0 {
|
|
elapsed = 1
|
|
}
|
|
glog.V(0).Infof("initialSnapshot: %d entries / %d bytes seeded (%.1f/sec)", curEntries, curBytes, float64(curEntries)/elapsed)
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
glog.V(0).Infof("initialSnapshot: done — %d entries / %d bytes in %v", entryCount.Load(), byteCount.Load(), time.Since(start))
|
|
return snapshotTsNs, nil
|
|
}
|
|
|
|
// initialSnapshotTargetKey pulls the entry mtime and delegates to the shared
|
|
// destKey helper in filer_sync.go so the walk and the event-log path produce
|
|
// the same destination key for the same source entry.
|
|
func initialSnapshotTargetKey(dataSink sink.ReplicationSink, targetPath, sourcePath string, sourceKey util.FullPath, entry *filer_pb.Entry) string {
|
|
var mTime int64
|
|
if entry.Attributes != nil {
|
|
mTime = entry.Attributes.Mtime
|
|
}
|
|
return destKey(dataSink, targetPath, sourcePath, sourceKey, mTime)
|
|
}
|