fix(storage): never let an empty .dat delete healthy distributed EC shards (#9930)

* fix(storage): never let an empty .dat delete healthy distributed EC shards

A leftover empty .dat stub (a phantom from the pre-fix loader; zero
needles) next to a distributed EC volume's local shards made startup
classify the volume as an interrupted local encode: validateEcVolume
requires >= dataShards local shards when a .dat is present, fails with
the 1-2 shards a distributed volume keeps per disk, and the cleanup
deletes those shards -- the only copies of that part of the volume.
Repeated across restart waves this destroys enough shards cluster-wide
to make the volume unrecoverable.

Go:
- loadExistingVolume: hoist the empty-stub sweep above the EC presence
  checks. Previously the .vif-next-to-.ecx guard returned before the
  sweep ever ran, so exactly the dangerous layout (stub + .ecx + local
  shards) kept its stub and then lost its shards in loadAllEcShards.
- validateEcVolume / checkDatFileExists: treat a .dat <= a superblock
  (zero needles) as absent. An empty .dat cannot be the encode source,
  so it must never gate shard deletion; this also covers stubs without
  a .vif, which the sweep cannot prove are EC leftovers.

Rust mirror (seaweed-volume): the same gate in validate_ec_volume and
check_dat_file_exists (the Rust sweep already ran before validation);
the volume-load skip keeps a plain existence check so fresh,
needle-less volumes still load.

Regression tests in Go and Rust reproduce the production layout (a
zero-byte .dat beside .ecx/.ecj and two shards of a 10+4 volume, with
and without a .vif) and fail without the fix with the shards deleted.

* fix(ec): gate source volume deletion on a recoverable shard set

After EC encode, the shell command and the (plugin) worker task refused
to delete the source volume unless every shard was present, and aborted
otherwise -- leaving the source .dat next to live shards, exactly the
mixed state the startup cleanup mishandles.

Replace the full-set requirement with a recoverability gate shared by
both callers (RequireRecoverableShardSet): deleting a non-empty source
.dat requires at least dataShards distinct shards cluster-wide. Below
that the source is kept and the encode fails as before. A degraded but
recoverable set (>= dataShards, < total) now proceeds with a warning
instead of aborting: the missing shards can be rebuilt from the
survivors, while keeping the source would preserve the dangerous mixed
state. Empty stub replicas are still swept unguarded (OnlyEmpty) -- an
empty .dat has nothing to lose.

dataShards/totalShards stay parameters so enterprise custom EC ratios
share the helper verbatim.

* test(ec): use recoverable shard verification gate
This commit is contained in:
Chris Lu
2026-06-11 20:26:20 -07:00
committed by GitHub
parent b44cf51fe9
commit 34f9b91d69
10 changed files with 407 additions and 71 deletions
+23 -8
View File
@@ -594,12 +594,16 @@ func verifyEcShardsBeforeDelete(commandEnv *CommandEnv, volumeIds []needle.Volum
// volume-server heartbeats, so freshly distributed shards may not all be
// visible in the master topology immediately. Poll a few times before
// concluding the shard set is incomplete, so a heartbeat-propagation lag is
// not mistaken for missing data (which would abort the encode). Genuine loss
// still fails after the retries are exhausted.
// not mistaken for missing data. After the retries: a volume below the
// recoverable threshold (dataShards) aborts the deletion; a recoverable
// but degraded set proceeds with a warning, since the missing shards can
// be rebuilt from the survivors while keeping the source next to live
// shards is the more dangerous mixed state.
const maxAttempts = 10
const retryInterval = 2 * time.Second
var lastErr error
var lastDegraded []string
for attempt := 0; attempt < maxAttempts; attempt++ {
topoInfo, _, err := collectTopologyInfo(commandEnv, 0)
if err != nil {
@@ -607,6 +611,7 @@ func verifyEcShardsBeforeDelete(commandEnv *CommandEnv, volumeIds []needle.Volum
}
lastErr = nil
lastDegraded = lastDegraded[:0]
for _, vid := range volumeIds {
nodeShards, _ := collectEcNodeShardsInfo(topoInfo, vid, diskType)
@@ -616,7 +621,8 @@ func verifyEcShardsBeforeDelete(commandEnv *CommandEnv, volumeIds []needle.Volum
}
totalShards := erasure_coding.TotalShardsCount
if err := erasure_coding.RequireFullShardSet(uint32(vid), union, totalShards); err != nil {
degraded, err := erasure_coding.RequireRecoverableShardSet(uint32(vid), union, erasure_coding.DataShardsCount, totalShards)
if err != nil {
summary := make([]string, 0, len(nodeShards))
for node, info := range nodeShards {
summary = append(summary, fmt.Sprintf("%s=%s", node, info.String()))
@@ -625,23 +631,32 @@ func verifyEcShardsBeforeDelete(commandEnv *CommandEnv, volumeIds []needle.Volum
lastErr = fmt.Errorf("volume %d: %w (observed: %v)", vid, err, summary)
break
}
if degraded {
lastDegraded = append(lastDegraded, fmt.Sprintf("volume %d: %d/%d shards", vid, union.Count(), totalShards))
continue
}
glog.V(0).Infof("EC shard verification ok for volume %d on diskType %q: %d/%d shards present across %d nodes",
vid, diskType.ReadableString(), union.Count(), totalShards, len(nodeShards))
}
if lastErr == nil {
if lastErr == nil && len(lastDegraded) == 0 {
return nil
}
if attempt < maxAttempts-1 {
glog.V(0).Infof("EC shard verification incomplete (attempt %d/%d), waiting for shard locations to propagate: %v",
attempt+1, maxAttempts, lastErr)
glog.V(0).Infof("EC shard verification incomplete (attempt %d/%d), waiting for shard locations to propagate: %v %v",
attempt+1, maxAttempts, lastErr, lastDegraded)
time.Sleep(retryInterval)
}
}
glog.Errorf("EC shard verification failed after %d attempts: %v", maxAttempts, lastErr)
return lastErr
if lastErr != nil {
glog.Errorf("EC shard verification failed after %d attempts: %v", maxAttempts, lastErr)
return lastErr
}
glog.Warningf("EC shard set incomplete but recoverable after %d attempts, proceeding with source deletion (rebuild missing shards with ec.rebuild): %v",
maxAttempts, lastDegraded)
return nil
}
// doDeleteVolumesWithLocations deletes volumes using pre-collected location information
+8 -6
View File
@@ -215,6 +215,14 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne
return false
}
// Sweep a leftover empty .dat stub before any EC presence checks below.
// It must go first: next to an .ecx it would otherwise make
// validateEcVolume mistake a healthy distributed EC volume for an
// interrupted local encode and delete its shards.
if l.removeEmptyEcDatStub(volumeName, vid, collection) {
return false
}
// .vif next to .ecx is EC shard metadata, not a regular volume.
// Without this guard NewVolume below would create a phantom empty .dat.
if strings.HasSuffix(basename, ".vif") && l.hasEcxFile(volumeName) {
@@ -256,12 +264,6 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne
return true
}
// Sweep a leftover empty .dat stub (a phantom from the pre-fix loader)
// before it loads as a phantom volume.
if l.removeEmptyEcDatStub(volumeName, vid, collection) {
return false
}
// Load existing data only; never let NewVolume create a phantom .dat. A
// lone .vif/.idx (e.g. an EC sidecar whose .ecx is on a sibling disk,
// which the same-disk hasEcxFile() guard misses) would otherwise get an
+16 -7
View File
@@ -13,6 +13,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
"github.com/seaweedfs/seaweedfs/weed/storage/volume_info"
)
@@ -388,12 +389,14 @@ func (l *DiskLocation) handleFoundEcxFile(shards []string, collection string, vo
}
}
// checkDatFileExists checks if .dat file exists with robust error handling.
// Unexpected errors (permission, I/O) are treated as "exists" to avoid misclassifying
// local EC as distributed EC, which is the safer fallback.
// checkDatFileExists checks if a .dat file with actual data exists with robust
// error handling. An empty .dat (<= a superblock, zero needles) is a leftover
// stub, not an encode source, and is treated as absent so it never justifies
// deleting shards. Unexpected errors (permission, I/O) are treated as "exists"
// to avoid misclassifying local EC as distributed EC, which is the safer fallback.
func (l *DiskLocation) checkDatFileExists(datFileName string) bool {
if _, err := os.Stat(datFileName); err == nil {
return true
if fi, err := os.Stat(datFileName); err == nil {
return fi.Size() > int64(super_block.SuperBlockSize)
} else if !os.IsNotExist(err) {
glog.Warningf("Failed to stat .dat file %s: %v", datFileName, err)
// Safer to assume local .dat exists to avoid misclassifying as distributed EC
@@ -477,9 +480,15 @@ func (l *DiskLocation) validateEcVolume(collection string, vid needle.VolumeId)
dataShards := l.ecDataShardsFromVif(collection, vid)
// If .dat file exists, compute exact expected shard size from it.
// An empty .dat (<= a superblock, zero needles) cannot be the encode
// source -- it is a leftover stub -- so treat it as absent rather than
// letting it mark a healthy distributed EC volume as an interrupted
// local encode, which would delete its shards.
if datFileInfo, err := os.Stat(datFileName); err == nil {
datExists = true
expectedShardSize = calculateExpectedShardSize(datFileInfo.Size(), dataShards)
if datFileInfo.Size() > int64(super_block.SuperBlockSize) {
datExists = true
expectedShardSize = calculateExpectedShardSize(datFileInfo.Size(), dataShards)
}
} else if !os.IsNotExist(err) {
// If stat fails with unexpected error (permission, I/O), fail validation
// Don't treat this as "distributed EC" - it could be a temporary error
+23 -9
View File
@@ -19,7 +19,7 @@ type ServerShardInventory struct {
// Query errors are recorded per-server and treated as zero shards rather
// than aborting the scan, so the caller still sees partial coverage from
// healthy peers when one server is down. The caller gates destructive
// actions on RequireFullShardSet against the returned union.
// actions on RequireRecoverableShardSet against the returned union.
func VerifyShardsAcrossServers(ctx context.Context, volumeID uint32,
servers []string, dialOption grpc.DialOption) (
union ShardBits, perServer map[string]ServerShardInventory) {
@@ -63,14 +63,25 @@ func VerifyShardsAcrossServers(ctx context.Context, volumeID uint32,
return union, perServer
}
// totalShards is the configured DataShards+ParityShards for this volume.
// Passed as a parameter (not derived from TotalShardsCount) so enterprise
// builds with custom EC ratios share this helper verbatim.
func RequireFullShardSet(volumeID uint32, shardsPresent ShardBits, totalShards int) error {
// RequireRecoverableShardSet gates source-volume deletion after EC encode:
// a non-empty .dat may only be deleted when enough distinct shards exist to
// reconstruct the volume (>= dataShards). A full set returns (false, nil); a
// degraded-but-recoverable set returns (true, nil) so the caller can warn and
// proceed -- the missing shards can be rebuilt from the survivors, while
// keeping the source next to live shards is the more dangerous mixed state.
// Below dataShards it returns an error and the source must be kept.
// dataShards/totalShards are passed as parameters (not derived from the
// package constants) so enterprise builds with custom EC ratios share this
// helper verbatim.
func RequireRecoverableShardSet(volumeID uint32, shardsPresent ShardBits, dataShards, totalShards int) (degraded bool, err error) {
if totalShards <= 0 || totalShards > MaxShardCount {
return fmt.Errorf("invalid totalShards %d for volume %d (must be in [1, %d])",
return false, fmt.Errorf("invalid totalShards %d for volume %d (must be in [1, %d])",
totalShards, volumeID, MaxShardCount)
}
if dataShards <= 0 || dataShards > totalShards {
return false, fmt.Errorf("invalid dataShards %d for volume %d (must be in [1, %d])",
dataShards, volumeID, totalShards)
}
var missing []int
for id := 0; id < totalShards; id++ {
if !shardsPresent.Has(ShardId(id)) {
@@ -78,11 +89,14 @@ func RequireFullShardSet(volumeID uint32, shardsPresent ShardBits, totalShards i
}
}
if len(missing) == 0 {
return nil
return false, nil
}
if totalShards-len(missing) >= dataShards {
return true, nil
}
sort.Ints(missing)
return fmt.Errorf("EC shard set incomplete for volume %d: %d/%d shards present, missing shard ids %v",
volumeID, shardsPresent.Count(), totalShards, missing)
return false, fmt.Errorf("EC shard set unrecoverable for volume %d: %d/%d shards present, need %d to reconstruct, missing shard ids %v",
volumeID, totalShards-len(missing), totalShards, dataShards, missing)
}
func SummarizeShardInventory(perServer map[string]ServerShardInventory) string {
@@ -5,17 +5,23 @@ import (
"testing"
)
func TestRequireFullShardSet_AllPresent(t *testing.T) {
func TestRequireRecoverableShardSet_AllPresent(t *testing.T) {
var bits ShardBits
for id := 0; id < TotalShardsCount; id++ {
bits = bits.Set(ShardId(id))
}
if err := RequireFullShardSet(42, bits, TotalShardsCount); err != nil {
degraded, err := RequireRecoverableShardSet(42, bits, DataShardsCount, TotalShardsCount)
if err != nil {
t.Fatalf("unexpected error for full set: %v", err)
}
if degraded {
t.Error("full set must not be reported as degraded")
}
}
func TestRequireFullShardSet_ReportsMissingIds(t *testing.T) {
func TestRequireRecoverableShardSet_DegradedButRecoverable(t *testing.T) {
// 12 of 14 shards: enough to reconstruct (>= 10), so the source may be
// deleted, but the caller is told to warn and schedule a rebuild.
var bits ShardBits
for id := 0; id < TotalShardsCount; id++ {
if id == 3 || id == 7 {
@@ -23,24 +29,39 @@ func TestRequireFullShardSet_ReportsMissingIds(t *testing.T) {
}
bits = bits.Set(ShardId(id))
}
err := RequireFullShardSet(42, bits, TotalShardsCount)
degraded, err := RequireRecoverableShardSet(42, bits, DataShardsCount, TotalShardsCount)
if err != nil {
t.Fatalf("recoverable set must not error: %v", err)
}
if !degraded {
t.Error("missing shards must be reported as degraded")
}
}
func TestRequireRecoverableShardSet_BelowDataShards(t *testing.T) {
// 9 of 14 shards: one short of reconstructable; the source must be kept.
var bits ShardBits
for id := 0; id < DataShardsCount-1; id++ {
bits = bits.Set(ShardId(id))
}
_, err := RequireRecoverableShardSet(42, bits, DataShardsCount, TotalShardsCount)
if err == nil {
t.Fatal("expected error for incomplete set, got nil")
t.Fatal("expected error for unrecoverable set, got nil")
}
msg := err.Error()
if !strings.Contains(msg, "volume 42") {
t.Errorf("error should name the volume id: %s", msg)
}
if !strings.Contains(msg, "[3 7]") {
t.Errorf("error should list missing ids 3 and 7: %s", msg)
if !strings.Contains(msg, "9/14") {
t.Errorf("error should report 9/14 shards present: %s", msg)
}
if !strings.Contains(msg, "12/14") {
t.Errorf("error should report 12/14 shards present: %s", msg)
if !strings.Contains(msg, "[9 10 11 12 13]") {
t.Errorf("error should list the missing ids: %s", msg)
}
}
func TestRequireFullShardSet_EmptyBitmap(t *testing.T) {
err := RequireFullShardSet(1, 0, TotalShardsCount)
func TestRequireRecoverableShardSet_EmptyBitmap(t *testing.T) {
_, err := RequireRecoverableShardSet(1, 0, DataShardsCount, TotalShardsCount)
if err == nil {
t.Fatal("expected error for empty bitmap")
}
@@ -49,37 +70,49 @@ func TestRequireFullShardSet_EmptyBitmap(t *testing.T) {
}
}
func TestRequireFullShardSet_CustomRatio(t *testing.T) {
func TestRequireRecoverableShardSet_CustomRatio(t *testing.T) {
// 6+3 ratio: total=9, all present
var bits ShardBits
for id := 0; id < 9; id++ {
bits = bits.Set(ShardId(id))
}
if err := RequireFullShardSet(7, bits, 9); err != nil {
t.Fatalf("unexpected error for full 6+3 set: %v", err)
if degraded, err := RequireRecoverableShardSet(7, bits, 6, 9); err != nil || degraded {
t.Fatalf("full 6+3 set: degraded=%v err=%v", degraded, err)
}
// 6+3, missing shard 5
// 6+3, missing shard 5: 8 >= 6 remain, recoverable but degraded
bits = bits.Clear(5)
err := RequireFullShardSet(7, bits, 9)
if degraded, err := RequireRecoverableShardSet(7, bits, 6, 9); err != nil || !degraded {
t.Fatalf("8/9 shards of 6+3: degraded=%v err=%v", degraded, err)
}
// 6+3, only 5 shards left: below dataShards, must error
bits = ShardBits(0)
for id := 0; id < 5; id++ {
bits = bits.Set(ShardId(id))
}
_, err := RequireRecoverableShardSet(7, bits, 6, 9)
if err == nil {
t.Fatal("expected error when shard 5 is missing in 6+3 ratio")
t.Fatal("expected error with 5/9 shards in 6+3 ratio")
}
if !strings.Contains(err.Error(), "8/9") {
t.Errorf("error should report 8/9: %s", err.Error())
}
if !strings.Contains(err.Error(), "[5]") {
t.Errorf("error should list missing id 5: %s", err.Error())
if !strings.Contains(err.Error(), "5/9") {
t.Errorf("error should report 5/9: %s", err.Error())
}
}
func TestRequireFullShardSet_RejectsInvalidTotal(t *testing.T) {
if err := RequireFullShardSet(1, 0, 0); err == nil {
func TestRequireRecoverableShardSet_RejectsInvalidParams(t *testing.T) {
if _, err := RequireRecoverableShardSet(1, 0, DataShardsCount, 0); err == nil {
t.Error("expected error for totalShards=0")
}
if err := RequireFullShardSet(1, 0, MaxShardCount+1); err == nil {
if _, err := RequireRecoverableShardSet(1, 0, DataShardsCount, MaxShardCount+1); err == nil {
t.Errorf("expected error for totalShards > MaxShardCount")
}
if _, err := RequireRecoverableShardSet(1, 0, 0, TotalShardsCount); err == nil {
t.Error("expected error for dataShards=0")
}
if _, err := RequireRecoverableShardSet(1, 0, TotalShardsCount+1, TotalShardsCount); err == nil {
t.Error("expected error for dataShards > totalShards")
}
}
func TestSummarizeShardInventory_Deterministic(t *testing.T) {
+139
View File
@@ -244,3 +244,142 @@ func TestRemoveEmptyEcDatStubFindsVifInIdxDir(t *testing.T) {
t.Error(".dat stub was not removed")
}
}
// startEcTestStore starts a single-disk store over dir and drains its
// notification channels until cleanup.
func startEcTestStore(t *testing.T, dir string) *Store {
t.Helper()
store := NewStore(nil, "localhost", 8080, 18080, "http://localhost:8080", "store-id",
[]string{dir},
[]int32{100},
[]util.MinFreeSpace{{}},
"",
NeedleMapInMemory,
[]types.DiskType{types.HardDriveType},
nil,
3,
stats.DefaultDiskIOProbeConfig(),
)
done := make(chan struct{})
go func() {
for {
select {
case <-store.NewVolumesChan:
case <-store.NewEcShardsChan:
case <-store.DeletedVolumesChan:
case <-store.DeletedEcShardsChan:
case <-store.StateUpdateChan:
case <-done:
return
}
}
}()
t.Cleanup(func() {
store.Close()
close(done)
})
return store
}
// writeEcShardFixture lays down .ecx, .ecj, and the given shards for a
// distributed EC volume in dir, each shard truncated to shardSize.
func writeEcShardFixture(t *testing.T, base string, shardIds []int, shardSize int64) {
t.Helper()
for _, sid := range shardIds {
f, err := os.Create(base + erasure_coding.ToExt(sid))
if err != nil {
t.Fatalf("create shard %d: %v", sid, err)
}
if err := f.Truncate(shardSize); err != nil {
f.Close()
t.Fatalf("truncate shard %d: %v", sid, err)
}
if err := f.Close(); err != nil {
t.Fatalf("close shard %d: %v", sid, err)
}
}
if err := os.WriteFile(base+".ecx", make([]byte, 20), 0o644); err != nil {
t.Fatalf("write .ecx: %v", err)
}
if err := os.WriteFile(base+".ecj", nil, 0o644); err != nil {
t.Fatalf("write .ecj: %v", err)
}
}
// TestEmptyDatStubNextToEcxDoesNotDeleteShards: a disk holding a few local
// shards of a healthy distributed EC volume plus a leftover empty .dat stub.
// The stub used to make startup classify the volume as an interrupted local
// encode (fewer than dataShards local shards) and delete the only copies of
// those shards. The stub must be swept and the shards must load.
func TestEmptyDatStubNextToEcxDoesNotDeleteShards(t *testing.T) {
dir := t.TempDir()
collection := "warp-rec"
vid := needle.VolumeId(87)
base := erasure_coding.EcShardFileName(collection, dir, int(vid))
shardIds := []int{0, 5}
writeEcShardFixture(t, base, shardIds, int64(erasure_coding.ErasureCodingSmallBlockSize))
// Zero-byte stub .dat: the phantom left by the pre-fix loader.
if err := os.WriteFile(base+".dat", nil, 0o644); err != nil {
t.Fatalf("write stub .dat: %v", err)
}
if err := volume_info.SaveVolumeInfo(base+".vif", &volume_server_pb.VolumeInfo{
Version: uint32(needle.Version3),
EcShardConfig: &volume_server_pb.EcShardConfig{DataShards: 10, ParityShards: 4},
}); err != nil {
t.Fatalf("save .vif: %v", err)
}
store := startEcTestStore(t, dir)
loc := store.Locations[0]
for _, sid := range shardIds {
if !util.FileExists(base + erasure_coding.ToExt(sid)) {
t.Errorf("EC shard file %d was deleted", sid)
}
if _, found := loc.FindEcShard(vid, erasure_coding.ShardId(sid)); !found {
t.Errorf("EC shard %d was not loaded", sid)
}
}
if !util.FileExists(base + ".ecx") {
t.Error(".ecx was deleted")
}
if util.FileExists(base + ".dat") {
t.Error("empty .dat stub was not swept")
}
if store.findVolume(vid) != nil {
t.Errorf("stub was loaded as a phantom volume %d", vid)
}
}
// TestEmptyDatWithoutVifDoesNotDeleteShards: same shard-holding disk but the
// stub has no .vif at all, so the sweep has no EC evidence and must leave it.
// The empty .dat still must not count as an encode source: the shards survive
// and load as a distributed EC volume.
func TestEmptyDatWithoutVifDoesNotDeleteShards(t *testing.T) {
dir := t.TempDir()
collection := "warp-rec"
vid := needle.VolumeId(88)
base := erasure_coding.EcShardFileName(collection, dir, int(vid))
shardIds := []int{1, 7}
writeEcShardFixture(t, base, shardIds, int64(erasure_coding.ErasureCodingSmallBlockSize))
if err := os.WriteFile(base+".dat", nil, 0o644); err != nil {
t.Fatalf("write stub .dat: %v", err)
}
store := startEcTestStore(t, dir)
loc := store.Locations[0]
for _, sid := range shardIds {
if !util.FileExists(base + erasure_coding.ToExt(sid)) {
t.Errorf("EC shard file %d was deleted", sid)
}
if _, found := loc.FindEcShard(vid, erasure_coding.ShardId(sid)); !found {
t.Errorf("EC shard %d was not loaded", sid)
}
}
if store.findVolume(vid) != nil {
t.Errorf("stub was loaded as a phantom volume %d", vid)
}
}
+13 -1
View File
@@ -708,7 +708,8 @@ func (t *ErasureCodingTask) verifyEcShardsBeforeDelete(ctx context.Context) erro
"per_server": summary,
}).Info("EC shard inventory before source deletion")
if err := erasure_coding.RequireFullShardSet(t.volumeID, union, totalShards); err != nil {
degraded, err := erasure_coding.RequireRecoverableShardSet(t.volumeID, union, int(t.dataShards), totalShards)
if err != nil {
t.GetLogger().WithFields(map[string]interface{}{
"volume_id": t.volumeID,
"per_server": summary,
@@ -716,6 +717,17 @@ func (t *ErasureCodingTask) verifyEcShardsBeforeDelete(ctx context.Context) erro
}).Error("EC shard verification failed — source volume will be kept")
return err
}
if degraded {
// Enough shards to reconstruct; the missing ones can be rebuilt from
// the survivors, while keeping the source next to live shards is the
// more dangerous mixed state.
t.GetLogger().WithFields(map[string]interface{}{
"volume_id": t.volumeID,
"shards_seen": union.Count(),
"shards_total": totalShards,
"per_server": summary,
}).Warning("EC shard set incomplete but recoverable; proceeding with source deletion")
}
return nil
}