fix(storage): never let an empty .dat delete healthy distributed EC shards (#9930)

* fix(storage): never let an empty .dat delete healthy distributed EC shards

  A leftover empty .dat stub (a phantom from the pre-fix loader; zero needles) next to a distributed EC volume's local shards made startup classify the volume as an interrupted local encode: validateEcVolume requires >= dataShards local shards when a .dat is present, fails with the 1-2 shards a distributed volume keeps per disk, and the cleanup deletes those shards -- the only copies of that part of the volume. Repeated across restart waves this destroys enough shards cluster-wide to make the volume unrecoverable.

  Go:
  - loadExistingVolume: hoist the empty-stub sweep above the EC presence checks. Previously the .vif-next-to-.ecx guard returned before the sweep ever ran, so exactly the dangerous layout (stub + .ecx + local shards) kept its stub and then lost its shards in loadAllEcShards.
  - validateEcVolume / checkDatFileExists: treat a .dat <= a superblock (zero needles) as absent. An empty .dat cannot be the encode source, so it must never gate shard deletion; this also covers stubs without a .vif, which the sweep cannot prove are EC leftovers.

  Rust mirror (seaweed-volume): the same gate in validate_ec_volume and check_dat_file_exists (the Rust sweep already ran before validation); the volume-load skip keeps a plain existence check so fresh, needle-less volumes still load.

  Regression tests in Go and Rust reproduce the production layout (a zero-byte .dat beside .ecx/.ecj and two shards of a 10+4 volume, with and without a .vif) and fail without the fix because the shards are deleted.

* fix(ec): gate source volume deletion on a recoverable shard set

  After EC encode, the shell command and the (plugin) worker task refused to delete the source volume unless every shard was present, and aborted otherwise -- leaving the source .dat next to live shards, exactly the mixed state the startup cleanup mishandles.
  Replace the full-set requirement with a recoverability gate shared by both callers (RequireRecoverableShardSet): deleting a non-empty source .dat requires at least dataShards distinct shards cluster-wide. Below that the source is kept and the encode fails as before.

  A degraded but recoverable set (>= dataShards, < total) now proceeds with a warning instead of aborting: the missing shards can be rebuilt from the survivors, while keeping the source would preserve the dangerous mixed state.

  Empty stub replicas are still swept unguarded (OnlyEmpty) -- an empty .dat has nothing to lose. dataShards/totalShards stay parameters so enterprise custom EC ratios share the helper verbatim.

* test(ec): use recoverable shard verification gate
2026-06-13 23:36:45 +03:00 · 2026-06-11 20:26:20 -07:00
parent b44cf51fe9
commit 34f9b91d69
10 changed files with 407 additions and 71 deletions
@@ -594,12 +594,16 @@ func verifyEcShardsBeforeDelete(commandEnv *CommandEnv, volumeIds []needle.Volum
 	// volume-server heartbeats, so freshly distributed shards may not all be
 	// visible in the master topology immediately. Poll a few times before
 	// concluding the shard set is incomplete, so a heartbeat-propagation lag is
-	// not mistaken for missing data (which would abort the encode). Genuine loss
-	// still fails after the retries are exhausted.
+	// not mistaken for missing data. After the retries: a volume below the
+	// recoverable threshold (dataShards) aborts the deletion; a recoverable
+	// but degraded set proceeds with a warning, since the missing shards can
+	// be rebuilt from the survivors while keeping the source next to live
+	// shards is the more dangerous mixed state.
 	const maxAttempts = 10
 	const retryInterval = 2 * time.Second

 	var lastErr error
+	var lastDegraded []string
 	for attempt := 0; attempt < maxAttempts; attempt++ {
 		topoInfo, _, err := collectTopologyInfo(commandEnv, 0)
 		if err != nil {
@@ -607,6 +611,7 @@ func verifyEcShardsBeforeDelete(commandEnv *CommandEnv, volumeIds []needle.Volum
 		}

 		lastErr = nil
+		lastDegraded = lastDegraded[:0]
 		for _, vid := range volumeIds {
 			nodeShards, _ := collectEcNodeShardsInfo(topoInfo, vid, diskType)

@@ -616,7 +621,8 @@ func verifyEcShardsBeforeDelete(commandEnv *CommandEnv, volumeIds []needle.Volum
 			}

 			totalShards := erasure_coding.TotalShardsCount
-			if err := erasure_coding.RequireFullShardSet(uint32(vid), union, totalShards); err != nil {
+			degraded, err := erasure_coding.RequireRecoverableShardSet(uint32(vid), union, erasure_coding.DataShardsCount, totalShards)
+			if err != nil {
 				summary := make([]string, 0, len(nodeShards))
 				for node, info := range nodeShards {
 					summary = append(summary, fmt.Sprintf("%s=%s", node, info.String()))
@@ -625,23 +631,32 @@ func verifyEcShardsBeforeDelete(commandEnv *CommandEnv, volumeIds []needle.Volum
 				lastErr = fmt.Errorf("volume %d: %w (observed: %v)", vid, err, summary)
 				break
 			}
+			if degraded {
+				lastDegraded = append(lastDegraded, fmt.Sprintf("volume %d: %d/%d shards", vid, union.Count(), totalShards))
+				continue
+			}

 			glog.V(0).Infof("EC shard verification ok for volume %d on diskType %q: %d/%d shards present across %d nodes",
 				vid, diskType.ReadableString(), union.Count(), totalShards, len(nodeShards))
 		}

-		if lastErr == nil {
+		if lastErr == nil && len(lastDegraded) == 0 {
 			return nil
 		}
 		if attempt < maxAttempts-1 {
-			glog.V(0).Infof("EC shard verification incomplete (attempt %d/%d), waiting for shard locations to propagate: %v",
-				attempt+1, maxAttempts, lastErr)
+			glog.V(0).Infof("EC shard verification incomplete (attempt %d/%d), waiting for shard locations to propagate: %v %v",
+				attempt+1, maxAttempts, lastErr, lastDegraded)
 			time.Sleep(retryInterval)
 		}
 	}

-	glog.Errorf("EC shard verification failed after %d attempts: %v", maxAttempts, lastErr)
-	return lastErr
+	if lastErr != nil {
+		glog.Errorf("EC shard verification failed after %d attempts: %v", maxAttempts, lastErr)
+		return lastErr
+	}
+	glog.Warningf("EC shard set incomplete but recoverable after %d attempts, proceeding with source deletion (rebuild missing shards with ec.rebuild): %v",
+		maxAttempts, lastDegraded)
+	return nil
 }

 // doDeleteVolumesWithLocations deletes volumes using pre-collected location information
@@ -215,6 +215,14 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne
 		return false
 	}

+	// Sweep a leftover empty .dat stub before any EC presence checks below.
+	// It must go first: next to an .ecx it would otherwise make
+	// validateEcVolume mistake a healthy distributed EC volume for an
+	// interrupted local encode and delete its shards.
+	if l.removeEmptyEcDatStub(volumeName, vid, collection) {
+		return false
+	}
+
 	// .vif next to .ecx is EC shard metadata, not a regular volume.
 	// Without this guard NewVolume below would create a phantom empty .dat.
 	if strings.HasSuffix(basename, ".vif") && l.hasEcxFile(volumeName) {
@@ -256,12 +264,6 @@ func (l *DiskLocation) loadExistingVolume(dirEntry os.DirEntry, needleMapKind Ne
 		return true
 	}

-	// Sweep a leftover empty .dat stub (a phantom from the pre-fix loader)
-	// before it loads as a phantom volume.
-	if l.removeEmptyEcDatStub(volumeName, vid, collection) {
-		return false
-	}
-
 	// Load existing data only; never let NewVolume create a phantom .dat. A
 	// lone .vif/.idx (e.g. an EC sidecar whose .ecx is on a sibling disk,
 	// which the same-disk hasEcxFile() guard misses) would otherwise get an
@@ -13,6 +13,7 @@ import (
 	"github.com/seaweedfs/seaweedfs/weed/glog"
 	"github.com/seaweedfs/seaweedfs/weed/storage/erasure_coding"
 	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
+	"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
 	"github.com/seaweedfs/seaweedfs/weed/storage/volume_info"
 )

@@ -388,12 +389,14 @@ func (l *DiskLocation) handleFoundEcxFile(shards []string, collection string, vo
 	}
 }

-// checkDatFileExists checks if .dat file exists with robust error handling.
-// Unexpected errors (permission, I/O) are treated as "exists" to avoid misclassifying
-// local EC as distributed EC, which is the safer fallback.
+// checkDatFileExists checks if a .dat file with actual data exists with robust
+// error handling. An empty .dat (<= a superblock, zero needles) is a leftover
+// stub, not an encode source, and is treated as absent so it never justifies
+// deleting shards. Unexpected errors (permission, I/O) are treated as "exists"
+// to avoid misclassifying local EC as distributed EC, which is the safer fallback.
 func (l *DiskLocation) checkDatFileExists(datFileName string) bool {
-	if _, err := os.Stat(datFileName); err == nil {
-		return true
+	if fi, err := os.Stat(datFileName); err == nil {
+		return fi.Size() > int64(super_block.SuperBlockSize)
 	} else if !os.IsNotExist(err) {
 		glog.Warningf("Failed to stat .dat file %s: %v", datFileName, err)
 		// Safer to assume local .dat exists to avoid misclassifying as distributed EC
@@ -477,9 +480,15 @@ func (l *DiskLocation) validateEcVolume(collection string, vid needle.VolumeId)
 	dataShards := l.ecDataShardsFromVif(collection, vid)

 	// If .dat file exists, compute exact expected shard size from it.
+	// An empty .dat (<= a superblock, zero needles) cannot be the encode
+	// source -- it is a leftover stub -- so treat it as absent rather than
+	// letting it mark a healthy distributed EC volume as an interrupted
+	// local encode, which would delete its shards.
 	if datFileInfo, err := os.Stat(datFileName); err == nil {
-		datExists = true
-		expectedShardSize = calculateExpectedShardSize(datFileInfo.Size(), dataShards)
+		if datFileInfo.Size() > int64(super_block.SuperBlockSize) {
+			datExists = true
+			expectedShardSize = calculateExpectedShardSize(datFileInfo.Size(), dataShards)
+		}
 	} else if !os.IsNotExist(err) {
 		// If stat fails with unexpected error (permission, I/O), fail validation
 		// Don't treat this as "distributed EC" - it could be a temporary error
@@ -19,7 +19,7 @@ type ServerShardInventory struct {
 // Query errors are recorded per-server and treated as zero shards rather
 // than aborting the scan, so the caller still sees partial coverage from
 // healthy peers when one server is down. The caller gates destructive
-// actions on RequireFullShardSet against the returned union.
+// actions on RequireRecoverableShardSet against the returned union.
 func VerifyShardsAcrossServers(ctx context.Context, volumeID uint32,
 	servers []string, dialOption grpc.DialOption) (
 	union ShardBits, perServer map[string]ServerShardInventory) {
@@ -63,14 +63,25 @@ func VerifyShardsAcrossServers(ctx context.Context, volumeID uint32,
 	return union, perServer
 }

-// totalShards is the configured DataShards+ParityShards for this volume.
-// Passed as a parameter (not derived from TotalShardsCount) so enterprise
-// builds with custom EC ratios share this helper verbatim.
-func RequireFullShardSet(volumeID uint32, shardsPresent ShardBits, totalShards int) error {
+// RequireRecoverableShardSet gates source-volume deletion after EC encode:
+// a non-empty .dat may only be deleted when enough distinct shards exist to
+// reconstruct the volume (>= dataShards). A full set returns (false, nil); a
+// degraded-but-recoverable set returns (true, nil) so the caller can warn and
+// proceed -- the missing shards can be rebuilt from the survivors, while
+// keeping the source next to live shards is the more dangerous mixed state.
+// Below dataShards it returns an error and the source must be kept.
+// dataShards/totalShards are passed as parameters (not derived from the
+// package constants) so enterprise builds with custom EC ratios share this
+// helper verbatim.
+func RequireRecoverableShardSet(volumeID uint32, shardsPresent ShardBits, dataShards, totalShards int) (degraded bool, err error) {
 	if totalShards <= 0 || totalShards > MaxShardCount {
-		return fmt.Errorf("invalid totalShards %d for volume %d (must be in [1, %d])",
+		return false, fmt.Errorf("invalid totalShards %d for volume %d (must be in [1, %d])",
 			totalShards, volumeID, MaxShardCount)
 	}
+	if dataShards <= 0 || dataShards > totalShards {
+		return false, fmt.Errorf("invalid dataShards %d for volume %d (must be in [1, %d])",
+			dataShards, volumeID, totalShards)
+	}
 	var missing []int
 	for id := 0; id < totalShards; id++ {
 		if !shardsPresent.Has(ShardId(id)) {
@@ -78,11 +89,14 @@ func RequireFullShardSet(volumeID uint32, shardsPresent ShardBits, totalShards i
 		}
 	}
 	if len(missing) == 0 {
-		return nil
+		return false, nil
+	}
+	if totalShards-len(missing) >= dataShards {
+		return true, nil
 	}
 	sort.Ints(missing)
-	return fmt.Errorf("EC shard set incomplete for volume %d: %d/%d shards present, missing shard ids %v",
-		volumeID, shardsPresent.Count(), totalShards, missing)
+	return false, fmt.Errorf("EC shard set unrecoverable for volume %d: %d/%d shards present, need %d to reconstruct, missing shard ids %v",
+		volumeID, totalShards-len(missing), totalShards, dataShards, missing)
 }

 func SummarizeShardInventory(perServer map[string]ServerShardInventory) string {
@@ -5,17 +5,23 @@ import (
 	"testing"
 )

-func TestRequireFullShardSet_AllPresent(t *testing.T) {
+func TestRequireRecoverableShardSet_AllPresent(t *testing.T) {
 	var bits ShardBits
 	for id := 0; id < TotalShardsCount; id++ {
 		bits = bits.Set(ShardId(id))
 	}
-	if err := RequireFullShardSet(42, bits, TotalShardsCount); err != nil {
+	degraded, err := RequireRecoverableShardSet(42, bits, DataShardsCount, TotalShardsCount)
+	if err != nil {
 		t.Fatalf("unexpected error for full set: %v", err)
 	}
+	if degraded {
+		t.Error("full set must not be reported as degraded")
+	}
 }

-func TestRequireFullShardSet_ReportsMissingIds(t *testing.T) {
+func TestRequireRecoverableShardSet_DegradedButRecoverable(t *testing.T) {
+	// 12 of 14 shards: enough to reconstruct (>= 10), so the source may be
+	// deleted, but the caller is told to warn and schedule a rebuild.
 	var bits ShardBits
 	for id := 0; id < TotalShardsCount; id++ {
 		if id == 3 || id == 7 {
@@ -23,24 +29,39 @@ func TestRequireFullShardSet_ReportsMissingIds(t *testing.T) {
 		}
 		bits = bits.Set(ShardId(id))
 	}
-	err := RequireFullShardSet(42, bits, TotalShardsCount)
+	degraded, err := RequireRecoverableShardSet(42, bits, DataShardsCount, TotalShardsCount)
+	if err != nil {
+		t.Fatalf("recoverable set must not error: %v", err)
+	}
+	if !degraded {
+		t.Error("missing shards must be reported as degraded")
+	}
+}
+
+func TestRequireRecoverableShardSet_BelowDataShards(t *testing.T) {
+	// 9 of 14 shards: one short of reconstructable; the source must be kept.
+	var bits ShardBits
+	for id := 0; id < DataShardsCount-1; id++ {
+		bits = bits.Set(ShardId(id))
+	}
+	_, err := RequireRecoverableShardSet(42, bits, DataShardsCount, TotalShardsCount)
 	if err == nil {
-		t.Fatal("expected error for incomplete set, got nil")
+		t.Fatal("expected error for unrecoverable set, got nil")
 	}
 	msg := err.Error()
 	if !strings.Contains(msg, "volume 42") {
 		t.Errorf("error should name the volume id: %s", msg)
 	}
-	if !strings.Contains(msg, "[3 7]") {
-		t.Errorf("error should list missing ids 3 and 7: %s", msg)
+	if !strings.Contains(msg, "9/14") {
+		t.Errorf("error should report 9/14 shards present: %s", msg)
 	}
-	if !strings.Contains(msg, "12/14") {
-		t.Errorf("error should report 12/14 shards present: %s", msg)
+	if !strings.Contains(msg, "[9 10 11 12 13]") {
+		t.Errorf("error should list the missing ids: %s", msg)
 	}
 }

-func TestRequireFullShardSet_EmptyBitmap(t *testing.T) {
-	err := RequireFullShardSet(1, 0, TotalShardsCount)
+func TestRequireRecoverableShardSet_EmptyBitmap(t *testing.T) {
+	_, err := RequireRecoverableShardSet(1, 0, DataShardsCount, TotalShardsCount)
 	if err == nil {
 		t.Fatal("expected error for empty bitmap")
 	}
@@ -49,37 +70,49 @@ func TestRequireFullShardSet_EmptyBitmap(t *testing.T) {
 	}
 }

-func TestRequireFullShardSet_CustomRatio(t *testing.T) {
+func TestRequireRecoverableShardSet_CustomRatio(t *testing.T) {
 	// 6+3 ratio: total=9, all present
 	var bits ShardBits
 	for id := 0; id < 9; id++ {
 		bits = bits.Set(ShardId(id))
 	}
-	if err := RequireFullShardSet(7, bits, 9); err != nil {
-		t.Fatalf("unexpected error for full 6+3 set: %v", err)
+	if degraded, err := RequireRecoverableShardSet(7, bits, 6, 9); err != nil || degraded {
+		t.Fatalf("full 6+3 set: degraded=%v err=%v", degraded, err)
 	}

-	// 6+3, missing shard 5
+	// 6+3, missing shard 5: 8 >= 6 remain, recoverable but degraded
 	bits = bits.Clear(5)
-	err := RequireFullShardSet(7, bits, 9)
+	if degraded, err := RequireRecoverableShardSet(7, bits, 6, 9); err != nil || !degraded {
+		t.Fatalf("8/9 shards of 6+3: degraded=%v err=%v", degraded, err)
+	}
+
+	// 6+3, only 5 shards left: below dataShards, must error
+	bits = ShardBits(0)
+	for id := 0; id < 5; id++ {
+		bits = bits.Set(ShardId(id))
+	}
+	_, err := RequireRecoverableShardSet(7, bits, 6, 9)
 	if err == nil {
-		t.Fatal("expected error when shard 5 is missing in 6+3 ratio")
+		t.Fatal("expected error with 5/9 shards in 6+3 ratio")
 	}
-	if !strings.Contains(err.Error(), "8/9") {
-		t.Errorf("error should report 8/9: %s", err.Error())
-	}
-	if !strings.Contains(err.Error(), "[5]") {
-		t.Errorf("error should list missing id 5: %s", err.Error())
+	if !strings.Contains(err.Error(), "5/9") {
+		t.Errorf("error should report 5/9: %s", err.Error())
 	}
 }

-func TestRequireFullShardSet_RejectsInvalidTotal(t *testing.T) {
-	if err := RequireFullShardSet(1, 0, 0); err == nil {
+func TestRequireRecoverableShardSet_RejectsInvalidParams(t *testing.T) {
+	if _, err := RequireRecoverableShardSet(1, 0, DataShardsCount, 0); err == nil {
 		t.Error("expected error for totalShards=0")
 	}
-	if err := RequireFullShardSet(1, 0, MaxShardCount+1); err == nil {
+	if _, err := RequireRecoverableShardSet(1, 0, DataShardsCount, MaxShardCount+1); err == nil {
 		t.Errorf("expected error for totalShards > MaxShardCount")
 	}
+	if _, err := RequireRecoverableShardSet(1, 0, 0, TotalShardsCount); err == nil {
+		t.Error("expected error for dataShards=0")
+	}
+	if _, err := RequireRecoverableShardSet(1, 0, TotalShardsCount+1, TotalShardsCount); err == nil {
+		t.Error("expected error for dataShards > totalShards")
+	}
 }

 func TestSummarizeShardInventory_Deterministic(t *testing.T) {
@@ -244,3 +244,142 @@ func TestRemoveEmptyEcDatStubFindsVifInIdxDir(t *testing.T) {
 		t.Error(".dat stub was not removed")
 	}
 }
+
+// startEcTestStore starts a single-disk store over dir and drains its
+// notification channels until cleanup.
+func startEcTestStore(t *testing.T, dir string) *Store {
+	t.Helper()
+	store := NewStore(nil, "localhost", 8080, 18080, "http://localhost:8080", "store-id",
+		[]string{dir},
+		[]int32{100},
+		[]util.MinFreeSpace{{}},
+		"",
+		NeedleMapInMemory,
+		[]types.DiskType{types.HardDriveType},
+		nil,
+		3,
+		stats.DefaultDiskIOProbeConfig(),
+	)
+	done := make(chan struct{})
+	go func() {
+		for {
+			select {
+			case <-store.NewVolumesChan:
+			case <-store.NewEcShardsChan:
+			case <-store.DeletedVolumesChan:
+			case <-store.DeletedEcShardsChan:
+			case <-store.StateUpdateChan:
+			case <-done:
+				return
+			}
+		}
+	}()
+	t.Cleanup(func() {
+		store.Close()
+		close(done)
+	})
+	return store
+}
+
+// writeEcShardFixture lays down .ecx, .ecj, and the given shards for a
+// distributed EC volume in dir, each shard truncated to shardSize.
+func writeEcShardFixture(t *testing.T, base string, shardIds []int, shardSize int64) {
+	t.Helper()
+	for _, sid := range shardIds {
+		f, err := os.Create(base + erasure_coding.ToExt(sid))
+		if err != nil {
+			t.Fatalf("create shard %d: %v", sid, err)
+		}
+		if err := f.Truncate(shardSize); err != nil {
+			f.Close()
+			t.Fatalf("truncate shard %d: %v", sid, err)
+		}
+		if err := f.Close(); err != nil {
+			t.Fatalf("close shard %d: %v", sid, err)
+		}
+	}
+	if err := os.WriteFile(base+".ecx", make([]byte, 20), 0o644); err != nil {
+		t.Fatalf("write .ecx: %v", err)
+	}
+	if err := os.WriteFile(base+".ecj", nil, 0o644); err != nil {
+		t.Fatalf("write .ecj: %v", err)
+	}
+}
+
+// TestEmptyDatStubNextToEcxDoesNotDeleteShards: a disk holding a few local
+// shards of a healthy distributed EC volume plus a leftover empty .dat stub.
+// The stub used to make startup classify the volume as an interrupted local
+// encode (fewer than dataShards local shards) and delete the only copies of
+// those shards. The stub must be swept and the shards must load.
+func TestEmptyDatStubNextToEcxDoesNotDeleteShards(t *testing.T) {
+	dir := t.TempDir()
+	collection := "warp-rec"
+	vid := needle.VolumeId(87)
+	base := erasure_coding.EcShardFileName(collection, dir, int(vid))
+
+	shardIds := []int{0, 5}
+	writeEcShardFixture(t, base, shardIds, int64(erasure_coding.ErasureCodingSmallBlockSize))
+	// Zero-byte stub .dat: the phantom left by the pre-fix loader.
+	if err := os.WriteFile(base+".dat", nil, 0o644); err != nil {
+		t.Fatalf("write stub .dat: %v", err)
+	}
+	if err := volume_info.SaveVolumeInfo(base+".vif", &volume_server_pb.VolumeInfo{
+		Version:       uint32(needle.Version3),
+		EcShardConfig: &volume_server_pb.EcShardConfig{DataShards: 10, ParityShards: 4},
+	}); err != nil {
+		t.Fatalf("save .vif: %v", err)
+	}
+
+	store := startEcTestStore(t, dir)
+
+	loc := store.Locations[0]
+	for _, sid := range shardIds {
+		if !util.FileExists(base + erasure_coding.ToExt(sid)) {
+			t.Errorf("EC shard file %d was deleted", sid)
+		}
+		if _, found := loc.FindEcShard(vid, erasure_coding.ShardId(sid)); !found {
+			t.Errorf("EC shard %d was not loaded", sid)
+		}
+	}
+	if !util.FileExists(base + ".ecx") {
+		t.Error(".ecx was deleted")
+	}
+	if util.FileExists(base + ".dat") {
+		t.Error("empty .dat stub was not swept")
+	}
+	if store.findVolume(vid) != nil {
+		t.Errorf("stub was loaded as a phantom volume %d", vid)
+	}
+}
+
+// TestEmptyDatWithoutVifDoesNotDeleteShards: same shard-holding disk but the
+// stub has no .vif at all, so the sweep has no EC evidence and must leave it.
+// The empty .dat still must not count as an encode source: the shards survive
+// and load as a distributed EC volume.
+func TestEmptyDatWithoutVifDoesNotDeleteShards(t *testing.T) {
+	dir := t.TempDir()
+	collection := "warp-rec"
+	vid := needle.VolumeId(88)
+	base := erasure_coding.EcShardFileName(collection, dir, int(vid))
+
+	shardIds := []int{1, 7}
+	writeEcShardFixture(t, base, shardIds, int64(erasure_coding.ErasureCodingSmallBlockSize))
+	if err := os.WriteFile(base+".dat", nil, 0o644); err != nil {
+		t.Fatalf("write stub .dat: %v", err)
+	}
+
+	store := startEcTestStore(t, dir)
+
+	loc := store.Locations[0]
+	for _, sid := range shardIds {
+		if !util.FileExists(base + erasure_coding.ToExt(sid)) {
+			t.Errorf("EC shard file %d was deleted", sid)
+		}
+		if _, found := loc.FindEcShard(vid, erasure_coding.ShardId(sid)); !found {
+			t.Errorf("EC shard %d was not loaded", sid)
+		}
+	}
+	if store.findVolume(vid) != nil {
+		t.Errorf("stub was loaded as a phantom volume %d", vid)
+	}
+}
@@ -708,7 +708,8 @@ func (t *ErasureCodingTask) verifyEcShardsBeforeDelete(ctx context.Context) erro
 		"per_server":    summary,
 	}).Info("EC shard inventory before source deletion")

-	if err := erasure_coding.RequireFullShardSet(t.volumeID, union, totalShards); err != nil {
+	degraded, err := erasure_coding.RequireRecoverableShardSet(t.volumeID, union, int(t.dataShards), totalShards)
+	if err != nil {
 		t.GetLogger().WithFields(map[string]interface{}{
 			"volume_id":  t.volumeID,
 			"per_server": summary,
@@ -716,6 +717,17 @@ func (t *ErasureCodingTask) verifyEcShardsBeforeDelete(ctx context.Context) erro
 		}).Error("EC shard verification failed — source volume will be kept")
 		return err
 	}
+	if degraded {
+		// Enough shards to reconstruct; the missing ones can be rebuilt from
+		// the survivors, while keeping the source next to live shards is the
+		// more dangerous mixed state.
+		t.GetLogger().WithFields(map[string]interface{}{
+			"volume_id":    t.volumeID,
+			"shards_seen":  union.Count(),
+			"shards_total": totalShards,
+			"per_server":   summary,
+		}).Warning("EC shard set incomplete but recoverable; proceeding with source deletion")
+	}
 	return nil
 }