diff --git a/seaweed-volume/src/storage/volume.rs b/seaweed-volume/src/storage/volume.rs index b01bf6b0a..a26509460 100644 --- a/seaweed-volume/src/storage/volume.rs +++ b/seaweed-volume/src/storage/volume.rs @@ -3592,6 +3592,57 @@ mod tests { assert!(matches!(err, VolumeError::Deleted)); } + // Guard the Rust integrity-check tombstone path against the Go regression + // where verifyDeletedNeedleIntegrity forwarded TombstoneFileSize into the + // needle-size check, mismatched against the on-disk Size=0 header, and + // sent every volume with a trailing deletion read-only on load. The Rust + // check guards its size comparison with !size.is_deleted(); this test + // keeps that guarantee from silently regressing. + #[test] + fn test_check_volume_data_integrity_with_deletion_tombstone() { + let tmp = TempDir::new().unwrap(); + let dir = tmp.path().to_str().unwrap(); + + { + let mut v = make_test_volume(dir); + for i in 1..=3 { + let data = format!("data {}", i); + let mut n = Needle { + id: NeedleId(i), + cookie: Cookie(i as u32), + data: data.as_bytes().to_vec(), + data_size: data.len() as u32, + ..Needle::default() + }; + v.write_needle(&mut n, true).unwrap(); + } + v.delete_needle(&mut Needle { + id: NeedleId(2), + cookie: Cookie(2), + ..Needle::default() + }) + .unwrap(); + v.sync_to_disk().unwrap(); + } + + let v = Volume::new( + dir, + dir, + "", + VolumeId(1), + NeedleMapKind::InMemory, + None, + None, + 0, + Version::current(), + ) + .unwrap(); + assert!( + !v.is_no_write_or_delete(), + "volume should not be read-only after reload with trailing deletion tombstone" + ); + } + #[test] fn test_volume_multiple_needles() { let tmp = TempDir::new().unwrap(); diff --git a/weed/storage/volume_checking.go b/weed/storage/volume_checking.go index 29d84f3a9..f398b82e2 100644 --- a/weed/storage/volume_checking.go +++ b/weed/storage/volume_checking.go @@ -154,8 +154,14 @@ func doCheckAndFixVolumeData(v *Volume, indexFile *os.File, indexOffset int64) ( return 0, nil } if size < 0 { - // read the deletion entry + // read the deletion entry. Pass io.EOF and ErrorSizeMismatch through + // unwrapped so CheckVolumeDataIntegrity can recognize them and run its + // trailing-truncation / wrap-around recovery loop, matching the live + // branch below. if lastAppendAtNs, err = verifyDeletedNeedleIntegrity(v.DataBackend, v.Version(), offset.ToActualOffset(), key); err != nil { + if err == io.EOF || err == ErrorSizeMismatch { + return lastAppendAtNs, err + } return lastAppendAtNs, fmt.Errorf("verifyDeletedNeedleIntegrity %s failed: %v", indexFile.Name(), err) } } else { @@ -243,8 +249,17 @@ func verifyNeedleIntegrity(datFile backend.BackendStorageFile, v needle.Version, func verifyDeletedNeedleIntegrity(datFile backend.BackendStorageFile, v needle.Version, offset int64, key types.NeedleId) (lastAppendAtNs uint64, err error) { n := new(needle.Needle) - size := types.TombstoneFileSize + // Tombstones are appended with DataSize=0, so the on-disk header carries + // Size=0. TombstoneFileSize (-1) lives only in the .idx; passing it to + // ReadData fails the size check and triggers the 32GB wrap-around retry, + // which reads past EOF and falsely marks the volume read-only. + size := types.Size(0) if err = n.ReadData(datFile, offset, size, v); err != nil { + // Preserve io.EOF and ErrorSizeMismatch as-is so CheckVolumeDataIntegrity + // can detect trailing truncation and trigger its wrap-around retry. + if err == io.EOF || err == ErrorSizeMismatch { + return n.AppendAtNs, err + } return n.AppendAtNs, fmt.Errorf("read data [%d,%d) : %v", offset, offset+needle.GetActualSize(size, v), err) } if n.Id != key { diff --git a/weed/storage/volume_checking_test.go b/weed/storage/volume_checking_test.go index 3da226d9d..2a57f3676 100644 --- a/weed/storage/volume_checking_test.go +++ b/weed/storage/volume_checking_test.go @@ -81,6 +81,58 @@ func TestScrubVolumeData(t *testing.T) { } } +// TestCheckVolumeDataIntegrityWithDeletionTombstone guards the size argument +// passed to ReadData when verifying a trailing deletion tombstone. The .idx +// entry carries TombstoneFileSize (-1) but the appended tombstone needle in +// .dat carries Size=0. Forwarding the .idx size into ReadData fails the size +// check, activates the 32GB 4-byte-offset wrap-around retry, and surfaces EOF; +// CheckVolumeDataIntegrity then treats the volume as corrupt and the loader +// marks every such volume read-only on startup. +func TestCheckVolumeDataIntegrityWithDeletionTombstone(t *testing.T) { + dir := t.TempDir() + + v, err := NewVolume(dir, dir, "", 1, NeedleMapInMemory, &super_block.ReplicaPlacement{}, &needle.TTL{}, 0, needle.GetCurrentVersion(), 0, 0) + if err != nil { + t.Fatalf("volume creation: %v", err) + } + defer v.Close() + + for i := 1; i <= 3; i++ { + n := newRandomNeedle(uint64(i)) + if _, _, _, err := v.writeNeedle2(n, true, false); err != nil { + t.Fatalf("write needle %d: %v", i, err) + } + } + // Delete a live needle so a tombstone (Size=0 on disk, -1 in idx) is the + // last entry the integrity check examines. + if _, err := v.doDeleteRequest(newEmptyNeedle(2)); err != nil { + t.Fatalf("delete needle: %v", err) + } + if err := v.DataBackend.Sync(); err != nil { + t.Fatalf("sync .dat: %v", err) + } + if err := v.nm.Sync(); err != nil { + t.Fatalf("sync .idx: %v", err) + } + + idxFile, err := os.OpenFile(v.FileName(".idx"), os.O_RDONLY, 0644) + if err != nil { + t.Fatalf("open .idx: %v", err) + } + defer idxFile.Close() + + lastAppendAtNs, err := CheckVolumeDataIntegrity(v, idxFile) + if err != nil { + t.Fatalf("CheckVolumeDataIntegrity should succeed with a trailing deletion tombstone: %v", err) + } + // V3 tombstones still record AppendAtNs; a zero return means ReadData + // bailed out before populating the needle and would mask future regressions + // that quietly skip the tombstone body. + if lastAppendAtNs == 0 { + t.Errorf("expected non-zero lastAppendAtNs for V3 deletion tombstone, got 0") + } +} + // TestMaxNeedleEnd ensures the needle map's MaxNeedleEnd accumulator lets // volume.load() detect an .idx that references bytes past the end of the .dat // — the deeper-than-tail corruption shape from issue #8928 that the existing