mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-06-13 23:36:45 +03:00
* fix(volume): pass on-disk tombstone size to ReadData in verifyDeletedNeedleIntegrity verifyDeletedNeedleIntegrity was forwarding TombstoneFileSize (-1) into Needle.ReadData. A deletion tombstone is appended to .dat with DataSize=0 so the on-disk needle header carries Size=0; TombstoneFileSize is only the .idx sentinel for "this entry is deleted" and is never written into a needle header. ReadBytes' size check therefore mismatched on every tombstone (-1 != 0), returned ErrorSizeMismatch, and triggered the 4-byte-offset wrap-around retry in ReadData (offset + 32 GB). On any volume large enough that offset+32 GB exceeds dat fileSize the retry read EOF, CheckVolumeDataIntegrity reported corruption, and the loader set noWriteOrDelete = true. Every volume whose last 10 .idx entries included a deletion went read-only on startup — i.e. any healthy volume where the most recent operations included a delete. Pass Size(0) so the size check matches the on-disk tombstone header. Add a regression test that writes three needles, deletes one, and asserts CheckVolumeDataIntegrity succeeds with a tombstone at the .idx tail. Without this fix the test reproduces the exact log shape from the bug report: read 0 dataSize 32 offset <orig+32GB> fileSize <much smaller>: EOF verifyDeletedNeedleIntegrity ...idx failed: read data [N,N+32) : EOF The Rust port guards its integrity-check size comparison with !size.is_deleted() (seaweed-volume/src/storage/volume.rs) and never hits this path, so no Rust mirror change is needed. * test(seaweed-volume): mirror Go regression for deletion-tombstone integrity The Rust integrity check already guards its size-mismatch comparison with !size.is_deleted() (volume.rs:1859) and reads tombstone AppendAtNs with body_size=0, so the Go regression fixed in the previous commit does not apply. Lock that guarantee in with a parallel reload test: write three needles, delete one, sync, reopen via Volume::new, assert the volume is not flipped read-only. Catches any future change that removes the deleted-entry guard or re-introduces a size-strict path in check_volume_data_integrity for tombstones. * fix(volume): propagate io.EOF and ErrorSizeMismatch from verifyDeletedNeedleIntegrity CheckVolumeDataIntegrity relies on identity comparison against io.EOF and ErrorSizeMismatch to walk back through the last ten .idx entries and tolerate a partial truncation at the tail (the "fix and continue" loop). The live-needle branch in doCheckAndFixVolumeData already returns those sentinels unwrapped; the deletion branch wrapped them in fmt.Errorf, so a genuine .dat truncation past a tombstone offset broke the recovery and flipped the volume read-only. Mirror the live-needle handling: both verifyDeletedNeedleIntegrity and doCheckAndFixVolumeData now short-circuit on io.EOF / ErrorSizeMismatch and pass them through unwrapped. Other errors keep their existing context wrapping. Also tighten the regression test to capture lastAppendAtNs and assert it's non-zero, so a future regression that skips the tombstone body (and therefore never populates AppendAtNs) is caught even when the err check still passes.
This commit is contained in:
@@ -3592,6 +3592,57 @@ mod tests {
|
||||
assert!(matches!(err, VolumeError::Deleted));
|
||||
}
|
||||
|
||||
// Guard the Rust integrity-check tombstone path against the Go regression
|
||||
// where verifyDeletedNeedleIntegrity forwarded TombstoneFileSize into the
|
||||
// needle-size check, mismatched against the on-disk Size=0 header, and
|
||||
// sent every volume with a trailing deletion read-only on load. The Rust
|
||||
// check guards its size comparison with !size.is_deleted(); this test
|
||||
// keeps that guarantee from silently regressing.
|
||||
#[test]
|
||||
fn test_check_volume_data_integrity_with_deletion_tombstone() {
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let dir = tmp.path().to_str().unwrap();
|
||||
|
||||
{
|
||||
let mut v = make_test_volume(dir);
|
||||
for i in 1..=3 {
|
||||
let data = format!("data {}", i);
|
||||
let mut n = Needle {
|
||||
id: NeedleId(i),
|
||||
cookie: Cookie(i as u32),
|
||||
data: data.as_bytes().to_vec(),
|
||||
data_size: data.len() as u32,
|
||||
..Needle::default()
|
||||
};
|
||||
v.write_needle(&mut n, true).unwrap();
|
||||
}
|
||||
v.delete_needle(&mut Needle {
|
||||
id: NeedleId(2),
|
||||
cookie: Cookie(2),
|
||||
..Needle::default()
|
||||
})
|
||||
.unwrap();
|
||||
v.sync_to_disk().unwrap();
|
||||
}
|
||||
|
||||
let v = Volume::new(
|
||||
dir,
|
||||
dir,
|
||||
"",
|
||||
VolumeId(1),
|
||||
NeedleMapKind::InMemory,
|
||||
None,
|
||||
None,
|
||||
0,
|
||||
Version::current(),
|
||||
)
|
||||
.unwrap();
|
||||
assert!(
|
||||
!v.is_no_write_or_delete(),
|
||||
"volume should not be read-only after reload with trailing deletion tombstone"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_volume_multiple_needles() {
|
||||
let tmp = TempDir::new().unwrap();
|
||||
|
||||
@@ -154,8 +154,14 @@ func doCheckAndFixVolumeData(v *Volume, indexFile *os.File, indexOffset int64) (
|
||||
return 0, nil
|
||||
}
|
||||
if size < 0 {
|
||||
// read the deletion entry
|
||||
// read the deletion entry. Pass io.EOF and ErrorSizeMismatch through
|
||||
// unwrapped so CheckVolumeDataIntegrity can recognize them and run its
|
||||
// trailing-truncation / wrap-around recovery loop, matching the live
|
||||
// branch below.
|
||||
if lastAppendAtNs, err = verifyDeletedNeedleIntegrity(v.DataBackend, v.Version(), offset.ToActualOffset(), key); err != nil {
|
||||
if err == io.EOF || err == ErrorSizeMismatch {
|
||||
return lastAppendAtNs, err
|
||||
}
|
||||
return lastAppendAtNs, fmt.Errorf("verifyDeletedNeedleIntegrity %s failed: %v", indexFile.Name(), err)
|
||||
}
|
||||
} else {
|
||||
@@ -243,8 +249,17 @@ func verifyNeedleIntegrity(datFile backend.BackendStorageFile, v needle.Version,
|
||||
|
||||
func verifyDeletedNeedleIntegrity(datFile backend.BackendStorageFile, v needle.Version, offset int64, key types.NeedleId) (lastAppendAtNs uint64, err error) {
|
||||
n := new(needle.Needle)
|
||||
size := types.TombstoneFileSize
|
||||
// Tombstones are appended with DataSize=0, so the on-disk header carries
|
||||
// Size=0. TombstoneFileSize (-1) lives only in the .idx; passing it to
|
||||
// ReadData fails the size check and triggers the 32GB wrap-around retry,
|
||||
// which reads past EOF and falsely marks the volume read-only.
|
||||
size := types.Size(0)
|
||||
if err = n.ReadData(datFile, offset, size, v); err != nil {
|
||||
// Preserve io.EOF and ErrorSizeMismatch as-is so CheckVolumeDataIntegrity
|
||||
// can detect trailing truncation and trigger its wrap-around retry.
|
||||
if err == io.EOF || err == ErrorSizeMismatch {
|
||||
return n.AppendAtNs, err
|
||||
}
|
||||
return n.AppendAtNs, fmt.Errorf("read data [%d,%d) : %v", offset, offset+needle.GetActualSize(size, v), err)
|
||||
}
|
||||
if n.Id != key {
|
||||
|
||||
@@ -81,6 +81,58 @@ func TestScrubVolumeData(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestCheckVolumeDataIntegrityWithDeletionTombstone guards the size argument
|
||||
// passed to ReadData when verifying a trailing deletion tombstone. The .idx
|
||||
// entry carries TombstoneFileSize (-1) but the appended tombstone needle in
|
||||
// .dat carries Size=0. Forwarding the .idx size into ReadData fails the size
|
||||
// check, activates the 32GB 4-byte-offset wrap-around retry, and surfaces EOF;
|
||||
// CheckVolumeDataIntegrity then treats the volume as corrupt and the loader
|
||||
// marks every such volume read-only on startup.
|
||||
func TestCheckVolumeDataIntegrityWithDeletionTombstone(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
v, err := NewVolume(dir, dir, "", 1, NeedleMapInMemory, &super_block.ReplicaPlacement{}, &needle.TTL{}, 0, needle.GetCurrentVersion(), 0, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("volume creation: %v", err)
|
||||
}
|
||||
defer v.Close()
|
||||
|
||||
for i := 1; i <= 3; i++ {
|
||||
n := newRandomNeedle(uint64(i))
|
||||
if _, _, _, err := v.writeNeedle2(n, true, false); err != nil {
|
||||
t.Fatalf("write needle %d: %v", i, err)
|
||||
}
|
||||
}
|
||||
// Delete a live needle so a tombstone (Size=0 on disk, -1 in idx) is the
|
||||
// last entry the integrity check examines.
|
||||
if _, err := v.doDeleteRequest(newEmptyNeedle(2)); err != nil {
|
||||
t.Fatalf("delete needle: %v", err)
|
||||
}
|
||||
if err := v.DataBackend.Sync(); err != nil {
|
||||
t.Fatalf("sync .dat: %v", err)
|
||||
}
|
||||
if err := v.nm.Sync(); err != nil {
|
||||
t.Fatalf("sync .idx: %v", err)
|
||||
}
|
||||
|
||||
idxFile, err := os.OpenFile(v.FileName(".idx"), os.O_RDONLY, 0644)
|
||||
if err != nil {
|
||||
t.Fatalf("open .idx: %v", err)
|
||||
}
|
||||
defer idxFile.Close()
|
||||
|
||||
lastAppendAtNs, err := CheckVolumeDataIntegrity(v, idxFile)
|
||||
if err != nil {
|
||||
t.Fatalf("CheckVolumeDataIntegrity should succeed with a trailing deletion tombstone: %v", err)
|
||||
}
|
||||
// V3 tombstones still record AppendAtNs; a zero return means ReadData
|
||||
// bailed out before populating the needle and would mask future regressions
|
||||
// that quietly skip the tombstone body.
|
||||
if lastAppendAtNs == 0 {
|
||||
t.Errorf("expected non-zero lastAppendAtNs for V3 deletion tombstone, got 0")
|
||||
}
|
||||
}
|
||||
|
||||
// TestMaxNeedleEnd ensures the needle map's MaxNeedleEnd accumulator lets
|
||||
// volume.load() detect an .idx that references bytes past the end of the .dat
|
||||
// — the deeper-than-tail corruption shape from issue #8928 that the existing
|
||||
|
||||
Reference in New Issue
Block a user