mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-06-13 23:36:45 +03:00
225 lines
8.5 KiB
Go
225 lines
8.5 KiB
Go
package erasure_coding
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/rand"
|
|
"fmt"
|
|
"os"
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/types"
|
|
)
|
|
|
|
// TestEcConsistency_WritesBetweenEncodeAndEcx reproduces a race condition that
|
|
// existed in VolumeEcShardsGenerate before the fix in this PR.
|
|
//
|
|
// Previously, the order was:
|
|
// 1. WriteEcFilesWithContext(baseFileName, ecCtx) — EC shards from .dat
|
|
// 2. WriteSortedFileFromIdx(v.IndexFileName(), ".ecx") — .ecx from .idx
|
|
//
|
|
// If a write appended data to .dat/.idx between steps 1 and 2, the .ecx would
|
|
// have entries pointing to data that doesn't exist in the EC shards.
|
|
//
|
|
// The fix reverses the order (write .ecx first, then generate EC shards), so
|
|
// that .ecx is always a subset of what the EC shards contain.
|
|
//
|
|
// This test simulates the old buggy sequence to validate that the problem is real.
|
|
func TestEcConsistency_WritesBetweenEncodeAndEcx(t *testing.T) {
|
|
dir := t.TempDir()
|
|
baseFileName := dir + "/consistency"
|
|
|
|
ctx := NewDefaultECContext("", 0)
|
|
|
|
// Phase 1: Create initial .dat and .idx with known data
|
|
datSize := int64(largeBlockSize*DataShardsCount + smallBlockSize*DataShardsCount*3) // 1 large row + 3 small rows
|
|
originalData := make([]byte, datSize)
|
|
rand.Read(originalData)
|
|
|
|
err := os.WriteFile(baseFileName+".dat", originalData, 0644)
|
|
require.NoError(t, err)
|
|
|
|
// Create a minimal .idx with one entry pointing to the data
|
|
createTestIdx(t, baseFileName+".idx", []idxEntry{
|
|
{id: 1, offset: 0, size: types.Size(datSize)},
|
|
})
|
|
|
|
// Phase 2: EC encode — generates .ec00-.ec13 from current .dat
|
|
err = generateEcFiles(baseFileName, int(smallBlockSize), largeBlockSize, smallBlockSize, ctx)
|
|
require.NoError(t, err, "EC encoding")
|
|
|
|
// Phase 3: SIMULATE a write between EC encoding and .ecx generation
|
|
// (reproducing the old buggy order where .ecx was generated after EC shards)
|
|
extraData := make([]byte, 5000)
|
|
rand.Read(extraData)
|
|
|
|
f, err := os.OpenFile(baseFileName+".dat", os.O_WRONLY|os.O_APPEND, 0644)
|
|
require.NoError(t, err)
|
|
_, err = f.Write(extraData)
|
|
require.NoError(t, err)
|
|
f.Close()
|
|
|
|
// Update .idx with the new entry
|
|
createTestIdx(t, baseFileName+".idx", []idxEntry{
|
|
{id: 1, offset: 0, size: types.Size(datSize)},
|
|
{id: 2, offset: datSize, size: types.Size(len(extraData))},
|
|
})
|
|
|
|
// Phase 4: Generate .ecx from the UPDATED .idx (as the old buggy code did)
|
|
err = WriteSortedFileFromIdx(baseFileName, ".ecx")
|
|
require.NoError(t, err, "WriteSortedFileFromIdx")
|
|
|
|
// Phase 5: Now try to read needle 2 via EC shards — it should fail
|
|
// because the EC shards were generated from the OLD .dat (without the extra data)
|
|
ecFiles, err := openEcFiles(baseFileName, true, ctx)
|
|
require.NoError(t, err)
|
|
defer closeEcFiles(ecFiles)
|
|
|
|
ecStat, err := ecFiles[0].Stat()
|
|
require.NoError(t, err)
|
|
shardSize := ecStat.Size()
|
|
|
|
// Read needle 2 (the one added after EC encoding) using LocateData.
|
|
// Use shardSize-1 to simulate the ecdFileSize fallback path used by
|
|
// LocateEcShardNeedleInterval when datFileSize is unavailable.
|
|
actualSize := needle.GetActualSize(types.Size(len(extraData)), needle.Version3)
|
|
intervals := LocateData(largeBlockSize, smallBlockSize, shardSize-1, datSize, types.Size(actualSize))
|
|
|
|
t.Logf("Trying to read needle 2 at offset %d size %d from EC shards (shardSize=%d)", datSize, actualSize, shardSize)
|
|
t.Logf("Intervals: %+v", intervals)
|
|
|
|
// Try to read — this will either fail with an error (offset out of bounds)
|
|
// or return garbage data (the padded zeros from EC encoding)
|
|
ecData, readErr := assembleFromIntervalsAllowError(ecFiles, intervals, largeBlockSize, smallBlockSize)
|
|
|
|
if readErr != nil {
|
|
t.Logf("CONFIRMED: Read error for needle written after EC encoding: %v", readErr)
|
|
} else {
|
|
// If we got data, it should be zeros (padding) or garbage, not the actual extraData
|
|
isAllZeros := true
|
|
for _, b := range ecData {
|
|
if b != 0 {
|
|
isAllZeros = false
|
|
break
|
|
}
|
|
}
|
|
if isAllZeros {
|
|
t.Logf("CONFIRMED: Read returned zero-padded data (EC shards don't have the needle)")
|
|
} else if !bytes.Equal(ecData[:len(extraData)], extraData) {
|
|
t.Logf("CONFIRMED: Read returned wrong data (EC shards don't have the needle)")
|
|
} else {
|
|
t.Error("UNEXPECTED: Read returned correct data — needle should NOT be in EC shards")
|
|
}
|
|
}
|
|
|
|
// Phase 6: Verify a small read from the original data still works.
|
|
// Use the correct shardDatSize (from the original datSize, not the modified one)
|
|
// to avoid the fallback heuristic issues.
|
|
shardDatSize := datSize / int64(DataShardsCount)
|
|
readSize := types.Size(smallBlockSize)
|
|
intervals1 := LocateData(largeBlockSize, smallBlockSize, shardDatSize, 0, readSize)
|
|
ecData1, err := assembleFromIntervalsAllowError(ecFiles, intervals1, largeBlockSize, smallBlockSize)
|
|
require.NoError(t, err, "reading original data from EC shards")
|
|
|
|
assert.True(t, bytes.Equal(originalData[:readSize], ecData1),
|
|
"Original data at offset 0 should match EC shard data")
|
|
t.Logf("Original data reads correctly from EC shards")
|
|
}
|
|
|
|
// TestEcConsistency_ExactLargeRowEncoding verifies that generateEcFiles correctly
|
|
// encodes a .dat file whose size is exactly one large row (DataShardsCount *
|
|
// largeBlockSize), producing shards of exactly largeBlockSize each, and that
|
|
// every chunk of the encoded data can be read back correctly via LocateData.
|
|
func TestEcConsistency_ExactLargeRowEncoding(t *testing.T) {
|
|
dir := t.TempDir()
|
|
baseFileName := dir + "/exact"
|
|
ctx := NewDefaultECContext("", 0)
|
|
|
|
datSize := int64(largeBlockSize * DataShardsCount) // exactly 1 large row
|
|
data := make([]byte, datSize)
|
|
rand.Read(data)
|
|
err := os.WriteFile(baseFileName+".dat", data, 0644)
|
|
require.NoError(t, err)
|
|
|
|
// EC encode
|
|
err = generateEcFiles(baseFileName, int(smallBlockSize), largeBlockSize, smallBlockSize, ctx)
|
|
require.NoError(t, err)
|
|
|
|
// Check shard sizes — each shard should be exactly largeBlockSize
|
|
ecFiles, err := openEcFiles(baseFileName, true, ctx)
|
|
require.NoError(t, err)
|
|
defer closeEcFiles(ecFiles)
|
|
|
|
for i := 0; i < ctx.DataShards; i++ {
|
|
stat, err := ecFiles[i].Stat()
|
|
require.NoError(t, err, "stat shard %d", i)
|
|
assert.Equal(t, int64(largeBlockSize), stat.Size(),
|
|
"data shard %d should be exactly largeBlockSize", i)
|
|
}
|
|
|
|
// Verify data reads correctly at every smallBlockSize offset via LocateData
|
|
shardDatSize := datSize / int64(ctx.DataShards)
|
|
readSize := types.Size(smallBlockSize)
|
|
for offset := int64(0); offset+int64(readSize) <= datSize; offset += int64(readSize) {
|
|
intervals := LocateData(largeBlockSize, smallBlockSize, shardDatSize, offset, readSize)
|
|
ecData, err := assembleFromIntervalsAllowError(ecFiles, intervals, largeBlockSize, smallBlockSize)
|
|
require.NoError(t, err, "reading at offset %d", offset)
|
|
expected := data[offset : offset+int64(readSize)]
|
|
assert.True(t, bytes.Equal(expected, ecData),
|
|
"data mismatch at offset %d", offset)
|
|
}
|
|
}
|
|
|
|
type idxEntry struct {
|
|
id types.NeedleId
|
|
offset int64
|
|
size types.Size
|
|
}
|
|
|
|
func createTestIdx(t *testing.T, filename string, entries []idxEntry) {
|
|
t.Helper()
|
|
f, err := os.Create(filename)
|
|
require.NoError(t, err)
|
|
defer f.Close()
|
|
|
|
buf := make([]byte, types.NeedleMapEntrySize)
|
|
for _, e := range entries {
|
|
types.NeedleIdToBytes(buf[:types.NeedleIdSize], e.id)
|
|
types.OffsetToBytes(buf[types.NeedleIdSize:types.NeedleIdSize+types.OffsetSize], types.ToOffset(e.offset))
|
|
types.SizeToBytes(buf[types.NeedleIdSize+types.OffsetSize:], e.size)
|
|
_, err := f.Write(buf)
|
|
require.NoError(t, err)
|
|
}
|
|
}
|
|
|
|
func assembleFromIntervalsAllowError(ecFiles []*os.File, intervals []Interval, large, small int64) ([]byte, error) {
|
|
var data []byte
|
|
for _, interval := range intervals {
|
|
shardId, shardOffset := interval.ToShardIdAndOffset(large, small)
|
|
if int(shardId) >= len(ecFiles) {
|
|
return nil, fmt.Errorf("shard %d out of range (have %d files)", shardId, len(ecFiles))
|
|
}
|
|
stat, err := ecFiles[shardId].Stat()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("stat shard %d: %v", shardId, err)
|
|
}
|
|
if shardOffset+int64(interval.Size) > stat.Size() {
|
|
return nil, fmt.Errorf("read past end of shard %d: offset %d + size %d > fileSize %d",
|
|
shardId, shardOffset, interval.Size, stat.Size())
|
|
}
|
|
chunk := make([]byte, interval.Size)
|
|
n, err := ecFiles[shardId].ReadAt(chunk, shardOffset)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("read shard %d offset %d: %v", shardId, shardOffset, err)
|
|
}
|
|
if n != int(interval.Size) {
|
|
return nil, fmt.Errorf("short read shard %d: got %d want %d", shardId, n, interval.Size)
|
|
}
|
|
data = append(data, chunk...)
|
|
}
|
|
return data, nil
|
|
}
|