Files
seaweedfs/weed/storage/erasure_coding/ec_encoder.go
T
Chris Lu 18cdb3819b fix(ec): crash-safe ecx-journal fold and shard rebuild (fsync before publish, no short-read-as-success) (#9938)
* fix(ec): make ecx-journal fold and shard rebuild crash-safe

Two EC rebuild paths could silently lose or corrupt data:

RebuildEcxFile folded the .ecj deletion journal into .ecx (in-place
WriteAt tombstones) and then unlinked the journal without flushing the
.ecx writes first. A crash could persist the unlink ahead of the
tombstones, resurrecting deleted needles on the next load. It also read
journal records with a bare n!=size break, so a torn tail silently
dropped the remaining tombstones before the unlink. Now: read records
with io.ReadFull (io.EOF ends cleanly, a torn tail aborts and leaves
.ecj in place for retry), fsync .ecx before removing the journal.

rebuildEcFiles treated a zero/short ReadAt as a clean end-of-input and
discarded the read error, so a truncated or unreadable input shard
produced truncated regenerated shards that were then published as
restored redundancy; the regenerated shards were also never fsynced on
the no-sidecar path. Now: derive the expected shard size from the
present inputs up front (rejecting a divergent/zero-size input), drive
the loop by that size, fail on any short read or short write, and fsync
every regenerated shard before it is mounted/renamed.

Rust volume server mirrors the rebuild fix: rebuild_ec_files now checks
the read_at byte count (it previously discarded it, the same truncation
bug). The Rust ecx fold already synced .ecx before removing the journal.

Custom EC ratios are unaffected: the shard size derives from the input
shards and the loop uses the .vif-resolved data/parity counts, never a
hardcoded 10+4.

* storage: close ecx journal files via defer in RebuildEcxFile

Per review: a single deferred Close per file replaces the per-error-path
manual closes, so new early returns cannot leak descriptors. The journal
is still closed explicitly before its unlink since Windows cannot delete
an open file; the deferred second Close is a harmless no-op.
2026-06-12 22:28:56 -07:00

618 lines
22 KiB
Go

package erasure_coding
import (
"fmt"
"io"
"os"
"path/filepath"
"github.com/klauspost/reedsolomon"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
"github.com/seaweedfs/seaweedfs/weed/storage/idx"
"github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
"github.com/seaweedfs/seaweedfs/weed/storage/types"
"github.com/seaweedfs/seaweedfs/weed/storage/volume_info"
"github.com/seaweedfs/seaweedfs/weed/util"
)
// Erasure-coding layout and block-size constants used by this package.
const (
// DataShardsCount is the number of data shards in the built-in (10+4) layout.
DataShardsCount = 10
// ParityShardsCount is the number of parity shards in the built-in (10+4) layout.
ParityShardsCount = 4
// TotalShardsCount is the combined data + parity shard count of the built-in layout.
TotalShardsCount = DataShardsCount + ParityShardsCount
MaxShardCount = 32 // Maximum number of shards since ShardBits is uint32 (bits 0-31)
// MinTotalDisks evaluates to 4 with the values above (integer division: 14/4 + 1).
// NOTE(review): presumably the smallest disk count that keeps the shards on any
// one disk within what parity can recover — confirm against callers.
MinTotalDisks = TotalShardsCount/ParityShardsCount + 1
// Block sizes passed to the encoder as its large and small block sizes.
ErasureCodingLargeBlockSize = 1024 * 1024 * 1024 // 1GB
ErasureCodingSmallBlockSize = 1024 * 1024 // 1MB
)
// WriteSortedFileFromIdx generates the sorted index file baseFileName+ext
// (the .ecx file) from the existing baseFileName.idx file. All keys are
// written in ascending order.
//
// The output file is created if missing and truncated if present. An error
// from closing the output file is returned when no earlier error occurred:
// a write failure that only surfaces at close time would otherwise be
// reported as success while leaving an incomplete file behind.
func WriteSortedFileFromIdx(baseFileName string, ext string) (e error) {
	nm, err := readNeedleMap(baseFileName)
	// readNeedleMap may return a partially filled map together with an error,
	// so release it whenever it is non-nil.
	if nm != nil {
		defer nm.Close()
	}
	if err != nil {
		return fmt.Errorf("readNeedleMap: %w", err)
	}

	ecxFile, err := os.OpenFile(baseFileName+ext, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return fmt.Errorf("failed to open ecx file: %w", err)
	}
	defer func() {
		// Surface the close error through the named result, but never mask
		// an earlier, more specific failure.
		if closeErr := ecxFile.Close(); closeErr != nil && e == nil {
			e = fmt.Errorf("failed to close ecx file: %w", closeErr)
		}
	}()

	err = nm.AscendingVisit(func(value needle_map.NeedleValue) error {
		_, writeErr := ecxFile.Write(value.ToBytes())
		return writeErr
	})
	if err != nil {
		return fmt.Errorf("failed to visit idx file: %w", err)
	}
	return nil
}
// WriteEcFiles encodes baseFileName.dat into its EC shard files (.ec00 ~ .ec13
// for the default layout). A nil ctx, or one whose Total() is zero, selects the
// default layout; any other ctx is used as given (e.g. a custom ratio).
//
// The returned bitrot protection (per-shard block CRC32C) is computed during
// the single encode pass; persisting it as a <base>.ecsum sidecar is left to
// the caller.
func WriteEcFiles(baseFileName string, ctx *ECContext) (*volume_server_pb.EcBitrotProtection, error) {
	// Size of each per-shard I/O buffer used while encoding.
	const encodeBufferSize = 256 * 1024

	layout := ctx
	if layout == nil || layout.Total() == 0 {
		layout = NewDefaultECContext("", 0)
	}
	return generateEcFiles(baseFileName, encodeBufferSize, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, layout)
}
// RebuildEcFiles regenerates the missing EC shard files of baseFileName and
// returns the ids of the shards it generated.
//
// Layout resolution: a ctx with a non-zero Total() is used as given. Otherwise
// the layout is read from baseFileName.vif; a .vif that exists but cannot be
// loaded is an error, while a missing .vif, a .vif without an EC config, or an
// invalid EC config falls back to the default ratio.
//
// additionalDirs are extra directories searched for existing shard files, for
// multi-disk servers whose shards are spread across disks. When a bitrot
// checksum sidecar is present for the (generation-0) volume, present input
// shards are verified against it and corrupt ones are excluded from
// Reed-Solomon and regenerated; unsafeIgnoreSidecar bypasses that guard.
func RebuildEcFiles(baseFileName string, ctx *ECContext, unsafeIgnoreSidecar bool, additionalDirs ...string) ([]uint32, error) {
	if ctx == nil || ctx.Total() == 0 {
		// Resolve the layout from the .vif to preserve the original configuration.
		volumeInfo, _, foundVif, vifErr := volume_info.MaybeLoadVolumeInfo(baseFileName + ".vif")
		if vifErr != nil {
			// Fail closed: silently using the default ratio here would rebuild
			// a custom-ratio volume with the wrong layout. Callers can pass an
			// explicit ctx to override.
			return nil, fmt.Errorf("RebuildEcFiles %s: cannot load .vif: %w", baseFileName, vifErr)
		}

		switch {
		case !foundVif || volumeInfo.EcShardConfig == nil:
			glog.V(0).Infof("Rebuilding EC files for %s with default config", baseFileName)
			ctx = NewDefaultECContext("", 0)
		default:
			ds := int(volumeInfo.EcShardConfig.DataShards)
			ps := int(volumeInfo.EcShardConfig.ParityShards)
			// Reject non-positive counts and layouts beyond MaxShardCount.
			if ds <= 0 || ps <= 0 || ds+ps > MaxShardCount {
				glog.Warningf("Invalid EC config in .vif for %s (data=%d, parity=%d), using default", baseFileName, ds, ps)
				ctx = NewDefaultECContext("", 0)
			} else {
				ctx = &ECContext{
					DataShards:   ds,
					ParityShards: ps,
				}
				glog.V(0).Infof("Rebuilding EC files for %s with config from .vif: %s", baseFileName, ctx.String())
			}
		}
	}

	return generateMissingEcFiles(baseFileName, 256*1024, ErasureCodingLargeBlockSize, ErasureCodingSmallBlockSize, ctx, unsafeIgnoreSidecar, additionalDirs)
}
// ToExt returns the shard file extension for ecIndex: ".ec" followed by the
// index zero-padded to at least two digits (for example 3 -> ".ec03").
func ToExt(ecIndex int) string {
	paddedIndex := fmt.Sprintf("%02d", ecIndex)
	return ".ec" + paddedIndex
}
// generateEcFiles encodes baseFileName.dat into the shard files described by
// ctx and returns the bitrot protection assembled from the per-shard checksum
// builders that were fed during the encode pass.
//
// bufferSize is the per-shard buffer length; largeBlockSize and smallBlockSize
// are forwarded unchanged to encodeDatFile.
func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64, ctx *ECContext) (*volume_server_pb.EcBitrotProtection, error) {
	datFile, openErr := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
	if openErr != nil {
		return nil, fmt.Errorf("failed to open dat file: %w", openErr)
	}
	defer datFile.Close()

	datInfo, statErr := datFile.Stat()
	if statErr != nil {
		return nil, fmt.Errorf("failed to stat dat file: %w", statErr)
	}
	datSize := datInfo.Size()

	// One rolling-CRC builder per shard; fed as each shard's bytes are written.
	checksummers := make([]*shardChecksumBuilder, ctx.Total())
	for shard := 0; shard < len(checksummers); shard++ {
		checksummers[shard] = newShardChecksumBuilder(BitrotBlockSize)
	}

	glog.V(0).Infof("encodeDatFile %s.dat size:%d with EC context %s", baseFileName, datSize, ctx.String())
	if encodeErr := encodeDatFile(datSize, baseFileName, bufferSize, largeBlockSize, datFile, smallBlockSize, ctx, checksummers); encodeErr != nil {
		return nil, fmt.Errorf("encodeDatFile: %w", encodeErr)
	}
	return buildProtectionFromBuilders(ctx, checksummers, BitrotBlockSize), nil
}
// findShardFile returns the path of the first existing shard file, checking
// baseFileName+ext first and then <dir>/<base name of baseFileName>+ext for
// each dir in additionalDirs, in order. It returns "" when none exists.
func findShardFile(baseFileName string, ext string, additionalDirs []string) string {
	if inPlace := baseFileName + ext; util.FileExists(inPlace) {
		return inPlace
	}

	shardName := filepath.Base(baseFileName) + ext
	for i := range additionalDirs {
		if alternate := filepath.Join(additionalDirs[i], shardName); util.FileExists(alternate) {
			return alternate
		}
	}
	return ""
}
// generateMissingEcFiles regenerates every EC shard of baseFileName that is
// absent, or that fails its checksum when a usable bitrot sidecar is present,
// from the shards that are present. It returns the ids of the shards it
// generated.
//
// Present shards are looked up at baseFileName+ext and then in additionalDirs.
// bufferSize, largeBlockSize and smallBlockSize are not used by this function.
//
// Failure handling: no output file is created until the pre-checks pass. If
// creating an output file fails, or the Reed-Solomon rebuild itself fails,
// every output created so far is removed, so an empty or half-written shard
// file is never left at a path where a later discovery pass would mistake it
// for a present shard. A failed sidecar verification of the regenerated shards
// removes all outputs in the same way.
func generateMissingEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64, ctx *ECContext, unsafeIgnoreSidecar bool, additionalDirs []string) (generatedShardIds []uint32, err error) {
	// Pass 1: discover which shards exist and which are missing,
	// opening input files but NOT creating output files yet.
	shardHasData := make([]bool, ctx.Total())
	shardPaths := make([]string, ctx.Total()) // non-empty for present shards (also the in-place output for a reclassified-corrupt shard)
	inputFiles := make([]*os.File, ctx.Total())
	presentCount := 0
	for shardId := 0; shardId < ctx.Total(); shardId++ {
		ext := ctx.ToExt(shardId)
		shardPath := findShardFile(baseFileName, ext, additionalDirs)
		if shardPath != "" {
			shardHasData[shardId] = true
			shardPaths[shardId] = shardPath
			inputFiles[shardId], err = os.OpenFile(shardPath, os.O_RDONLY, 0)
			if err != nil {
				return nil, err
			}
			// Inputs stay open until the whole rebuild returns.
			defer inputFiles[shardId].Close()
			presentCount++
		} else {
			generatedShardIds = append(generatedShardIds, uint32(shardId))
		}
	}

	// Bitrot verify-and-exclude: when a generation-0 checksum sidecar is present
	// and valid, verify each present input shard against it and reclassify
	// corrupt ones as missing so Reed-Solomon regenerates them instead of
	// silently consuming corrupt bytes. corruptOwned marks shards whose
	// (corrupt) original file must be replaced in place at its discovered path.
	corruptOwned := make([]bool, ctx.Total())
	prot, status := loadRebuildSidecar(baseFileName, ctx, additionalDirs)
	switch status {
	case BitrotInvalid:
		if !unsafeIgnoreSidecar {
			return nil, fmt.Errorf("bitrot sidecar for %s is malformed/unverifiable; refusing to rebuild (pass unsafeIgnoreSidecar to override)", baseFileName)
		}
		glog.Warningf("bitrot sidecar for %s is malformed/unverifiable; proceeding because unsafeIgnoreSidecar is set", baseFileName)
	case BitrotOn:
		corrupt := make([]int, 0, ctx.Total())
		for shardId := 0; shardId < ctx.Total(); shardId++ {
			if !shardHasData[shardId] {
				continue
			}
			entry := shardChecksums(prot, uint32(shardId))
			if entry == nil {
				continue
			}
			mismatched, verr := verifyShardFileBlocks(shardPaths[shardId], entry, int64(prot.BlockSize))
			if verr != nil {
				// A read error means we cannot trust this shard as a Reed-Solomon
				// input. Exclude it (treat as corrupt) rather than silently
				// feeding possibly-corrupt bytes into reconstruction.
				glog.Warningf("bitrot: failed to verify present shard %d for %s: %v; excluding it", shardId, baseFileName, verr)
				corrupt = append(corrupt, shardId)
				continue
			}
			if len(mismatched) > 0 {
				corrupt = append(corrupt, shardId)
			}
		}
		if len(corrupt) > 0 {
			// Wholesale-mismatch guard (RS-arbiter conservative form): localized
			// bitrot touches a few shards; a stale/wrong sidecar mismatches more
			// than parity_shards. In that case refuse rather than excluding good
			// shards en masse.
			if len(corrupt) > ctx.ParityShards && !unsafeIgnoreSidecar {
				return nil, fmt.Errorf("bitrot sidecar suspect for %s: %d/%d present shards mismatch (> parity %d); refusing to rebuild (pass unsafeIgnoreSidecar to override)",
					baseFileName, len(corrupt), presentCount, ctx.ParityShards)
			}
			if presentCount-len(corrupt) < ctx.DataShards && !unsafeIgnoreSidecar {
				return nil, fmt.Errorf("bitrot: only %d verified-good shards for %s, need %d data shards; sidecar may be stale (pass unsafeIgnoreSidecar to override)",
					presentCount-len(corrupt), baseFileName, ctx.DataShards)
			}
			if !unsafeIgnoreSidecar {
				for _, shardId := range corrupt {
					glog.Warningf("bitrot: present shard %d for %s fails checksum; excluding from rebuild inputs and regenerating", shardId, baseFileName)
					shardHasData[shardId] = false
					corruptOwned[shardId] = true
					generatedShardIds = append(generatedShardIds, uint32(shardId))
					presentCount--
				}
			}
		}
	}

	// Pre-check: bail out before creating any output files.
	if presentCount < ctx.DataShards {
		return nil, fmt.Errorf("not enough shards to rebuild %s: found %d shards, need at least %d (data shards), missing shards: %v",
			baseFileName, presentCount, ctx.DataShards, generatedShardIds)
	}
	glog.V(0).Infof("rebuilding %s: %d shards present, %d missing %v, config %s",
		baseFileName, presentCount, len(generatedShardIds), generatedShardIds, ctx.String())

	// Pass 2: create output files for missing shards. A genuinely-absent shard
	// is written at baseFileName+ext; a reclassified-corrupt shard is written to
	// a temp file beside its discovered location and atomically renamed over the
	// corrupt original after the rebuild (and checksum) succeed, so we never
	// leave a duplicate shard id or a half-written file.
	outputFiles := make([]*os.File, ctx.Total())
	writePaths := make([]string, ctx.Total())
	finalPaths := make([]string, ctx.Total())
	for shardId := 0; shardId < ctx.Total(); shardId++ {
		if shardHasData[shardId] {
			continue
		}
		finalPath := baseFileName + ctx.ToExt(shardId)
		writePath := finalPath
		if corruptOwned[shardId] && shardPaths[shardId] != "" {
			finalPath = shardPaths[shardId]
			writePath = shardPaths[shardId] + ".rebuilding"
		}
		outputFiles[shardId], err = os.OpenFile(writePath, os.O_TRUNC|os.O_WRONLY|os.O_CREATE, 0644)
		if err != nil {
			// Outputs already created for earlier shards are still empty;
			// remove them so they are not later discovered as present shards.
			cleanupRebuildOutputs(outputFiles, writePaths)
			return nil, err
		}
		defer outputFiles[shardId].Close()
		writePaths[shardId] = writePath
		finalPaths[shardId] = finalPath
	}

	if err = rebuildEcFiles(shardHasData, inputFiles, outputFiles, ctx); err != nil {
		// The outputs may be partially written. Every path in writePaths was
		// created by this call (absent shard or temp file), so removing them
		// never touches a pre-existing shard.
		cleanupRebuildOutputs(outputFiles, writePaths)
		return nil, fmt.Errorf("rebuildEcFiles: %w", err)
	}

	// Verify regenerated shards against the sidecar. Reed-Solomon is
	// deterministic, so a regenerated shard that does NOT match the sidecar
	// means the sidecar is wrong/stale (not the shard) — fail closed rather than
	// publishing bytes we cannot trust. On ANY verification failure (sync, read
	// error, or mismatch) remove every generated output so the rebuild publishes
	// nothing: a genuinely-missing shard returns to missing; a reclassified-
	// corrupt shard keeps its untouched original.
	if status == BitrotOn && !unsafeIgnoreSidecar {
		for shardId := 0; shardId < ctx.Total(); shardId++ {
			if writePaths[shardId] == "" {
				continue
			}
			entry := shardChecksums(prot, uint32(shardId))
			if entry == nil {
				continue
			}
			if err = outputFiles[shardId].Sync(); err != nil {
				cleanupRebuildOutputs(outputFiles, writePaths)
				return nil, fmt.Errorf("sync regenerated shard %d: %w", shardId, err)
			}
			mismatched, verr := verifyShardFileBlocks(writePaths[shardId], entry, int64(prot.BlockSize))
			if verr != nil {
				cleanupRebuildOutputs(outputFiles, writePaths)
				return nil, fmt.Errorf("bitrot: verify regenerated shard %d for %s: %w", shardId, baseFileName, verr)
			}
			if len(mismatched) > 0 {
				cleanupRebuildOutputs(outputFiles, writePaths)
				return nil, fmt.Errorf("bitrot: regenerated shard %d for %s does not match sidecar (%d blocks differ); sidecar likely stale — aborting (pass unsafeIgnoreSidecar to override)",
					shardId, baseFileName, len(mismatched))
			}
		}
	}

	// Atomically move reclassified-corrupt rebuilds over their originals.
	for shardId := 0; shardId < ctx.Total(); shardId++ {
		if writePaths[shardId] != "" && writePaths[shardId] != finalPaths[shardId] {
			// Close before renaming; the deferred second Close is a no-op error.
			outputFiles[shardId].Close()
			if rerr := os.Rename(writePaths[shardId], finalPaths[shardId]); rerr != nil {
				return nil, fmt.Errorf("bitrot: replace corrupt shard %d (%s -> %s): %w", shardId, writePaths[shardId], finalPaths[shardId], rerr)
			}
		}
	}
	return generatedShardIds, nil
}
// cleanupRebuildOutputs discards the outputs of a rebuild that must publish
// nothing. For every index with a non-empty entry in writePaths it closes the
// matching file in outputFiles (when non-nil) and removes the file at that
// path; indexes with an empty path are skipped.
//
// This covers both temp replacements and shards written directly at their
// final path. A reclassified-corrupt shard's original lives at a different
// path than its temp writePath and is therefore left untouched. Close and
// remove errors are ignored: this is best-effort cleanup on an error path.
func cleanupRebuildOutputs(outputFiles []*os.File, writePaths []string) {
	for slot, path := range writePaths {
		if path == "" {
			continue
		}
		// Close first so the removal also works where open files cannot be deleted.
		if out := outputFiles[slot]; out != nil {
			out.Close()
		}
		os.Remove(path)
	}
}
// loadRebuildSidecar locates, loads and validates the generation-0 checksum
// sidecar used to guard a rebuild. RebuildEcFiles works on the un-suffixed
// (generation 0) shard names, so only the legacy sidecar is consulted.
//
// Result:
//   - BitrotOff     no sidecar found, or it describes another generation or a
//     data/parity layout different from ctx;
//   - BitrotInvalid the sidecar fails to load (self-integrity) or its manifest
//     fails validation;
//   - BitrotOn      the sidecar is usable and is returned.
func loadRebuildSidecar(baseFileName string, ctx *ECContext, additionalDirs []string) (*volume_server_pb.EcBitrotProtection, BitrotStatus) {
	sidecarPath := findBitrotSidecar(0, baseFileName, baseFileName, additionalDirs...)
	if sidecarPath == "" {
		return nil, BitrotOff
	}

	sidecar, loadErr := LoadBitrotSidecar(sidecarPath)
	if loadErr != nil {
		glog.Warningf("bitrot: sidecar %s self-integrity failed: %v", sidecarPath, loadErr)
		return nil, BitrotInvalid
	}

	if sidecar.Generation != 0 {
		return nil, BitrotOff
	}

	cfg := sidecar.EcShardConfig
	sameLayout := cfg != nil &&
		int(cfg.DataShards) == ctx.DataShards &&
		int(cfg.ParityShards) == ctx.ParityShards
	if !sameLayout {
		return nil, BitrotOff
	}

	if manifestErr := ValidateBitrotManifest(sidecar, ctx.DataShards, ctx.ParityShards); manifestErr != nil {
		glog.Warningf("bitrot: sidecar %s manifest invalid: %v", sidecarPath, manifestErr)
		return nil, BitrotInvalid
	}
	return sidecar, BitrotOn
}
// encodeData encodes one row of blockSize-sized blocks starting at startOffset
// in file by splitting it into batches of the per-shard buffer length and
// encoding each batch with encodeDataOneBatch.
//
// The buffer length is taken from buffers[0]; it must be non-zero and divide
// blockSize evenly, otherwise the process is terminated via glog.Fatal. The
// first batch error is returned as-is.
func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File, ctx *ECContext, builders []*shardChecksumBuilder) error {
	bufferSize := int64(len(buffers[0]))
	if bufferSize == 0 {
		glog.Fatal("unexpected zero buffer size")
	}
	if blockSize%bufferSize != 0 {
		glog.Fatalf("unexpected block size %d buffer size %d", blockSize, bufferSize)
	}

	batchCount := blockSize / bufferSize
	for batch := int64(0); batch < batchCount; batch++ {
		batchOffset := startOffset + batch*bufferSize
		if batchErr := encodeDataOneBatch(file, enc, batchOffset, blockSize, buffers, outputs, ctx, builders); batchErr != nil {
			return batchErr
		}
	}
	return nil
}
// openEcFiles opens one file per shard of ctx, named baseFileName plus the
// shard extension, in shard-id order. With forRead the files are opened
// read-only; otherwise they are created if missing, truncated, and opened
// write-only.
//
// On failure the files opened so far are returned together with the error so
// the caller can close them. The underlying open error is wrapped with %w, so
// callers can inspect it with errors.Is / errors.As.
func openEcFiles(baseFileName string, forRead bool, ctx *ECContext) (files []*os.File, err error) {
	openOption := os.O_TRUNC | os.O_CREATE | os.O_WRONLY
	if forRead {
		openOption = os.O_RDONLY
	}

	for i := 0; i < ctx.Total(); i++ {
		fname := baseFileName + ctx.ToExt(i)
		f, openErr := os.OpenFile(fname, openOption, 0644)
		if openErr != nil {
			return files, fmt.Errorf("failed to open file %s: %w", fname, openErr)
		}
		files = append(files, f)
	}
	return files, nil
}
// closeEcFiles closes every non-nil file in files. Close errors are ignored.
func closeEcFiles(files []*os.File) {
	for i := range files {
		if files[i] == nil {
			continue
		}
		files[i].Close()
	}
}
// encodeDataOneBatch encodes a single buffer-sized batch.
//
// For each data shard i it reads len(buffers[i]) bytes from file at
// startOffset + blockSize*i, zero-filling whatever a short read (including
// end of file) left unread. It then runs the encoder over all buffers and
// appends every shard's buffer to its output file, feeding the same bytes to
// the shard's checksum builder when one is supplied.
//
// Read errors other than io.EOF, encoder errors and write errors are returned
// unchanged.
func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File, ctx *ECContext, builders []*shardChecksumBuilder) error {
	// Fill the data-shard buffers from the source file.
	for shard := 0; shard < ctx.DataShards; shard++ {
		buf := buffers[shard]
		n, readErr := file.ReadAt(buf, startOffset+blockSize*int64(shard))
		if readErr != nil && readErr != io.EOF {
			return readErr
		}
		// Pad the unread tail with zeros so stale bytes never reach the encoder.
		for t := n; t < len(buf); t++ {
			buf[t] = 0
		}
	}

	if encodeErr := enc.Encode(buffers); encodeErr != nil {
		return encodeErr
	}

	for shard := 0; shard < ctx.Total(); shard++ {
		if _, writeErr := outputs[shard].Write(buffers[shard]); writeErr != nil {
			return writeErr
		}
		// Accumulate this shard's block CRC over exactly the bytes written.
		if builders != nil && builders[shard] != nil {
			builders[shard].write(buffers[shard])
		}
	}
	return nil
}
// encodeDatFile encodes remainingSize bytes of file into the shard files of
// baseFileName, which it creates (truncating any existing ones) and closes
// before returning.
//
// The data is consumed in rows of DataShards blocks: rows of largeBlockSize
// blocks while at least one full large row remains, then rows of
// smallBlockSize blocks until nothing remains. bufferSize is the length of
// each per-shard buffer.
func encodeDatFile(remainingSize int64, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64, ctx *ECContext, builders []*shardChecksumBuilder) error {
	enc, err := ctx.CreateEncoder()
	if err != nil {
		return fmt.Errorf("failed to create encoder: %w", err)
	}

	buffers := make([][]byte, ctx.Total())
	for shard := range buffers {
		buffers[shard] = make([]byte, bufferSize)
	}

	outputs, err := openEcFiles(baseFileName, false, ctx)
	// Deferred before the error check: openEcFiles hands back the files it
	// managed to open even when it fails.
	defer closeEcFiles(outputs)
	if err != nil {
		return fmt.Errorf("failed to open ec files %s: %v", baseFileName, err)
	}

	// Row sizes are loop-invariant, so compute them once.
	dataShards := int64(ctx.DataShards)
	largeRow := largeBlockSize * dataShards
	smallRow := smallBlockSize * dataShards

	var offset int64
	left := remainingSize
	for ; left >= largeRow; left, offset = left-largeRow, offset+largeRow {
		if encErr := encodeData(file, enc, offset, largeBlockSize, buffers, outputs, ctx, builders); encErr != nil {
			return fmt.Errorf("failed to encode large chunk data: %w", encErr)
		}
	}
	for ; left > 0; left, offset = left-smallRow, offset+smallRow {
		if encErr := encodeData(file, enc, offset, smallBlockSize, buffers, outputs, ctx, builders); encErr != nil {
			return fmt.Errorf("failed to encode small chunk data: %w", encErr)
		}
	}
	return nil
}
// rebuildEcFiles reconstructs every shard i with shardHasData[i] == false into
// outputFiles[i], reading the shards with shardHasData[i] == true from
// inputFiles[i].
//
// All present inputs must have the same, non-zero size; that size drives the
// loop, which processes the shards in chunks of at most
// ErasureCodingSmallBlockSize bytes. Any read error, short read, reconstruct
// error, write error or short write aborts the rebuild, so a truncated input
// can never yield truncated outputs that look complete. Every regenerated
// shard is synced before returning.
func rebuildEcFiles(shardHasData []bool, inputFiles []*os.File, outputFiles []*os.File, ctx *ECContext) error {
	enc, err := ctx.CreateEncoder()
	if err != nil {
		return fmt.Errorf("failed to create encoder: %w", err)
	}

	// All EC shards are equal length, so the present inputs dictate the size
	// of the outputs. -1 means "no input seen yet".
	shardSize := int64(-1)
	for id := 0; id < ctx.Total(); id++ {
		if !shardHasData[id] {
			continue
		}
		info, statErr := inputFiles[id].Stat()
		if statErr != nil {
			return fmt.Errorf("stat input shard %d: %w", id, statErr)
		}
		size := info.Size()
		switch {
		case shardSize < 0:
			shardSize = size
		case size != shardSize:
			return fmt.Errorf("ec rebuild: input shard %d size %d != %d (truncated input?)", id, size, shardSize)
		}
	}
	if shardSize <= 0 {
		return fmt.Errorf("ec rebuild: no input shard data (expected shard size %d)", shardSize)
	}

	// Read buffers exist only for present shards.
	readBufs := make([][]byte, ctx.Total())
	for id := range readBufs {
		if shardHasData[id] {
			readBufs[id] = make([]byte, ErasureCodingSmallBlockSize)
		}
	}

	for offset := int64(0); offset < shardSize; {
		chunk := shardSize - offset
		if chunk > ErasureCodingSmallBlockSize {
			chunk = ErasureCodingSmallBlockSize
		}

		// Gather the inputs for this chunk; entries left nil are the shards
		// the encoder has to reconstruct.
		shards := make([][]byte, ctx.Total())
		for id := range shards {
			if !shardHasData[id] {
				continue
			}
			block := readBufs[id][:chunk]
			n, readErr := inputFiles[id].ReadAt(block, offset)
			switch {
			case readErr != nil && readErr != io.EOF:
				return fmt.Errorf("ec rebuild read shard %d at %d: %w", id, offset, readErr)
			case int64(n) != chunk:
				// A short read means the input shard is truncated.
				return fmt.Errorf("ec rebuild short read shard %d at %d: got %d want %d", id, offset, n, chunk)
			}
			shards[id] = block
		}

		if err = enc.Reconstruct(shards); err != nil {
			return fmt.Errorf("reconstruct: %w", err)
		}

		for id := range shards {
			if shardHasData[id] {
				continue
			}
			n, writeErr := outputFiles[id].WriteAt(shards[id][:chunk], offset)
			if writeErr != nil {
				return fmt.Errorf("ec rebuild write shard %d at %d: %w", id, offset, writeErr)
			}
			if int64(n) != chunk {
				return fmt.Errorf("ec rebuild short write shard %d at %d: got %d want %d", id, offset, n, chunk)
			}
		}
		offset += chunk
	}

	// Flush every regenerated shard before it is mounted/renamed and published
	// as restored redundancy, so a crash cannot leave a peer trusting a shard
	// whose bytes never reached disk.
	for id := 0; id < ctx.Total(); id++ {
		if shardHasData[id] {
			continue
		}
		if err = outputFiles[id].Sync(); err != nil {
			return fmt.Errorf("ec rebuild sync shard %d: %w", id, err)
		}
	}
	return nil
}
// readNeedleMap loads baseFileName.idx into an in-memory needle map by walking
// the index file from its start: an entry with a non-zero offset and a
// non-deleted size sets the key, any other entry deletes it.
//
// A failure to open the index returns a nil map and an error wrapping the
// underlying cause (inspectable with errors.Is / errors.As). A failure during
// the walk returns the map built so far together with the error, so the
// caller remains responsible for closing a non-nil map.
func readNeedleMap(baseFileName string) (*needle_map.MemDb, error) {
	indexFile, err := os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644)
	if err != nil {
		return nil, fmt.Errorf("cannot read Volume Index %s.idx: %w", baseFileName, err)
	}
	defer indexFile.Close()

	cm := needle_map.NewMemDb()
	err = idx.WalkIndexFile(indexFile, 0, func(key types.NeedleId, offset types.Offset, size types.Size) error {
		if !offset.IsZero() && !size.IsDeleted() {
			cm.Set(key, offset, size)
		} else {
			cm.Delete(key)
		}
		return nil
	})
	return cm, err
}