mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-06-13 23:36:45 +03:00
filer: warm-up + fail-closed cooling for POSIX locks on owner (re)start (#9673)
After a (re)start the owner defers would-be grants for posixLockWarmup while mounts re-assert, trusting only locally-visible conflicts, so it does not double-grant from empty state; a deferred grant is a retry for SetLkw and EAGAIN for non-blocking SetLk, never a spurious grant. Cooling now fail-closes: if the previous owner is unreachable during a ring change, defer rather than risk a double-grant. readyAt is atomic so the handler reads it without locking.
This commit is contained in:
@@ -20,12 +20,18 @@ const (
|
||||
// posixCoolingProbeTimeout bounds the dual-read probe to the prior owner so a
|
||||
// slow peer can't stall a non-blocking lock call during the cooling window.
|
||||
posixCoolingProbeTimeout = 2 * time.Second
|
||||
// posixLockWarmup is how long after a (re)start the owner defers would-be
|
||||
// grants while mounts re-assert their held locks, so it does not double-grant
|
||||
// a lock its fresh, still-empty state has not yet rebuilt. Must exceed the
|
||||
// mount keepalive interval and stay below the TTL.
|
||||
posixLockWarmup = 10 * time.Second
|
||||
)
|
||||
|
||||
// startPosixLockSweeper periodically reaps the locks of leased sessions (mounts)
|
||||
// that stopped sending keepalives. Sessions that never renew are never reaped, so
|
||||
// this is inert until mounts run with -posixLock.
|
||||
func (fs *FilerServer) startPosixLockSweeper() {
|
||||
fs.posixLockReadyAt.Store(time.Now().UnixNano())
|
||||
fs.posixLockSweeperStop = make(chan struct{})
|
||||
go func() {
|
||||
ticker := time.NewTicker(posixLockSweepInterval)
|
||||
@@ -93,13 +99,18 @@ func (fs *FilerServer) PosixLock(ctx context.Context, req *filer_pb.PosixLockReq
|
||||
resp := &filer_pb.PosixLockResponse{}
|
||||
switch req.Op {
|
||||
case filer_pb.PosixLockOp_TRY_LOCK:
|
||||
// During a ring change, a previous owner may still hold a conflicting lock
|
||||
// this fresh owner has not rebuilt yet; consult it before granting.
|
||||
// A fresh owner's state may be incomplete (post-restart warm-up, or a ring
|
||||
// change whose previous owner can't be reached): report a known conflict,
|
||||
// else defer the grant so the client retries rather than risk a
|
||||
// double-grant. A deferred grant becomes EAGAIN for non-blocking SetLk and
|
||||
// a retry for the blocking SetLkw poll — never a spurious grant.
|
||||
if !req.CoolingProbe {
|
||||
if c, found := fs.coolingConflict(ctx, req.Key, lk); found {
|
||||
if c, deferGrant := fs.posixCoolingOrWarmup(ctx, req.Key, lk); c != nil {
|
||||
resp.HasConflict = true
|
||||
resp.Conflict = c
|
||||
break
|
||||
} else if deferGrant {
|
||||
break
|
||||
}
|
||||
}
|
||||
if c, granted := fs.posixLocks.TryLock(req.Key, lk); granted {
|
||||
@@ -111,8 +122,9 @@ func (fs *FilerServer) PosixLock(ctx context.Context, req *filer_pb.PosixLockReq
|
||||
case filer_pb.PosixLockOp_UNLOCK:
|
||||
fs.posixLocks.Unlock(req.Key, lk)
|
||||
case filer_pb.PosixLockOp_GET_LK:
|
||||
// A query is best-effort: report a known conflict but never defer.
|
||||
if !req.CoolingProbe {
|
||||
if c, found := fs.coolingConflict(ctx, req.Key, lk); found {
|
||||
if c, _ := fs.posixCoolingOrWarmup(ctx, req.Key, lk); c != nil {
|
||||
resp.HasConflict = true
|
||||
resp.Conflict = c
|
||||
break
|
||||
@@ -147,13 +159,37 @@ func (fs *FilerServer) PosixLock(ctx context.Context, req *filer_pb.PosixLockReq
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// coolingConflict asks the key's previous owner, during a ring-change cooling
|
||||
// window, whether it still holds a lock that blocks lk — so a fresh owner does
|
||||
// not double-grant before re-assertion rebuilds its local state. Best-effort: if
|
||||
// the previous owner is unreachable (e.g. it left, which caused the change, so
|
||||
// its locks are gone) the caller proceeds. The probe is marked cooling_probe so
|
||||
// the previous owner answers from local state without itself cooling off.
|
||||
func (fs *FilerServer) coolingConflict(ctx context.Context, key string, lk posixlock.Range) (*filer_pb.PosixLockRange, bool) {
|
||||
// posixWarmingUp reports whether this filer is still within posixLockWarmup of
|
||||
// when it began serving POSIX locks. The zero readyAt (e.g. in tests) is never
|
||||
// warming up.
|
||||
func (fs *FilerServer) posixWarmingUp() bool {
|
||||
readyAt := fs.posixLockReadyAt.Load()
|
||||
return readyAt != 0 && time.Since(time.Unix(0, readyAt)) < posixLockWarmup
|
||||
}
|
||||
|
||||
// posixCoolingOrWarmup decides whether a fresh owner can trust its local state
|
||||
// for key before granting. It returns a known blocking lock (conflict != nil),
|
||||
// or asks the caller to defer the grant (deferGrant) when the state may be
|
||||
// incomplete and no conflict can be confirmed. It returns (nil, false) when the
|
||||
// owner is authoritative and should consult its local table.
|
||||
//
|
||||
// - Warm-up: just after a (re)start the owner is still rebuilding from
|
||||
// re-assertions, so only a locally-visible conflict is trustworthy; a
|
||||
// would-be grant is deferred.
|
||||
// - Ring change (cooling window): the previous owner may still hold a lock this
|
||||
// owner hasn't rebuilt. Ask it (marked cooling_probe so it answers locally
|
||||
// without recursing), under a short deadline so a slow peer can't stall the
|
||||
// non-blocking lock path. If it is unreachable — typically because it
|
||||
// crashed, which caused the change — we cannot confirm, so we defer rather
|
||||
// than risk a double-grant; re-assertion rebuilds this owner before the
|
||||
// window ends.
|
||||
func (fs *FilerServer) posixCoolingOrWarmup(ctx context.Context, key string, lk posixlock.Range) (conflict *filer_pb.PosixLockRange, deferGrant bool) {
|
||||
if fs.posixWarmingUp() {
|
||||
if c, found := fs.posixLocks.GetLk(key, lk); found {
|
||||
return posixRangeToPb(c), false
|
||||
}
|
||||
return nil, true
|
||||
}
|
||||
if fs.filer.Dlm == nil {
|
||||
return nil, false
|
||||
}
|
||||
@@ -161,9 +197,6 @@ func (fs *FilerServer) coolingConflict(ctx context.Context, key string, lk posix
|
||||
if prior == "" || prior == fs.option.Host {
|
||||
return nil, false
|
||||
}
|
||||
// Bound the probe with its own short deadline: it runs on the non-blocking
|
||||
// lock path, so a slow prior owner must not stall the caller. On timeout the
|
||||
// probe is treated as unreachable (best-effort).
|
||||
probeCtx, cancel := context.WithTimeout(ctx, posixCoolingProbeTimeout)
|
||||
defer cancel()
|
||||
var resp *filer_pb.PosixLockResponse
|
||||
@@ -179,11 +212,13 @@ func (fs *FilerServer) coolingConflict(ctx context.Context, key string, lk posix
|
||||
return e
|
||||
})
|
||||
if err != nil {
|
||||
glog.V(2).InfofCtx(ctx, "posix cooling probe %s -> %s: %v", key, prior, err)
|
||||
return nil, false
|
||||
// Cannot confirm — defer rather than risk a double-grant (the prior owner
|
||||
// likely crashed; re-assertion rebuilds this owner before the window ends).
|
||||
glog.V(2).InfofCtx(ctx, "posix cooling probe %s -> %s: %v (deferring)", key, prior, err)
|
||||
return nil, true
|
||||
}
|
||||
if resp.GetHasConflict() {
|
||||
return resp.GetConflict(), true
|
||||
return resp.GetConflict(), false
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
|
||||
@@ -149,3 +149,30 @@ func TestPosixLockForwardsToOwner(t *testing.T) {
|
||||
t.Fatal("sender must forward, not apply locally")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPosixLockWarmupDefersGrants(t *testing.T) {
|
||||
fs := newPosixTestServer()
|
||||
fs.posixLockReadyAt.Store(time.Now().UnixNano()) // warming up
|
||||
|
||||
// A would-be grant is deferred (not granted, no conflict) so the client retries.
|
||||
r := posixOp(t, fs, filer_pb.PosixLockOp_TRY_LOCK, pbLock(0, 99, posixlock.Write, 1, 1, 7, false))
|
||||
if r.Granted {
|
||||
t.Fatal("grant should be deferred during warm-up")
|
||||
}
|
||||
if r.HasConflict {
|
||||
t.Fatalf("deferred grant should report no conflict, got %+v", r.Conflict)
|
||||
}
|
||||
|
||||
// A lock the owner already knows about is still reported as a conflict.
|
||||
fs.posixLocks.TryLock("s3.fuse.lock:/x", posixlock.Range{Start: 0, End: 99, Type: posixlock.Write, Sid: 9, Owner: 1, Pid: 5})
|
||||
if r := posixOp(t, fs, filer_pb.PosixLockOp_TRY_LOCK, pbLock(50, 60, posixlock.Write, 1, 1, 7, false)); r.Granted || !r.HasConflict {
|
||||
t.Fatalf("known conflict should be reported during warm-up: %+v", r)
|
||||
}
|
||||
|
||||
// After warm-up, grants resume.
|
||||
posixOp(t, fs, filer_pb.PosixLockOp_UNLOCK, pbLock(0, 99, posixlock.Unlock, 9, 1, 5, false))
|
||||
fs.posixLockReadyAt.Store(time.Now().Add(-2 * posixLockWarmup).UnixNano())
|
||||
if r := posixOp(t, fs, filer_pb.PosixLockOp_TRY_LOCK, pbLock(0, 99, posixlock.Write, 1, 1, 7, false)); !r.Granted {
|
||||
t.Fatal("grant should succeed after warm-up")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -141,6 +141,12 @@ type FilerServer struct {
|
||||
posixLocks *posixlock.Manager
|
||||
// posixLockSweeperStop stops the lease-reaping sweeper goroutine on Shutdown.
|
||||
posixLockSweeperStop chan struct{}
|
||||
// posixLockReadyAt is the unix-nanos when this filer began serving POSIX
|
||||
// locks. For posixLockWarmup after it, the owner defers would-be grants while
|
||||
// mounts re-assert, so a (re)started owner does not double-grant from empty
|
||||
// state. Atomic so the handler reads it without locking; 0 means "not warming
|
||||
// up" (e.g. in tests).
|
||||
posixLockReadyAt atomic.Int64
|
||||
}
|
||||
|
||||
func NewFilerServer(defaultMux, readonlyMux *http.ServeMux, option *FilerOption) (fs *FilerServer, err error) {
|
||||
|
||||
Reference in New Issue
Block a user