diff --git a/lbclient.go b/lbclient.go index 690f4d0..9a871c0 100644 --- a/lbclient.go +++ b/lbclient.go @@ -1,11 +1,16 @@ package fasthttp import ( + "errors" "sync" "sync/atomic" "time" ) +// ErrNoAvailableClients is returned by LBClient methods when no clients are +// available, for example after every client has been removed. +var ErrNoAvailableClients = errors.New("no available clients") + // BalancingClient is the interface for clients, which may be passed // to LBClient.Clients. type BalancingClient interface { @@ -62,13 +67,21 @@ const DefaultLBClientTimeout = time.Second // DoDeadline calls DoDeadline on the least loaded client. func (cc *LBClient) DoDeadline(req *Request, resp *Response, deadline time.Time) error { - return cc.get().DoDeadline(req, resp, deadline) + c := cc.get() + if c == nil { + return ErrNoAvailableClients + } + return c.DoDeadline(req, resp, deadline) } // DoTimeout calculates deadline and calls DoDeadline on the least loaded client. func (cc *LBClient) DoTimeout(req *Request, resp *Response, timeout time.Duration) error { deadline := time.Now().Add(timeout) - return cc.get().DoDeadline(req, resp, deadline) + c := cc.get() + if c == nil { + return ErrNoAvailableClients + } + return c.DoDeadline(req, resp, deadline) } // Do calculates timeout using LBClient.Timeout and calls DoTimeout @@ -100,11 +113,11 @@ func (cc *LBClient) init() { // returns the new total number of clients. func (cc *LBClient) AddClient(c BalancingClient) int { cc.mu.Lock() + defer cc.mu.Unlock() cc.cs = append(cc.cs, &lbClient{ c: c, healthCheck: cc.HealthCheck, }) - cc.mu.Unlock() return len(cc.cs) } @@ -113,18 +126,24 @@ func (cc *LBClient) AddClient(c BalancingClient) int { // Returns the new total number of clients. func (cc *LBClient) RemoveClients(rc func(BalancingClient) bool) int { cc.mu.Lock() + // defer so a panic in the user-supplied rc can't leak the lock. + defer cc.mu.Unlock() n := 0 - for idx, cs := range cc.cs { - cc.cs[idx] = nil + for _, cs := range cc.cs { if rc(cs.c) { continue } cc.cs[n] = cs n++ } + // Nil out the now-unused tail so removed clients can be garbage collected. + // This is done only after the loop so a panic in rc can't leave cc.cs with + // nil holes that would later crash get(). + for i := n; i < len(cc.cs); i++ { + cc.cs[i] = nil + } cc.cs = cc.cs[:n] - cc.mu.Unlock() return len(cc.cs) } @@ -132,7 +151,13 @@ func (cc *LBClient) get() *lbClient { cc.once.Do(cc.init) cc.mu.RLock() + defer cc.mu.RUnlock() + cs := cc.cs + if len(cs) == 0 { + // No clients (e.g. all removed): avoid panicking on cs[0]. + return nil + } minC := cs[0] minN := minC.PendingRequests() @@ -146,7 +171,6 @@ func (cc *LBClient) get() *lbClient { minT = t } } - cc.mu.RUnlock() return minC } diff --git a/lbclient_test.go b/lbclient_test.go new file mode 100644 index 0000000..913ee8e --- /dev/null +++ b/lbclient_test.go @@ -0,0 +1,122 @@ +package fasthttp + +import ( + "errors" + "testing" + "time" +) + +type mockBalancingClient struct { + err error + pending int +} + +func (m *mockBalancingClient) DoDeadline(_ *Request, _ *Response, _ time.Time) error { + return m.err +} + +func (m *mockBalancingClient) PendingRequests() int { + return m.pending +} + +// TestLBClientRemoveAllClients reproduces the deadlock reported in #2270: +// once every client is removed, cc.cs is empty and get() used to panic on +// cs[0] while holding (and never releasing) the RLock, deadlocking every +// subsequent AddClient/RemoveClients call. +func TestLBClientRemoveAllClients(t *testing.T) { + t.Parallel() + + lbc := &LBClient{Clients: []BalancingClient{&mockBalancingClient{}}} + + var req Request + var resp Response + deadline := func() time.Time { return time.Now().Add(time.Second) } + + // Trigger the lazy init and confirm the configured client serves the request. + if err := lbc.DoDeadline(&req, &resp, deadline()); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Remove every client so cc.cs becomes empty. + if n := lbc.RemoveClients(func(BalancingClient) bool { return true }); n != 0 { + t.Fatalf("unexpected client count after removal: %d", n) + } + + // get() must not panic and must release the RLock, so the request fails + // cleanly with ErrNoAvailableClients instead. + if err := lbc.DoDeadline(&req, &resp, deadline()); !errors.Is(err, ErrNoAvailableClients) { + t.Fatalf("unexpected error: %v. Expecting %v", err, ErrNoAvailableClients) + } + + // With the bug the leaked RLock makes AddClient block forever. + done := make(chan struct{}) + go func() { + lbc.AddClient(&mockBalancingClient{}) + close(done) + }() + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("AddClient deadlocked after all clients were removed") + } + + // The freshly added client must be usable again. + if err := lbc.DoDeadline(&req, &resp, deadline()); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +// TestLBClientRemoveClientsCallbackPanic ensures a panic in the user-supplied +// RemoveClients callback does not leak the write lock and deadlock later calls. +func TestLBClientRemoveClientsCallbackPanic(t *testing.T) { + t.Parallel() + + lbc := &LBClient{Clients: []BalancingClient{ + &mockBalancingClient{}, + &mockBalancingClient{}, + &mockBalancingClient{}, + }} + + // Trigger the lazy init so cc.cs is populated and the callback runs. + var req Request + var resp Response + if err := lbc.DoDeadline(&req, &resp, time.Now().Add(time.Second)); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Panic part way through so the old code would have already niled an + // earlier cc.cs slot, leaving a hole that crashes later get() calls. + calls := 0 + func() { + defer func() { + if recover() == nil { + t.Fatal("expected panic from RemoveClients callback") + } + }() + lbc.RemoveClients(func(BalancingClient) bool { + calls++ + if calls == 2 { + panic("boom") + } + return false + }) + }() + + // If the lock leaked, AddClient would block forever. + done := make(chan struct{}) + go func() { + lbc.AddClient(&mockBalancingClient{}) + close(done) + }() + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("AddClient deadlocked after RemoveClients callback panicked") + } + + // cc.cs must not be left with nil holes: a request still has to work + // instead of panicking on a nil client. + if err := lbc.DoDeadline(&req, &resp, time.Now().Add(time.Second)); err != nil { + t.Fatalf("unexpected error after panicking RemoveClients: %v", err) + } +}