mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-06-13 23:36:45 +03:00
fix(s3api): return 503 + Retry-After when remote object not cached yet (#9233)
* fix(s3api): return 503 with Retry-After when remote object not cached yet

  When a GET hits a remote-only object whose cache fill timed out or was canceled, the handler returned 500 InternalError. SDK clients treat 500 as a server bug and surface it as a fatal error (boto3 S3DownloadFailedError), even though the cache is often still filling in the background and the next request would succeed.

  Return 503 ServiceUnavailable with Retry-After: 5 instead, matching AWS S3's "try again later" semantics. AWS SDKs already classify 503 as retryable and apply exponential backoff transparently, so clients recover without changes.

  Refs https://github.com/seaweedfs/seaweedfs/discussions/9174

* treat client cancel as cancellation, not 503

  If r.Context() is already canceled when the cache attempt returns no chunks, the cache failure is almost certainly a side-effect of the client disconnecting, not real backpressure. Surface the context error so GetObjectHandler logs at V(3) and skips writing a response, instead of synthesizing a 503 that nobody will read.

  Addresses Gemini review feedback on #9233.

* simplify comments
This commit is contained in:
@@ -1000,10 +1000,15 @@ func (s3a *S3ApiServer) streamFromVolumeServers(w http.ResponseWriter, r *http.R
|
||||
entry = cachedEntry
|
||||
glog.V(1).Infof("streamFromVolumeServers: successfully cached remote object, got %d chunks", len(chunks))
|
||||
} else {
|
||||
// Caching failed - return error to client
|
||||
glog.Errorf("streamFromVolumeServers: failed to cache remote object for streaming")
|
||||
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
|
||||
return newStreamErrorWithResponse(fmt.Errorf("failed to cache remote object for streaming"))
|
||||
// Client disconnected: report cancellation, not 503.
|
||||
if ctxErr := r.Context().Err(); ctxErr != nil {
|
||||
return ctxErr
|
||||
}
|
||||
// Cache still filling: 503 + Retry-After so SDKs back off and retry.
|
||||
glog.V(1).Infof("streamFromVolumeServers: remote object %s/%s not cached yet, returning 503 for retry", bucket, object)
|
||||
w.Header().Set("Retry-After", "5")
|
||||
s3err.WriteErrorResponse(w, r, s3err.ErrServiceUnavailable)
|
||||
return newStreamErrorWithResponse(fmt.Errorf("remote object not cached yet"))
|
||||
}
|
||||
} else if totalSize > 0 && len(entry.Content) == 0 {
|
||||
// Not a remote entry but has size without content - this is a data integrity issue
|
||||
|
||||
Reference in New Issue
Block a user