s3: route suspended-versioning PutObject off the DLM

A suspended-versioning PUT writes the "null" version to the main object path, so
it is a single-entry object write just like a non-versioned PUT — only the
IsLatest-flag rewrite over existing versions differs, and that is best-effort
bookkeeping (recoverable on the next list-versions resync).

So route it on the object key like a normal PUT: putSuspendedVersioningObject
now calls putToFiler without an afterCreate hook (letting the route-by-key path
take it and skip the distributed lock), then runs
updateIsLatestFlagsForSuspendedVersioning best-effort after the write instead of
inside the lock.

routedObjectOwner now excludes only versioning-*enabled* buckets (whose writes go
to .versions) rather than all versioning-configured buckets, so suspended and
unversioned writes — both targeting the main object path — resolve the same
object-key owner and serialize on the same lock. Object-lock buckets still keep
the lock path; suspended DELETE (delete null + create marker) stays multi-step on
the lock.
This commit is contained in:
Chris Lu
2026-05-23 09:52:56 -07:00
parent a99c8214bb
commit 8acf21e4d1
2 changed files with 21 additions and 17 deletions
+12 -8
View File
@@ -1330,18 +1330,22 @@ func (s3a *S3ApiServer) putSuspendedVersioningObject(r *http.Request, bucket, ob
// it runs while withObjectWriteLock is still held in putToFiler.
// Doing it after putToFiler returns would race a concurrent PUT
// promoting a newer latest, which we'd then incorrectly wipe.
etag, errCode, sseMetadata = s3a.putToFiler(r, filePath, body, bucket, normalizedObject, 1, 0, func(_ *filer_pb.Entry) s3err.ErrorCode {
if err := s3a.updateIsLatestFlagsForSuspendedVersioning(bucket, normalizedObject); err != nil {
// Best-effort: a stale IsLatest flag is recoverable on the
// next list-versions resync, so don't fail the PUT.
glog.Warningf("putSuspendedVersioningObject: failed to update IsLatest flags: %v", err)
}
return s3err.ErrNone
}, false)
// The null version is written to the main object path, so this is a
// single-entry object write — route it on the object key like a normal PUT
// (no afterCreate, so putToFiler can take the route-by-key path and skip the
// distributed lock). The IsLatest flag rewrite over existing versions is
// best-effort bookkeeping, so it runs after the write rather than inside the
// atomic boundary.
etag, errCode, sseMetadata = s3a.putToFiler(r, filePath, body, bucket, normalizedObject, 1, 0, nil, false)
if errCode != s3err.ErrNone {
glog.Errorf("putSuspendedVersioningObject: failed to upload object: %v", errCode)
return "", errCode, SSEResponseMetadata{}
}
if err := s3a.updateIsLatestFlagsForSuspendedVersioning(bucket, normalizedObject); err != nil {
// Best-effort: a stale IsLatest flag is recoverable on the next
// list-versions resync, so don't fail the PUT.
glog.Warningf("putSuspendedVersioningObject: failed to update IsLatest flags: %v", err)
}
glog.V(2).Infof("putSuspendedVersioningObject: successfully created null version for %s/%s", bucket, object)
+9 -9
View File
@@ -2,7 +2,6 @@ package s3api
import (
"context"
"fmt"
"net/http"
"strings"
"time"
@@ -16,22 +15,23 @@ import (
)
// routedObjectOwner returns the filer that owns this object's metadata for
// route-by-key, or ok=false when the object's writes must keep the distributed
// lock. Versioned and object-lock buckets stay on the lock path: their
// mutations span multiple entries / extra metadata checks a single conditional
// create or delete does not cover. On any lookup error it falls back to be safe.
// route-by-key on a single-entry object write, or ok=false when the write must
// keep the distributed lock. Only versioning-*enabled* buckets are excluded:
// their writes go to <obj>/.versions and flip the latest pointer (the versioned
// finalize path handles those). Suspended and unversioned writes both go to the
// main object path, so they route here. Object-lock buckets stay on the lock
// path. On any lookup error it falls back to be safe.
func (s3a *S3ApiServer) routedObjectOwner(bucket, object string) (pb.ServerAddress, bool) {
if object == "" || s3a.objectWriteLockClient == nil {
if object == "" {
return "", false
}
if configured, err := s3a.isVersioningConfigured(bucket); err != nil || configured {
if enabled, err := s3a.isVersioningEnabled(bucket); err != nil || enabled {
return "", false
}
if locked, err := s3a.isObjectLockEnabled(bucket); err != nil || locked {
return "", false
}
lockKey := fmt.Sprintf("s3.object.write:%s", s3a.toFilerPath(bucket, object))
owner := s3a.objectWriteLockClient.PrimaryForKey(lockKey)
owner := s3a.objectWriteOwner(bucket, object)
if owner == "" {
return "", false
}