Files
seaweedfs/weed/server/filer_server.go
T
Chris Lu ab7be7867d security: hot-reload JWT signing keys on SIGHUP (#9826)
* security: reload JWT signing keys on SIGHUP

Signing keys were read once in the server constructors and never
refreshed. After a key rotation (Secret update, divergent reads) the
in-memory key stayed stale and every request kept failing "wrong jwt"
until the affected process was restarted.

Add Guard.UpdateSigningKeys and call it from the master, volume and
filer reload paths and the s3 reload hook, next to the existing
whitelist refresh. Make the global chunk-read JWT cache reloadable via
an atomic swap, and register the master's Reload with grace.OnReload --
it was never wired, so the master ignored SIGHUP entirely.

Mirror the same refresh in the Rust volume server's SIGHUP handler.

* security: swap signing keys behind an atomic pointer

Addresses review feedback on the in-place key swap: SigningKey is a
[]byte, so reassigning the Guard fields while a request handler reads
them is a data race that can tear the multi-word slice header and read
out of bounds.

Hold the four signing-key fields in an immutable signingConfig snapshot
behind atomic.Pointer; UpdateSigningKeys swaps the whole pointer, so a
reader sees either the old keys or the new ones. Reads go through new
SigningKey/ExpiresAfterSec/ReadSigningKey/ReadExpiresAfterSec accessors.

The Rust guard is already safe: every read and the SIGHUP write go
through the shared RwLock<Guard>.

* security: fold whitelist + auth state into the atomic snapshot

Review follow-up. UpdateSigningKeys still wrote isWriteActive while the
request path read it (and the whitelist maps) unsynchronized, so a SIGHUP
under load could expose an inconsistent mix of activation bits and
whitelist contents.

Move all hot-reloadable Guard state -- keys, expirations, whitelist, and
the activation flags -- into a single immutable guardState swapped behind
one atomic.Pointer. The Update* methods take a small mutex to serialize
the read-modify-write; readers stay lock-free. The concurrency test now
also rotates the whitelist and probes IsWhiteListed under -race.

Also read each signing key once per branch in the volume/filer JWT auth
checks, so a reload landing mid-check can't take the allow-fast-path
after auth was enabled or verify against a different key than the branch
saw.
2026-06-04 22:26:08 -07:00

362 lines
14 KiB
Go

package weed_server
import (
"context"
"fmt"
"net/http"
"os"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/seaweedfs/seaweedfs/weed/credential"
"github.com/seaweedfs/seaweedfs/weed/stats"
"golang.org/x/sync/singleflight"
"google.golang.org/grpc"
"github.com/seaweedfs/seaweedfs/weed/util/grace"
"github.com/seaweedfs/seaweedfs/weed/operation"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
"github.com/seaweedfs/seaweedfs/weed/filer"
_ "github.com/seaweedfs/seaweedfs/weed/filer/arangodb"
_ "github.com/seaweedfs/seaweedfs/weed/filer/cassandra"
_ "github.com/seaweedfs/seaweedfs/weed/filer/cassandra2"
_ "github.com/seaweedfs/seaweedfs/weed/filer/elastic/v7"
_ "github.com/seaweedfs/seaweedfs/weed/filer/etcd"
_ "github.com/seaweedfs/seaweedfs/weed/filer/foundationdb"
_ "github.com/seaweedfs/seaweedfs/weed/filer/hbase"
_ "github.com/seaweedfs/seaweedfs/weed/filer/leveldb"
_ "github.com/seaweedfs/seaweedfs/weed/filer/leveldb2"
_ "github.com/seaweedfs/seaweedfs/weed/filer/leveldb3"
_ "github.com/seaweedfs/seaweedfs/weed/filer/mongodb"
_ "github.com/seaweedfs/seaweedfs/weed/filer/mysql"
_ "github.com/seaweedfs/seaweedfs/weed/filer/mysql2"
"github.com/seaweedfs/seaweedfs/weed/filer/posixlock"
_ "github.com/seaweedfs/seaweedfs/weed/filer/postgres"
_ "github.com/seaweedfs/seaweedfs/weed/filer/postgres2"
_ "github.com/seaweedfs/seaweedfs/weed/filer/redis"
_ "github.com/seaweedfs/seaweedfs/weed/filer/redis2"
_ "github.com/seaweedfs/seaweedfs/weed/filer/redis3"
_ "github.com/seaweedfs/seaweedfs/weed/filer/sqlite"
_ "github.com/seaweedfs/seaweedfs/weed/filer/tarantool"
_ "github.com/seaweedfs/seaweedfs/weed/filer/ydb"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/notification"
_ "github.com/seaweedfs/seaweedfs/weed/notification/aws_sqs"
_ "github.com/seaweedfs/seaweedfs/weed/notification/gocdk_pub_sub"
_ "github.com/seaweedfs/seaweedfs/weed/notification/google_pub_sub"
_ "github.com/seaweedfs/seaweedfs/weed/notification/kafka"
_ "github.com/seaweedfs/seaweedfs/weed/notification/log"
_ "github.com/seaweedfs/seaweedfs/weed/notification/webhook"
"github.com/seaweedfs/seaweedfs/weed/security"
)
type FilerOption struct {
Masters *pb.ServerDiscovery
FilerGroup string
Collection string
DefaultReplication string
DisableDirListing bool
MaxMB int
DirListingLimit int
DataCenter string
Rack string
DataNode string
DefaultLevelDbDir string
DisableHttp bool
Host pb.ServerAddress
recursiveDelete bool
Cipher bool
SaveToFilerLimit int64
ConcurrentUploadLimit int64
ConcurrentFileUploadLimit int64
ShowUIDirectoryDelete bool
DownloadMaxBytesPs int64
DiskType string
AllowedOrigins []string
ExposeDirectoryData bool
TusBasePath string
S3ConfigFile string // optional path to static S3 identity config file
CredentialManager *credential.CredentialManager
}
type FilerServer struct {
inFlightDataSize int64
inFlightUploads int64
listenersWaits int64
// notifying clients
listenersLock sync.Mutex
listenersCond *sync.Cond
inFlightDataLimitCond *sync.Cond
filer_pb.UnimplementedSeaweedFilerServer
option *FilerOption
filer *filer.Filer
filerGuard *security.Guard
volumeGuard *security.Guard
grpcDialOption grpc.DialOption
// metrics read from the master
metricsAddress string
metricsIntervalSec int
// track known metadata listeners
knownListenersLock sync.Mutex
knownListeners map[int32]int32
// deduplicates concurrent remote object caching operations
remoteCacheGroup singleflight.Group
recentCopyRequestsMu sync.Mutex
recentCopyRequests map[string]recentCopyRequest
// credential manager for IAM operations
CredentialManager *credential.CredentialManager
// mountPeerRegistry backs the MountRegister / MountList RPCs for peer
// chunk sharing (tier 1). Always populated.
mountPeerRegistry *filer.MountPeerRegistry
// entryLockTable serializes mutations to the same entry path on this filer.
// CreateEntry takes it today; UpdateEntry and DeleteEntry are intended to take
// it too as their callers route a key's writes to this node, making it the
// local serialization point for read-modify-write operations that replaces
// the distributed lock for that key. Idle keys are evicted automatically, so
// the table stays bounded.
entryLockTable *util.LockTable[util.FullPath]
// posixLocks is the in-memory authority for cross-mount POSIX advisory locks
// on inodes this filer owns (per the route-by-key ring). Lock state is kept
// here rather than in replicated metadata: it is transient coordination, so
// keeping it off the meta-log avoids churn.
posixLocks *posixlock.Manager
// posixLockSweeperStop stops the lease-reaping sweeper goroutine on Shutdown.
posixLockSweeperStop chan struct{}
// posixLockReadyAt is the unix-nanos when this filer began serving POSIX
// locks. For posixLockWarmup after it, the owner defers would-be grants while
// mounts re-assert, so a (re)started owner does not double-grant from empty
// state. Atomic so the handler reads it without locking; 0 means "not warming
// up" (e.g. in tests).
posixLockReadyAt atomic.Int64
}
func NewFilerServer(defaultMux, readonlyMux *http.ServeMux, option *FilerOption) (fs *FilerServer, err error) {
v := util.GetViper()
signingKey := v.GetString("jwt.filer_signing.key")
v.SetDefault("jwt.filer_signing.expires_after_seconds", 10)
expiresAfterSec := v.GetInt("jwt.filer_signing.expires_after_seconds")
readSigningKey := v.GetString("jwt.filer_signing.read.key")
v.SetDefault("jwt.filer_signing.read.expires_after_seconds", 60)
readExpiresAfterSec := v.GetInt("jwt.filer_signing.read.expires_after_seconds")
volumeSigningKey := v.GetString("jwt.signing.key")
v.SetDefault("jwt.signing.expires_after_seconds", 10)
volumeExpiresAfterSec := v.GetInt("jwt.signing.expires_after_seconds")
volumeReadSigningKey := v.GetString("jwt.signing.read.key")
v.SetDefault("jwt.signing.read.expires_after_seconds", 60)
volumeReadExpiresAfterSec := v.GetInt("jwt.signing.read.expires_after_seconds")
v.SetDefault("cors.allowed_origins.values", "*")
allowedOrigins := v.GetString("cors.allowed_origins.values")
domains := strings.Split(allowedOrigins, ",")
option.AllowedOrigins = domains
v.SetDefault("filer.expose_directory_metadata.enabled", true)
returnDirMetadata := v.GetBool("filer.expose_directory_metadata.enabled")
option.ExposeDirectoryData = returnDirMetadata
fs = &FilerServer{
option: option,
grpcDialOption: security.LoadClientTLS(util.GetViper(), "grpc.filer"),
knownListeners: make(map[int32]int32),
inFlightDataLimitCond: sync.NewCond(new(sync.Mutex)),
recentCopyRequests: make(map[string]recentCopyRequest),
CredentialManager: option.CredentialManager,
entryLockTable: util.NewLockTable[util.FullPath](),
posixLocks: posixlock.NewManager(),
}
fs.startPosixLockSweeper()
fs.mountPeerRegistry = filer.NewMountPeerRegistry()
go fs.runMountPeerRegistrySweeper()
fs.listenersCond = sync.NewCond(&fs.listenersLock)
option.Masters.RefreshBySrvIfAvailable()
if len(option.Masters.GetInstances()) == 0 {
glog.Fatal("master list is required!")
}
if !util.LoadConfiguration("filer", false) {
v.SetDefault("leveldb2.enabled", true)
v.SetDefault("leveldb2.dir", option.DefaultLevelDbDir)
_, err := os.Stat(option.DefaultLevelDbDir)
if os.IsNotExist(err) {
os.MkdirAll(option.DefaultLevelDbDir, 0755)
}
glog.V(0).Infof("default to create filer store dir in %s", option.DefaultLevelDbDir)
} else {
glog.Warningf("skipping default store dir in %s", option.DefaultLevelDbDir)
}
util.LoadConfiguration("notification", false)
v.SetDefault("filer.options.max_file_name_length", 255)
maxFilenameLength := v.GetUint32("filer.options.max_file_name_length")
glog.V(0).Infof("max_file_name_length %d", maxFilenameLength)
fs.filer = filer.NewFiler(*option.Masters, fs.grpcDialOption, option.Host, option.FilerGroup, option.Collection, option.DefaultReplication, option.DataCenter, maxFilenameLength, func() {
if atomic.LoadInt64(&fs.listenersWaits) > 0 {
fs.listenersCond.Broadcast()
}
})
fs.filer.Cipher = option.Cipher
// we do not support IP whitelist right now https://github.com/seaweedfs/seaweedfs/issues/7094
if v.GetString("guard.white_list") != "" {
glog.Warningf("filer: guard.white_list is configured but the IP whitelist feature is currently disabled. See https://github.com/seaweedfs/seaweedfs/issues/7094")
}
fs.filerGuard = security.NewGuard([]string{}, signingKey, expiresAfterSec, readSigningKey, readExpiresAfterSec)
fs.volumeGuard = security.NewGuard([]string{}, volumeSigningKey, volumeExpiresAfterSec, volumeReadSigningKey, volumeReadExpiresAfterSec)
fs.checkWithMaster()
go stats.LoopPushingMetric("filer", string(fs.option.Host), fs.metricsAddress, fs.metricsIntervalSec)
go fs.filer.MasterClient.KeepConnectedToMaster(context.Background())
fs.option.recursiveDelete = v.GetBool("filer.options.recursive_delete")
v.SetDefault("filer.options.buckets_folder", "/buckets")
fs.filer.DirBucketsPath = v.GetString("filer.options.buckets_folder")
// TODO deprecated, will be removed after 2020-12-31
// replaced by https://github.com/seaweedfs/seaweedfs/wiki/Path-Specific-Configuration
// fs.filer.FsyncBuckets = v.GetStringSlice("filer.options.buckets_fsync")
isFresh := fs.filer.LoadConfiguration(v)
notification.LoadConfiguration(v, "notification.")
handleStaticResources(defaultMux)
if !option.DisableHttp {
defaultMux.HandleFunc("/healthz", requestIDMiddleware(fs.filerHealthzHandler))
defaultMux.HandleFunc("/readyz", requestIDMiddleware(fs.filerHealthzHandler))
// TUS resumable upload protocol handler
if option.TusBasePath != "" {
// Normalize TusPath to always have a leading slash and no trailing slash
if !strings.HasPrefix(option.TusBasePath, "/") {
option.TusBasePath = "/" + option.TusBasePath
}
option.TusBasePath = strings.TrimRight(option.TusBasePath, "/")
// Disallow using "/" as TUS base to avoid hijacking all filer routes
if option.TusBasePath == "" {
glog.Warningf("Invalid TUS base path; TUS disabled (must not be root '/')")
} else {
handlePath := option.TusBasePath + "/"
defaultMux.HandleFunc(handlePath, fs.filerGuard.WhiteList(requestIDMiddleware(fs.tusHandler)))
// Start background cleanup of expired TUS sessions (every hour)
fs.StartTusSessionCleanup(1 * time.Hour)
}
}
defaultMux.HandleFunc("/", fs.filerGuard.WhiteList(requestIDMiddleware(fs.filerHandler)))
}
if defaultMux != readonlyMux {
handleStaticResources(readonlyMux)
readonlyMux.HandleFunc("/healthz", requestIDMiddleware(fs.filerHealthzHandler))
readonlyMux.HandleFunc("/readyz", requestIDMiddleware(fs.filerHealthzHandler))
readonlyMux.HandleFunc("/", fs.filerGuard.WhiteList(requestIDMiddleware(fs.readonlyFilerHandler)))
}
existingNodes := fs.filer.ListExistingPeerUpdates(context.Background())
startFromTime := time.Now().Add(-filer.LogFlushInterval)
if isFresh {
glog.V(0).Infof("%s bootstrap from peers %+v", option.Host, existingNodes)
if err := fs.filer.MaybeBootstrapFromOnePeer(option.Host, existingNodes, startFromTime); err != nil {
glog.Fatalf("%s bootstrap from %+v: %v", option.Host, existingNodes, err)
}
}
v.SetDefault("filer.options.s3.empty_folder_cleanup_delay", "2m")
if d, err := time.ParseDuration(v.GetString("filer.options.s3.empty_folder_cleanup_delay")); err == nil {
fs.filer.EmptyFolderCleanupDelay = d
}
fs.filer.AggregateFromPeers(option.Host, existingNodes, startFromTime)
fs.filer.LoadFilerConf()
fs.filer.LoadRemoteStorageConfAndMapping()
grace.OnReload(fs.Reload)
fs.SetupDlmReplication()
fs.filer.Dlm.LockRing.SetTakeSnapshotCallback(fs.OnDlmChangeSnapshot)
if fs.CredentialManager != nil {
fs.CredentialManager.SetFilerAddressFunc(func() pb.ServerAddress {
return fs.option.Host
}, fs.grpcDialOption)
fs.CredentialManager.SetMasterClient(fs.filer.MasterClient, fs.grpcDialOption)
}
return fs, nil
}
func (fs *FilerServer) checkWithMaster() {
isConnected := false
for !isConnected {
fs.option.Masters.RefreshBySrvIfAvailable()
for _, master := range fs.option.Masters.GetInstances() {
readErr := operation.WithMasterServerClient(context.Background(), false, master, fs.grpcDialOption, func(masterClient master_pb.SeaweedClient) error {
resp, err := masterClient.GetMasterConfiguration(context.Background(), &master_pb.GetMasterConfigurationRequest{})
if err != nil {
return fmt.Errorf("get master %s configuration: %v", master, err)
}
fs.metricsAddress, fs.metricsIntervalSec = resp.MetricsAddress, int(resp.MetricsIntervalSeconds)
return nil
})
if readErr == nil {
isConnected = true
} else {
time.Sleep(7 * time.Second)
}
}
}
}
// Shutdown gracefully shuts down the filer server by waiting for in-flight uploads to complete.
// This prevents data corruption when the process receives SIGTERM during active uploads.
func (fs *FilerServer) Shutdown() {
glog.V(0).Infof("Shutting down filer")
if fs.posixLockSweeperStop != nil {
close(fs.posixLockSweeperStop)
}
fs.filer.Shutdown()
}
func (fs *FilerServer) Reload() {
glog.V(0).Infoln("Reload filer server...")
util.LoadConfiguration("security", false)
v := util.GetViper()
fs.filerGuard.UpdateSigningKeys(
v.GetString("jwt.filer_signing.key"),
v.GetInt("jwt.filer_signing.expires_after_seconds"),
v.GetString("jwt.filer_signing.read.key"),
v.GetInt("jwt.filer_signing.read.expires_after_seconds"),
)
fs.volumeGuard.UpdateSigningKeys(
v.GetString("jwt.signing.key"),
v.GetInt("jwt.signing.expires_after_seconds"),
v.GetString("jwt.signing.read.key"),
v.GetInt("jwt.signing.read.expires_after_seconds"),
)
util_http.ReloadJwtSigningReadConfig()
}