mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-06-13 23:36:45 +03:00
ab7be7867d
* security: reload JWT signing keys on SIGHUP Signing keys were read once in the server constructors and never refreshed. After a key rotation (Secret update, divergent reads) the in-memory key stayed stale and every request kept failing "wrong jwt" until the affected process was restarted. Add Guard.UpdateSigningKeys and call it from the master, volume and filer reload paths and the s3 reload hook, next to the existing whitelist refresh. Make the global chunk-read JWT cache reloadable via an atomic swap, and register the master's Reload with grace.OnReload -- it was never wired, so the master ignored SIGHUP entirely. Mirror the same refresh in the Rust volume server's SIGHUP handler. * security: swap signing keys behind an atomic pointer Addresses review feedback on the in-place key swap: SigningKey is a []byte, so reassigning the Guard fields while a request handler reads them is a data race that can tear the multi-word slice header and read out of bounds. Hold the four signing-key fields in an immutable signingConfig snapshot behind atomic.Pointer; UpdateSigningKeys swaps the whole pointer, so a reader sees either the old keys or the new ones. Reads go through new SigningKey/ExpiresAfterSec/ReadSigningKey/ReadExpiresAfterSec accessors. The Rust guard is already safe: every read and the SIGHUP write go through the shared RwLock<Guard>. * security: fold whitelist + auth state into the atomic snapshot Review follow-up. UpdateSigningKeys still wrote isWriteActive while the request path read it (and the whitelist maps) unsynchronized, so a SIGHUP under load could expose an inconsistent mix of activation bits and whitelist contents. Move all hot-reloadable Guard state -- keys, expirations, whitelist, and the activation flags -- into a single immutable guardState swapped behind one atomic.Pointer. The Update* methods take a small mutex to serialize the read-modify-write; readers stay lock-free. The concurrency test now also rotates the whitelist and probes IsWhiteListed under -race. Also read each signing key once per branch in the volume/filer JWT auth checks, so a reload landing mid-check can't take the allow-fast-path after auth was enabled or verify against a different key than the branch saw.
362 lines
14 KiB
Go
362 lines
14 KiB
Go
package weed_server
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"os"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/credential"
|
|
"github.com/seaweedfs/seaweedfs/weed/stats"
|
|
"golang.org/x/sync/singleflight"
|
|
|
|
"google.golang.org/grpc"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/util/grace"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/operation"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/filer"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/arangodb"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/cassandra"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/cassandra2"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/elastic/v7"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/etcd"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/foundationdb"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/hbase"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/leveldb"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/leveldb2"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/leveldb3"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/mongodb"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/mysql"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/mysql2"
|
|
"github.com/seaweedfs/seaweedfs/weed/filer/posixlock"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/postgres"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/postgres2"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/redis"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/redis2"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/redis3"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/sqlite"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/tarantool"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/filer/ydb"
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/notification"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/notification/aws_sqs"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/notification/gocdk_pub_sub"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/notification/google_pub_sub"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/notification/kafka"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/notification/log"
|
|
_ "github.com/seaweedfs/seaweedfs/weed/notification/webhook"
|
|
"github.com/seaweedfs/seaweedfs/weed/security"
|
|
)
|
|
|
|
type FilerOption struct {
|
|
Masters *pb.ServerDiscovery
|
|
FilerGroup string
|
|
Collection string
|
|
DefaultReplication string
|
|
DisableDirListing bool
|
|
MaxMB int
|
|
DirListingLimit int
|
|
DataCenter string
|
|
Rack string
|
|
DataNode string
|
|
DefaultLevelDbDir string
|
|
DisableHttp bool
|
|
Host pb.ServerAddress
|
|
recursiveDelete bool
|
|
Cipher bool
|
|
SaveToFilerLimit int64
|
|
ConcurrentUploadLimit int64
|
|
ConcurrentFileUploadLimit int64
|
|
ShowUIDirectoryDelete bool
|
|
DownloadMaxBytesPs int64
|
|
DiskType string
|
|
AllowedOrigins []string
|
|
ExposeDirectoryData bool
|
|
TusBasePath string
|
|
S3ConfigFile string // optional path to static S3 identity config file
|
|
CredentialManager *credential.CredentialManager
|
|
}
|
|
|
|
type FilerServer struct {
|
|
inFlightDataSize int64
|
|
inFlightUploads int64
|
|
listenersWaits int64
|
|
|
|
// notifying clients
|
|
listenersLock sync.Mutex
|
|
listenersCond *sync.Cond
|
|
|
|
inFlightDataLimitCond *sync.Cond
|
|
|
|
filer_pb.UnimplementedSeaweedFilerServer
|
|
option *FilerOption
|
|
filer *filer.Filer
|
|
filerGuard *security.Guard
|
|
volumeGuard *security.Guard
|
|
grpcDialOption grpc.DialOption
|
|
|
|
// metrics read from the master
|
|
metricsAddress string
|
|
metricsIntervalSec int
|
|
|
|
// track known metadata listeners
|
|
knownListenersLock sync.Mutex
|
|
knownListeners map[int32]int32
|
|
|
|
// deduplicates concurrent remote object caching operations
|
|
remoteCacheGroup singleflight.Group
|
|
|
|
recentCopyRequestsMu sync.Mutex
|
|
recentCopyRequests map[string]recentCopyRequest
|
|
|
|
// credential manager for IAM operations
|
|
CredentialManager *credential.CredentialManager
|
|
|
|
// mountPeerRegistry backs the MountRegister / MountList RPCs for peer
|
|
// chunk sharing (tier 1). Always populated.
|
|
mountPeerRegistry *filer.MountPeerRegistry
|
|
|
|
// entryLockTable serializes mutations to the same entry path on this filer.
|
|
// CreateEntry takes it today; UpdateEntry and DeleteEntry are intended to take
|
|
// it too as their callers route a key's writes to this node, making it the
|
|
// local serialization point for read-modify-write operations that replaces
|
|
// the distributed lock for that key. Idle keys are evicted automatically, so
|
|
// the table stays bounded.
|
|
entryLockTable *util.LockTable[util.FullPath]
|
|
|
|
// posixLocks is the in-memory authority for cross-mount POSIX advisory locks
|
|
// on inodes this filer owns (per the route-by-key ring). Lock state is kept
|
|
// here rather than in replicated metadata: it is transient coordination, so
|
|
// keeping it off the meta-log avoids churn.
|
|
posixLocks *posixlock.Manager
|
|
// posixLockSweeperStop stops the lease-reaping sweeper goroutine on Shutdown.
|
|
posixLockSweeperStop chan struct{}
|
|
// posixLockReadyAt is the unix-nanos when this filer began serving POSIX
|
|
// locks. For posixLockWarmup after it, the owner defers would-be grants while
|
|
// mounts re-assert, so a (re)started owner does not double-grant from empty
|
|
// state. Atomic so the handler reads it without locking; 0 means "not warming
|
|
// up" (e.g. in tests).
|
|
posixLockReadyAt atomic.Int64
|
|
}
|
|
|
|
func NewFilerServer(defaultMux, readonlyMux *http.ServeMux, option *FilerOption) (fs *FilerServer, err error) {
|
|
|
|
v := util.GetViper()
|
|
signingKey := v.GetString("jwt.filer_signing.key")
|
|
v.SetDefault("jwt.filer_signing.expires_after_seconds", 10)
|
|
expiresAfterSec := v.GetInt("jwt.filer_signing.expires_after_seconds")
|
|
|
|
readSigningKey := v.GetString("jwt.filer_signing.read.key")
|
|
v.SetDefault("jwt.filer_signing.read.expires_after_seconds", 60)
|
|
readExpiresAfterSec := v.GetInt("jwt.filer_signing.read.expires_after_seconds")
|
|
|
|
volumeSigningKey := v.GetString("jwt.signing.key")
|
|
v.SetDefault("jwt.signing.expires_after_seconds", 10)
|
|
volumeExpiresAfterSec := v.GetInt("jwt.signing.expires_after_seconds")
|
|
|
|
volumeReadSigningKey := v.GetString("jwt.signing.read.key")
|
|
v.SetDefault("jwt.signing.read.expires_after_seconds", 60)
|
|
volumeReadExpiresAfterSec := v.GetInt("jwt.signing.read.expires_after_seconds")
|
|
|
|
v.SetDefault("cors.allowed_origins.values", "*")
|
|
|
|
allowedOrigins := v.GetString("cors.allowed_origins.values")
|
|
domains := strings.Split(allowedOrigins, ",")
|
|
option.AllowedOrigins = domains
|
|
|
|
v.SetDefault("filer.expose_directory_metadata.enabled", true)
|
|
returnDirMetadata := v.GetBool("filer.expose_directory_metadata.enabled")
|
|
option.ExposeDirectoryData = returnDirMetadata
|
|
|
|
fs = &FilerServer{
|
|
option: option,
|
|
grpcDialOption: security.LoadClientTLS(util.GetViper(), "grpc.filer"),
|
|
knownListeners: make(map[int32]int32),
|
|
inFlightDataLimitCond: sync.NewCond(new(sync.Mutex)),
|
|
recentCopyRequests: make(map[string]recentCopyRequest),
|
|
CredentialManager: option.CredentialManager,
|
|
entryLockTable: util.NewLockTable[util.FullPath](),
|
|
posixLocks: posixlock.NewManager(),
|
|
}
|
|
fs.startPosixLockSweeper()
|
|
fs.mountPeerRegistry = filer.NewMountPeerRegistry()
|
|
go fs.runMountPeerRegistrySweeper()
|
|
fs.listenersCond = sync.NewCond(&fs.listenersLock)
|
|
|
|
option.Masters.RefreshBySrvIfAvailable()
|
|
if len(option.Masters.GetInstances()) == 0 {
|
|
glog.Fatal("master list is required!")
|
|
}
|
|
|
|
if !util.LoadConfiguration("filer", false) {
|
|
v.SetDefault("leveldb2.enabled", true)
|
|
v.SetDefault("leveldb2.dir", option.DefaultLevelDbDir)
|
|
_, err := os.Stat(option.DefaultLevelDbDir)
|
|
if os.IsNotExist(err) {
|
|
os.MkdirAll(option.DefaultLevelDbDir, 0755)
|
|
}
|
|
glog.V(0).Infof("default to create filer store dir in %s", option.DefaultLevelDbDir)
|
|
} else {
|
|
glog.Warningf("skipping default store dir in %s", option.DefaultLevelDbDir)
|
|
}
|
|
util.LoadConfiguration("notification", false)
|
|
|
|
v.SetDefault("filer.options.max_file_name_length", 255)
|
|
maxFilenameLength := v.GetUint32("filer.options.max_file_name_length")
|
|
glog.V(0).Infof("max_file_name_length %d", maxFilenameLength)
|
|
fs.filer = filer.NewFiler(*option.Masters, fs.grpcDialOption, option.Host, option.FilerGroup, option.Collection, option.DefaultReplication, option.DataCenter, maxFilenameLength, func() {
|
|
if atomic.LoadInt64(&fs.listenersWaits) > 0 {
|
|
fs.listenersCond.Broadcast()
|
|
}
|
|
})
|
|
fs.filer.Cipher = option.Cipher
|
|
// we do not support IP whitelist right now https://github.com/seaweedfs/seaweedfs/issues/7094
|
|
if v.GetString("guard.white_list") != "" {
|
|
glog.Warningf("filer: guard.white_list is configured but the IP whitelist feature is currently disabled. See https://github.com/seaweedfs/seaweedfs/issues/7094")
|
|
}
|
|
fs.filerGuard = security.NewGuard([]string{}, signingKey, expiresAfterSec, readSigningKey, readExpiresAfterSec)
|
|
fs.volumeGuard = security.NewGuard([]string{}, volumeSigningKey, volumeExpiresAfterSec, volumeReadSigningKey, volumeReadExpiresAfterSec)
|
|
|
|
fs.checkWithMaster()
|
|
|
|
go stats.LoopPushingMetric("filer", string(fs.option.Host), fs.metricsAddress, fs.metricsIntervalSec)
|
|
go fs.filer.MasterClient.KeepConnectedToMaster(context.Background())
|
|
|
|
fs.option.recursiveDelete = v.GetBool("filer.options.recursive_delete")
|
|
v.SetDefault("filer.options.buckets_folder", "/buckets")
|
|
fs.filer.DirBucketsPath = v.GetString("filer.options.buckets_folder")
|
|
// TODO deprecated, will be removed after 2020-12-31
|
|
// replaced by https://github.com/seaweedfs/seaweedfs/wiki/Path-Specific-Configuration
|
|
// fs.filer.FsyncBuckets = v.GetStringSlice("filer.options.buckets_fsync")
|
|
isFresh := fs.filer.LoadConfiguration(v)
|
|
|
|
notification.LoadConfiguration(v, "notification.")
|
|
|
|
handleStaticResources(defaultMux)
|
|
if !option.DisableHttp {
|
|
defaultMux.HandleFunc("/healthz", requestIDMiddleware(fs.filerHealthzHandler))
|
|
defaultMux.HandleFunc("/readyz", requestIDMiddleware(fs.filerHealthzHandler))
|
|
// TUS resumable upload protocol handler
|
|
if option.TusBasePath != "" {
|
|
// Normalize TusPath to always have a leading slash and no trailing slash
|
|
if !strings.HasPrefix(option.TusBasePath, "/") {
|
|
option.TusBasePath = "/" + option.TusBasePath
|
|
}
|
|
option.TusBasePath = strings.TrimRight(option.TusBasePath, "/")
|
|
|
|
// Disallow using "/" as TUS base to avoid hijacking all filer routes
|
|
if option.TusBasePath == "" {
|
|
glog.Warningf("Invalid TUS base path; TUS disabled (must not be root '/')")
|
|
} else {
|
|
handlePath := option.TusBasePath + "/"
|
|
defaultMux.HandleFunc(handlePath, fs.filerGuard.WhiteList(requestIDMiddleware(fs.tusHandler)))
|
|
// Start background cleanup of expired TUS sessions (every hour)
|
|
fs.StartTusSessionCleanup(1 * time.Hour)
|
|
}
|
|
}
|
|
defaultMux.HandleFunc("/", fs.filerGuard.WhiteList(requestIDMiddleware(fs.filerHandler)))
|
|
}
|
|
if defaultMux != readonlyMux {
|
|
handleStaticResources(readonlyMux)
|
|
readonlyMux.HandleFunc("/healthz", requestIDMiddleware(fs.filerHealthzHandler))
|
|
readonlyMux.HandleFunc("/readyz", requestIDMiddleware(fs.filerHealthzHandler))
|
|
readonlyMux.HandleFunc("/", fs.filerGuard.WhiteList(requestIDMiddleware(fs.readonlyFilerHandler)))
|
|
}
|
|
|
|
existingNodes := fs.filer.ListExistingPeerUpdates(context.Background())
|
|
startFromTime := time.Now().Add(-filer.LogFlushInterval)
|
|
if isFresh {
|
|
glog.V(0).Infof("%s bootstrap from peers %+v", option.Host, existingNodes)
|
|
if err := fs.filer.MaybeBootstrapFromOnePeer(option.Host, existingNodes, startFromTime); err != nil {
|
|
glog.Fatalf("%s bootstrap from %+v: %v", option.Host, existingNodes, err)
|
|
}
|
|
}
|
|
v.SetDefault("filer.options.s3.empty_folder_cleanup_delay", "2m")
|
|
if d, err := time.ParseDuration(v.GetString("filer.options.s3.empty_folder_cleanup_delay")); err == nil {
|
|
fs.filer.EmptyFolderCleanupDelay = d
|
|
}
|
|
fs.filer.AggregateFromPeers(option.Host, existingNodes, startFromTime)
|
|
|
|
fs.filer.LoadFilerConf()
|
|
|
|
fs.filer.LoadRemoteStorageConfAndMapping()
|
|
|
|
grace.OnReload(fs.Reload)
|
|
|
|
fs.SetupDlmReplication()
|
|
fs.filer.Dlm.LockRing.SetTakeSnapshotCallback(fs.OnDlmChangeSnapshot)
|
|
|
|
if fs.CredentialManager != nil {
|
|
fs.CredentialManager.SetFilerAddressFunc(func() pb.ServerAddress {
|
|
return fs.option.Host
|
|
}, fs.grpcDialOption)
|
|
fs.CredentialManager.SetMasterClient(fs.filer.MasterClient, fs.grpcDialOption)
|
|
}
|
|
|
|
return fs, nil
|
|
}
|
|
|
|
func (fs *FilerServer) checkWithMaster() {
|
|
|
|
isConnected := false
|
|
for !isConnected {
|
|
fs.option.Masters.RefreshBySrvIfAvailable()
|
|
for _, master := range fs.option.Masters.GetInstances() {
|
|
readErr := operation.WithMasterServerClient(context.Background(), false, master, fs.grpcDialOption, func(masterClient master_pb.SeaweedClient) error {
|
|
resp, err := masterClient.GetMasterConfiguration(context.Background(), &master_pb.GetMasterConfigurationRequest{})
|
|
if err != nil {
|
|
return fmt.Errorf("get master %s configuration: %v", master, err)
|
|
}
|
|
fs.metricsAddress, fs.metricsIntervalSec = resp.MetricsAddress, int(resp.MetricsIntervalSeconds)
|
|
return nil
|
|
})
|
|
if readErr == nil {
|
|
isConnected = true
|
|
} else {
|
|
time.Sleep(7 * time.Second)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Shutdown gracefully shuts down the filer server by waiting for in-flight uploads to complete.
|
|
// This prevents data corruption when the process receives SIGTERM during active uploads.
|
|
func (fs *FilerServer) Shutdown() {
|
|
glog.V(0).Infof("Shutting down filer")
|
|
if fs.posixLockSweeperStop != nil {
|
|
close(fs.posixLockSweeperStop)
|
|
}
|
|
fs.filer.Shutdown()
|
|
}
|
|
|
|
func (fs *FilerServer) Reload() {
|
|
glog.V(0).Infoln("Reload filer server...")
|
|
|
|
util.LoadConfiguration("security", false)
|
|
v := util.GetViper()
|
|
fs.filerGuard.UpdateSigningKeys(
|
|
v.GetString("jwt.filer_signing.key"),
|
|
v.GetInt("jwt.filer_signing.expires_after_seconds"),
|
|
v.GetString("jwt.filer_signing.read.key"),
|
|
v.GetInt("jwt.filer_signing.read.expires_after_seconds"),
|
|
)
|
|
fs.volumeGuard.UpdateSigningKeys(
|
|
v.GetString("jwt.signing.key"),
|
|
v.GetInt("jwt.signing.expires_after_seconds"),
|
|
v.GetString("jwt.signing.read.key"),
|
|
v.GetInt("jwt.signing.read.expires_after_seconds"),
|
|
)
|
|
util_http.ReloadJwtSigningReadConfig()
|
|
}
|