refactor(filer): remove the inode->path index and the NFS gateway (#9724)

* fix(filer): derive inodes by hash instead of a snowflake sequencer

Compute the same inode the FUSE mount would: non-hard-linked entries hash path + crtime, hard links hash their shared HardLinkId so every link resolves to one inode. Removes the snowflake inodeSequencer and the SEAWEEDFS_FILER_SNOWFLAKE_ID knob; inodes are now deterministic across filers.

* chore: remove the experimental NFS gateway

The NFS frontend ('weed nfs') was the only consumer of the inode->path index. Remove the weed/server/nfs package, the command and its registration, the integration test harness, and the CI workflow; go mod tidy drops the willscott/go-nfs and go-nfs-client dependencies.

* refactor(filer): drop the inode->path index

With the NFS gateway gone, nothing reads it. A regular file's inode is a pure hash of its path and a hard link's is a hash of its shared HardLinkId -- both derivable on demand -- so the secondary KV index and its write/remove hooks are dead. Removes filer_inode_index.go and the recordInodeIndex hooks from the store wrapper.
This commit is contained in:
Chris Lu
2026-05-28 15:00:18 -07:00
committed by GitHub
parent 3537312045
commit dfd05d14cb
37 changed files with 64 additions and 9247 deletions
-137
View File
@@ -1,137 +0,0 @@
name: "NFS Integration Tests"
on:
push:
branches: [ master, main ]
paths:
- 'weed/server/nfs/**'
- 'weed/command/nfs.go'
- 'weed/filer/filer_inode.go'
- 'weed/filer/filer_inode_index.go'
- 'weed/filer/filerstore_wrapper.go'
- 'weed/server/filer_grpc_server_rename.go'
- 'test/nfs/**'
- '.github/workflows/nfs-tests.yml'
pull_request:
branches: [ master, main ]
paths:
- 'weed/server/nfs/**'
- 'weed/command/nfs.go'
- 'weed/filer/filer_inode.go'
- 'weed/filer/filer_inode_index.go'
- 'weed/filer/filerstore_wrapper.go'
- 'weed/server/filer_grpc_server_rename.go'
- 'test/nfs/**'
- '.github/workflows/nfs-tests.yml'
concurrency:
  # github.head_ref is only populated for pull_request events. Fall back to
  # github.ref so push runs on different branches do not all collapse into
  # the single group "/nfs-tests" and cancel one another.
  group: ${{ github.head_ref || github.ref }}/nfs-tests
  cancel-in-progress: true
permissions:
contents: read
env:
TEST_TIMEOUT: '15m'
jobs:
nfs-integration:
name: NFS Integration Testing
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout code
uses: actions/checkout@v6
- name: Set up Go
uses: actions/setup-go@v6
with:
go-version-file: 'go.mod'
- name: Build SeaweedFS
run: |
cd weed
go build -o weed .
chmod +x weed
./weed version
- name: Run NFS Integration Tests
run: |
cd test/nfs
echo "Running NFS integration tests..."
echo "============================================"
# Install test dependencies
go mod download
# Run the protocol-layer tests. The kernel-mount tests require root
# for mount(2) and are exercised in their own privileged step below;
# skip them here so a "skipped because not root" line doesn't show
# up as noise on every CI run.
go test -v -timeout=${{ env.TEST_TIMEOUT }} -skip '^TestKernelMount' ./...
echo "============================================"
echo "NFS integration tests completed"
- name: Install kernel NFS client
run: |
# nfs-common provides mount.nfs; netbase provides /etc/protocols
# which mount.nfs's protocol-name lookups (`tcp`, `udp`) need.
sudo apt-get update
sudo apt-get install -y nfs-common netbase
- name: Run kernel-mount E2E tests
run: |
cd test/nfs
echo "Running kernel-mount end-to-end tests..."
echo "These mount the running 'weed nfs' subprocess via the actual"
echo "Linux NFS client to catch protocol regressions invisible to"
echo "the go-nfs-client-based tests above."
echo "============================================"
# mount(2) is privileged. Preserve PATH so 'go' (and the weed
# binary that test/nfs/framework.go locates via $PATH) resolve
# correctly under sudo, and pass through the Go module/cache dirs
# so we don't redownload modules under root.
sudo env "PATH=$PATH" \
GOMODCACHE="$(go env GOMODCACHE)" \
GOCACHE="$(go env GOCACHE)" \
go test -v -timeout=${{ env.TEST_TIMEOUT }} -run '^TestKernelMount' ./...
echo "============================================"
echo "Kernel-mount E2E tests completed"
- name: Test Summary
if: always()
run: |
echo "## NFS Integration Test Summary" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Test Coverage" >> $GITHUB_STEP_SUMMARY
echo "- **Read/Write Round Trip**: Basic file create + read" >> $GITHUB_STEP_SUMMARY
echo "- **Directory Operations**: Mkdir, ReadDirPlus, RmDir" >> $GITHUB_STEP_SUMMARY
echo "- **Nested Directories**: Deep tree creation and leaf I/O" >> $GITHUB_STEP_SUMMARY
echo "- **Rename**: Content preserved across rename" >> $GITHUB_STEP_SUMMARY
echo "- **Overwrite + Truncate**: Setattr(size=0) + shorter write" >> $GITHUB_STEP_SUMMARY
echo "- **Large Files**: 3 MiB binary round trip" >> $GITHUB_STEP_SUMMARY
echo "- **Edge Payloads**: All 256 byte values + empty files" >> $GITHUB_STEP_SUMMARY
echo "- **Symlinks**: Symlink + Lookup" >> $GITHUB_STEP_SUMMARY
echo "- **Missing Path**: Remove on missing entry errors cleanly" >> $GITHUB_STEP_SUMMARY
echo "- **FSINFO**: Non-zero rtpref/wtpref advertised" >> $GITHUB_STEP_SUMMARY
echo "- **Sequential Append**: Two-part concatenation" >> $GITHUB_STEP_SUMMARY
echo "- **ReadDir After Remove**: Meta cache does not serve stale entries" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Kernel-Mount E2E Coverage" >> $GITHUB_STEP_SUMMARY
echo "- **V3 over TCP**: baseline NFSv3 mount + readdir" >> $GITHUB_STEP_SUMMARY
echo "- **V3 with mountproto=udp**: regression test for UDP MOUNT v3 responder" >> $GITHUB_STEP_SUMMARY
echo "- **V4 rejects cleanly**: regression test for the v4 PROG_MISMATCH path (#9262)" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Harness" >> $GITHUB_STEP_SUMMARY
echo "Most tests boot their own master + volume + filer + nfs subprocess" >> $GITHUB_STEP_SUMMARY
echo "stack on loopback and drive it via the NFSv3 RPC protocol using" >> $GITHUB_STEP_SUMMARY
echo "go-nfs-client. The kernel-mount E2E tests reuse the same harness" >> $GITHUB_STEP_SUMMARY
echo "but mount the export through the in-tree Linux NFS client to" >> $GITHUB_STEP_SUMMARY
echo "catch protocol regressions a Go-only client can't see; they run" >> $GITHUB_STEP_SUMMARY
echo "in a separate privileged step (mount(2) requires root)." >> $GITHUB_STEP_SUMMARY
+1 -4
View File
@@ -129,7 +129,6 @@ require (
github.com/cognusion/imaging v1.0.3
github.com/fluent/fluent-logger-golang v1.10.1
github.com/getsentry/sentry-go v0.44.1
github.com/go-git/go-billy/v5 v5.9.0
github.com/go-ldap/ldap/v3 v3.4.13
github.com/golang-jwt/jwt/v5 v5.3.1
github.com/google/flatbuffers/go v0.0.0-20230108230133-3b8644d32c50
@@ -152,8 +151,6 @@ require (
github.com/tarantool/go-tarantool/v2 v2.4.2
github.com/testcontainers/testcontainers-go v0.40.0
github.com/tikv/client-go/v2 v2.0.7
github.com/willscott/go-nfs v0.0.4
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886
github.com/xeipuuv/gojsonschema v1.2.0
github.com/ydb-platform/ydb-go-sdk-auth-environ v0.5.1
github.com/ydb-platform/ydb-go-sdk/v3 v3.134.2
@@ -211,6 +208,7 @@ require (
github.com/dromara/dongle v1.0.1 // indirect
github.com/gin-gonic/gin v1.11.0 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect
github.com/go-git/go-billy/v5 v5.9.0 // indirect
github.com/goccy/go-yaml v1.18.0 // indirect
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect
github.com/google/go-cmp v0.7.0 // indirect
@@ -257,7 +255,6 @@ require (
github.com/pquerna/otp v1.5.0 // indirect
github.com/pterm/pterm v0.12.82 // indirect
github.com/quic-go/qpack v0.6.0 // indirect
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 // indirect
github.com/rclone/Proton-API-Bridge v1.0.3 // indirect
github.com/rclone/go-proton-api v1.0.2 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect
-6
View File
@@ -1783,8 +1783,6 @@ github.com/quic-go/quic-go v0.59.0 h1:OLJkp1Mlm/aS7dpKgTc6cnpynnD2Xg7C1pwL6vy/SA
github.com/quic-go/quic-go v0.59.0/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU=
github.com/rabbitmq/amqp091-go v1.11.0 h1:HxIctVm9Gid/Vtn706necmZ7Wj6pgGI2eqplRbEY8O8=
github.com/rabbitmq/amqp091-go v1.11.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o=
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 h1:UVArwN/wkKjMVhh2EQGC0tEc1+FqiLlvYXY5mQ2f8Wg=
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93/go.mod h1:Nfe4efndBz4TibWycNE+lqyJZiMX4ycx+QKV8Ta0f/o=
github.com/rclone/Proton-API-Bridge v1.0.3 h1:Bs7RC4xCFSN0BPIYVda/BNxp0qo3NV0gB2VZqx2KIew=
github.com/rclone/Proton-API-Bridge v1.0.3/go.mod h1:26RAest751Ofk+F/d8xtl4UyWXrZvMQwn39U8rm/WKM=
github.com/rclone/go-proton-api v1.0.2 h1:cJtJUab0MGJ3C6q5kiEJs3pbyhSLnOKMyYOQehA0PBc=
@@ -2028,10 +2026,6 @@ github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IU
github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok=
github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g=
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
github.com/willscott/go-nfs v0.0.4 h1:1vpOPAdECmoT2KmZ8u+ukO/jfvDjMEUNYhA2F1jGJtI=
github.com/willscott/go-nfs v0.0.4/go.mod h1:VhNccO67Oug787VNXcyx9JDI3ZoSpqoKMT/lWMhUIDg=
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886 h1:DtrBtkgTJk2XGt4T7eKdKVkd9A5NCevN2e4inLXtsqA=
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886/go.mod h1:Tq++Lr/FgiS3X48q5FETemXiSLGuYMQT2sPjYNPJSwA=
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0 h1:3UeQBvD0TFrlVjOeLOBz+CPAI8dnbqNSVwUwRrkp7vQ=
-36
View File
@@ -1,36 +0,0 @@
# None of the targets below produce a file of the same name, so all of them
# are declared phony.
.PHONY: all build test test-verbose test-short test-debug clean deps tidy

# Default goal: build the binary, then run the test suite.
all: build test

# Build the weed binary in ../../weed.
build:
	cd ../../weed && go build -o weed .

# Download the Go modules the tests depend on.
deps:
	go mod download

# Run all tests with a 5 minute timeout.
test: build deps
	go test -timeout 5m ./...

# Same as `test`, with verbose output.
test-verbose: build deps
	go test -v -timeout 5m ./...

# Run with -short. Does not depend on `build`.
test-short: deps
	go test -short -v ./...

# Verbose run whose combined stdout/stderr is also saved to test.log.
# NOTE(review): the recipe ends in a pipe to tee, so its exit status is tee's
# and a failing `go test` does not fail this target under the default shell.
test-debug: build deps
	go test -v -timeout 5m ./... 2>&1 | tee test.log

# Remove the saved log and clear Go's cached test results.
clean:
	rm -f test.log
	go clean -testcache

# Tidy go.mod and go.sum.
tidy:
	go mod tidy
-92
View File
@@ -1,92 +0,0 @@
# SeaweedFS NFS Integration Tests
End-to-end tests that boot a real SeaweedFS cluster (`master` + `volume` +
`filer`) plus the experimental `weed nfs` frontend and drive it through the
NFSv3 wire protocol. The tests talk to the server over TCP using
`github.com/willscott/go-nfs-client`, which means they do **not** need a
kernel NFS mount, privileged ports, or any platform-specific tooling.
## Prerequisites
1. Build the `weed` binary:
```bash
cd ../../weed
go build -o weed .
```
2. Go 1.24 or later.
## Running the tests
```bash
# Build weed and run everything
make test
# Verbose output, keeps the subprocess stdout
make test-verbose
# Skip integration tests — useful when iterating on the framework itself
make test-short
# Run a single test
go test -v -run TestNfsBasicReadWrite ./...
```
Every test starts its own cluster on random loopback ports, so runs are
isolated and can execute in parallel.
## Layout
- `framework.go` — launches `weed master`, `weed volume`, `weed filer`, and
`weed nfs` as subprocesses, waits for each to accept TCP, and exposes a
`Mount()` helper that returns an `nfsclient.Target`.
- `basic_test.go` — covers the most common NFS operations:
- Read/write round-trip (`TestNfsBasicReadWrite`)
- Mkdir / ReadDirPlus / RmDir (`TestNfsMkdirAndRmdir`)
- Nested directory + leaf file (`TestNfsNestedDirectories`)
- Rename preserves content (`TestNfsRenamePreservesContent`)
- Overwrite shrinks file size (`TestNfsOverwriteShrinksFile`)
- Large binary file round-trip (`TestNfsLargeFile`)
- Arbitrary binary and empty files (`TestNfsBinaryAndEmptyFiles`)
- Symlink + Lookup (`TestNfsSymlinkRoundTrip`)
- ReadDirPlus ordering sanity (`TestNfsReadDirPlusOrdering`)
- Remove on missing path errors cleanly (`TestNfsRemoveMissingFailsCleanly`)
- FSINFO advertises non-zero limits (`TestNfsFSInfoReturnsSaneLimits`)
- Sequential append writes concatenate (`TestNfsAppendIsSequential`)
- ReadDir after remove (`TestNfsReadDirAfterRemove`)
## Debugging a failing test
Keep the cluster temp dir for inspection:
```go
config := DefaultTestConfig()
config.SkipCleanup = true
```
Enable subprocess stdout/stderr:
```go
config := DefaultTestConfig()
config.EnableDebug = true
```
Or run with `-v`, which flips `EnableDebug` automatically via `testing.Verbose()`.
## Notes
- The NFS server binds to `127.0.0.1` with `-ip.bind=127.0.0.1` and exports
`/nfs_export`. The test framework pre-creates that directory via the
filer's HTTP API before starting the NFS server — the NFS server requires
its export root to exist in the filer's namespace with a real entry, and
the filer's synthetic `/` root does not match the `Name=="/"` check the
NFS server performs during `ensureIndexedEntry`.
- Ports are allocated dynamically. The framework reserves all of its ports
in one batch via `testutil.MustAllocatePorts`, which keeps every listener
open until the whole batch has been assigned, and then hands the ports to
`weed master/volume/filer/nfs`. There is still a tiny window between a
listener being released and the subprocess binding the port; it has not been
a problem in practice but is worth remembering if you see a "bind: address
already in use" failure.
- All four `weed` components are started with explicit `-port.grpc=...`
flags. Without them, the default is `-port + 10000`, which overflows
`65535` whenever the HTTP port lands above `55535` — something ports from
the kernel's ephemeral range on macOS routinely do.
-400
View File
@@ -1,400 +0,0 @@
package nfs
import (
"bytes"
"fmt"
"io"
"os"
"path"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
nfsclient "github.com/willscott/go-nfs-client/nfs"
)
// setupFramework boots a dedicated cluster for a single test and returns the
// running framework. Every caller gets its own framework instance, so tests
// cannot step on each other's namespace. The test is skipped in -short mode.
//
// Teardown is registered before Setup runs. Setup starts its subprocesses one
// after another and returns early on the first failure, so registering
// Cleanup only after a successful Setup would leave the already-started
// processes running and the temp dir on disk. Cleanup ignores process handles
// that are still nil, so calling it after a partial Setup is safe.
func setupFramework(t *testing.T) *NfsTestFramework {
	t.Helper()
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	config := DefaultTestConfig()
	config.EnableDebug = testing.Verbose()

	fw := NewNfsTestFramework(t, config)
	t.Cleanup(fw.Cleanup)
	require.NoError(t, fw.Setup(config), "framework setup")
	return fw
}
// writeAll opens remotePath on target with mode 0o644, writes payload with at
// most one Write call, and closes the file. An empty payload skips the Write
// entirely, so the file is only opened and closed. Any error, or a short
// write, fails the test immediately.
func writeAll(t *testing.T, target *nfsclient.Target, remotePath string, payload []byte) {
	t.Helper()

	f, openErr := target.OpenFile(remotePath, 0o644)
	require.NoError(t, openErr, "open %s for write", remotePath)

	if len(payload) != 0 {
		written, writeErr := f.Write(payload)
		require.NoError(t, writeErr, "write %s", remotePath)
		require.Equal(t, len(payload), written, "short write on %s", remotePath)
	}

	closeErr := f.Close()
	require.NoError(t, closeErr, "close %s", remotePath)
}
// readAll returns the complete contents of remotePath as read through target.
// A failure to open or read the file fails the test immediately.
func readAll(t *testing.T, target *nfsclient.Target, remotePath string) []byte {
	t.Helper()

	f, openErr := target.Open(remotePath)
	require.NoError(t, openErr, "open %s for read", remotePath)
	defer f.Close()

	data, readErr := io.ReadAll(f)
	require.NoError(t, readErr, "read %s", remotePath)
	return data
}
// TestNfsBasicReadWrite writes a small text payload to /hello.txt, reads it
// back, and checks that both the bytes and the size reported by Getattr match
// what was written.
func TestNfsBasicReadWrite(t *testing.T) {
	const remotePath = "/hello.txt"

	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	want := []byte("hello from seaweedfs nfs integration test")
	writeAll(t, target, remotePath, want)

	assert.Equal(t, want, readAll(t, target, remotePath), "round-tripped content must match")

	attrs, err := target.Getattr(remotePath)
	require.NoError(t, err)
	assert.Equal(t, int64(len(want)), int64(attrs.Filesize))
}
// TestNfsMkdirAndRmdir creates /dir1, checks that ReadDirPlus on the export
// root lists it as a directory, removes it with RmDir, and checks that a
// second listing no longer contains it.
func TestNfsMkdirAndRmdir(t *testing.T) {
	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	_, err = target.Mkdir("/dir1", 0o755)
	require.NoError(t, err)

	before, err := target.ReadDirPlus("/")
	require.NoError(t, err)
	sawDir := false
	for _, e := range before {
		if e.Name() != "dir1" {
			continue
		}
		sawDir = true
		assert.True(t, e.IsDir(), "dir1 should be a directory")
	}
	assert.True(t, sawDir, "expected dir1 in readdir listing")

	require.NoError(t, target.RmDir("/dir1"))

	// Once removed, no entry in the root listing may carry the name.
	after, err := target.ReadDirPlus("/")
	require.NoError(t, err)
	for _, e := range after {
		assert.NotEqual(t, "dir1", e.Name(), "dir1 should be removed")
	}
}
// TestNfsNestedDirectories builds /a/b/c one Mkdir per level, round-trips a
// file at the deepest level, then removes the file and the directories from
// the innermost outwards.
func TestNfsNestedDirectories(t *testing.T) {
	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	// Ordered parent-first so each Mkdir finds its parent in place.
	levels := []string{"/a", "/a/b", "/a/b/c"}
	for _, dir := range levels {
		_, mkErr := target.Mkdir(dir, 0o755)
		require.NoError(t, mkErr, "mkdir %s", dir)
	}

	const leaf = "/a/b/c/leaf.txt"
	want := []byte("deep path content")
	writeAll(t, target, leaf, want)
	assert.Equal(t, want, readAll(t, target, leaf))

	require.NoError(t, target.Remove(leaf))
	// Walk the same list backwards so children go before their parents.
	for i := len(levels) - 1; i >= 0; i-- {
		require.NoError(t, target.RmDir(levels[i]))
	}
}
// TestNfsRenamePreservesContent renames /src.txt to /dst.txt and verifies
// that Lookup on the old path now fails while the new path returns the bytes
// originally written. Inode identity across the rename is deliberately not
// asserted here.
func TestNfsRenamePreservesContent(t *testing.T) {
	const (
		oldPath = "/src.txt"
		newPath = "/dst.txt"
	)

	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	want := []byte("rename me")
	writeAll(t, target, oldPath, want)
	require.NoError(t, target.Rename(oldPath, newPath))

	_, _, err = target.Lookup(oldPath)
	assert.Error(t, err, "source should be gone after rename")

	assert.Equal(t, want, readAll(t, target, newPath))
	require.NoError(t, target.Remove(newPath))
}
// TestNfsOverwriteShrinksFile replaces a file's content with something
// shorter and checks that Getattr and a full read both reflect the shorter
// content. The file is truncated explicitly with Setattr(size=0) between the
// two writes; writeAll itself never truncates.
func TestNfsOverwriteShrinksFile(t *testing.T) {
	const remotePath = "/overwrite.txt"

	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	writeAll(t, target, remotePath, []byte("the quick brown fox"))

	truncateToZero := nfsclient.Sattr3{
		Size: nfsclient.SetSize{SetIt: true, Size: 0},
	}
	require.NoError(t, target.Setattr(remotePath, truncateToZero))

	writeAll(t, target, remotePath, []byte("short"))

	attrs, err := target.Getattr(remotePath)
	require.NoError(t, err)
	assert.Equal(t, int64(len("short")), int64(attrs.Filesize))

	assert.Equal(t, []byte("short"), readAll(t, target, remotePath))
	require.NoError(t, target.Remove(remotePath))
}
// TestNfsLargeFile round-trips a 3 MiB binary payload and checks the size
// reported by Getattr as well as every byte read back.
// NOTE(review): the intent is presumably to push the write through the
// chunked volume-server path rather than inline storage; 3 MiB is below
// 4 MiB, so confirm which threshold this size is meant to cross.
func TestNfsLargeFile(t *testing.T) {
	const payloadSize = 3 * 1024 * 1024
	const remotePath = "/big.bin"

	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	// The pattern repeats every 251 bytes; 251 is prime, so the period never
	// lines up with a power-of-two boundary and misplaced offsets show up as
	// content mismatches.
	want := make([]byte, payloadSize)
	for idx := 0; idx < len(want); idx++ {
		want[idx] = byte(idx % 251)
	}
	writeAll(t, target, remotePath, want)

	attrs, err := target.Getattr(remotePath)
	require.NoError(t, err)
	assert.Equal(t, int64(payloadSize), int64(attrs.Filesize))

	got := readAll(t, target, remotePath)
	require.Equal(t, payloadSize, len(got))
	assert.True(t, bytes.Equal(want, got), "large file content must round-trip byte-for-byte")

	require.NoError(t, target.Remove(remotePath))
}
// TestNfsBinaryAndEmptyFiles exercises two edge-case payloads against one
// shared mount: a 256-byte file containing every possible byte value, and a
// zero-length file whose size must be reported as 0.
func TestNfsBinaryAndEmptyFiles(t *testing.T) {
	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	t.Run("AllByteValues", func(t *testing.T) {
		const remotePath = "/binary.bin"
		want := make([]byte, 256)
		for b := 0; b < len(want); b++ {
			want[b] = byte(b)
		}
		writeAll(t, target, remotePath, want)
		assert.Equal(t, want, readAll(t, target, remotePath))
		require.NoError(t, target.Remove(remotePath))
	})

	t.Run("EmptyFile", func(t *testing.T) {
		const remotePath = "/empty.txt"
		writeAll(t, target, remotePath, nil)
		attrs, statErr := target.Getattr(remotePath)
		require.NoError(t, statErr)
		assert.Equal(t, int64(0), int64(attrs.Filesize))
		require.NoError(t, target.Remove(remotePath))
	})
}
// TestNfsSymlinkRoundTrip creates a symlink at /link.txt referring to
// /target.txt and checks that Lookup on the link reports the symlink mode
// bit. The test never creates /target.txt and does not read the link's
// target back.
func TestNfsSymlinkRoundTrip(t *testing.T) {
	const linkPath = "/link.txt"

	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	require.NoError(t, target.Symlink("/target.txt", linkPath))

	// The link is looked up while dangling: its target was never created.
	entry, _, err := target.Lookup(linkPath)
	require.NoError(t, err, "lookup symlink")

	isSymlink := entry.Mode()&os.ModeSymlink != 0
	assert.True(t, isSymlink, "expected symlink mode, got %s", entry.Mode())

	require.NoError(t, target.Remove(linkPath))
}
// TestNfsReadDirPlusOrdering writes five distinctly named files into /listing
// and checks that ReadDirPlus returns every one of them. Despite the name,
// only presence is asserted, not the order of the listing.
func TestNfsReadDirPlusOrdering(t *testing.T) {
	const dir = "/listing"

	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	_, err = target.Mkdir(dir, 0o755)
	require.NoError(t, err)

	names := []string{"alpha.txt", "beta.txt", "gamma.txt", "delta.txt", "epsilon.txt"}
	for _, n := range names {
		// Each file's content is simply its own name.
		writeAll(t, target, path.Join(dir, n), []byte(n))
	}

	listing, err := target.ReadDirPlus(dir)
	require.NoError(t, err)

	// Collect the returned names, ignoring the self and parent entries.
	listed := make(map[string]struct{}, len(listing))
	for _, e := range listing {
		switch e.Name() {
		case ".", "..":
		default:
			listed[e.Name()] = struct{}{}
		}
	}

	for _, n := range names {
		_, present := listed[n]
		assert.True(t, present, "expected %s in directory listing", n)
	}

	for _, n := range names {
		require.NoError(t, target.Remove(path.Join(dir, n)))
	}
	require.NoError(t, target.RmDir(dir))
}
// TestNfsRemoveMissingFailsCleanly checks that Remove on a path that was
// never created returns an error, and that the error text looks like a
// "not found" condition.
func TestNfsRemoveMissingFailsCleanly(t *testing.T) {
	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	err = target.Remove("/does_not_exist.txt")
	require.Error(t, err, "removing a missing file must error")

	// Accept any of several spellings, compared case-insensitively, so the
	// test is not tied to the client library's exact wording.
	msg := strings.ToLower(err.Error())
	recognised := false
	for _, marker := range []string{"noent", "not exist", "no such"} {
		if strings.Contains(msg, marker) {
			recognised = true
			break
		}
	}
	assert.True(t, recognised, "unexpected error shape: %v", err)
}
// TestNfsFSInfoReturnsSaneLimits calls FSInfo and requires the preferred read
// and write transfer sizes (RTPref, WTPref) to be greater than zero.
func TestNfsFSInfoReturnsSaneLimits(t *testing.T) {
	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	fsinfo, err := target.FSInfo()
	require.NoError(t, err)
	require.NotNil(t, fsinfo)

	var zero uint32
	assert.Greater(t, fsinfo.RTPref, zero, "rtpref must be positive")
	assert.Greater(t, fsinfo.WTPref, zero, "wtpref must be positive")
}
// TestNfsAppendIsSequential writes a file in two separate open/close cycles
// and checks that the result is the concatenation of both parts. No append
// flag is involved: the second cycle reopens the file, seeks to the end of
// the first part, and writes from there.
func TestNfsAppendIsSequential(t *testing.T) {
	const (
		remotePath = "/concat.txt"
		prefix     = "part1-"
		suffix     = "part2"
	)

	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	writeAll(t, target, remotePath, []byte(prefix))

	f, err := target.OpenFile(remotePath, 0o644)
	require.NoError(t, err)

	// Position the handle just past the first part so the second write
	// extends the file instead of overwriting it from offset zero.
	endOfPrefix := int64(len(prefix))
	_, err = f.Seek(endOfPrefix, io.SeekStart)
	require.NoError(t, err)

	_, err = f.Write([]byte(suffix))
	require.NoError(t, err)
	require.NoError(t, f.Close())

	content := readAll(t, target, remotePath)
	assert.Equal(t, prefix+suffix, string(content))

	require.NoError(t, target.Remove(remotePath))
}
// TestNfsReadDirAfterRemove is a regression test for stale directory
// listings: it creates f0.txt..f4.txt under /churn, removes f2.txt, and
// checks that a subsequent ReadDirPlus no longer returns it.
func TestNfsReadDirAfterRemove(t *testing.T) {
	const (
		dir       = "/churn"
		fileCount = 5
		victim    = 2
	)
	remotePath := func(i int) string {
		return path.Join(dir, fmt.Sprintf("f%d.txt", i))
	}

	fw := setupFramework(t)
	target, unmount, err := fw.Mount()
	require.NoError(t, err)
	defer unmount()

	_, err = target.Mkdir(dir, 0o755)
	require.NoError(t, err)
	for i := 0; i < fileCount; i++ {
		// One-byte files whose content is the file's index.
		writeAll(t, target, remotePath(i), []byte{byte(i)})
	}

	// Drop the middle file, then list the directory again.
	require.NoError(t, target.Remove(remotePath(victim)))

	listing, err := target.ReadDirPlus(dir)
	require.NoError(t, err)
	for _, e := range listing {
		assert.NotEqual(t, "f2.txt", e.Name(), "removed file should not reappear in listing")
	}

	// Remove the survivors so the directory itself can be deleted.
	for i := 0; i < fileCount; i++ {
		if i != victim {
			require.NoError(t, target.Remove(remotePath(i)))
		}
	}
	require.NoError(t, target.RmDir(dir))
}
-423
View File
@@ -1,423 +0,0 @@
package nfs
import (
"bytes"
"fmt"
"io"
"mime/multipart"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"syscall"
"testing"
"time"
"github.com/seaweedfs/seaweedfs/test/testutil"
"github.com/stretchr/testify/require"
nfsclient "github.com/willscott/go-nfs-client/nfs"
"github.com/willscott/go-nfs-client/nfs/rpc"
)
// NfsTestFramework holds the state of one test cluster: a master, a volume
// server, a filer, and the experimental `weed nfs` frontend, each tracked as
// an *os.Process. NewNfsTestFramework fills in the addresses and paths; Setup
// starts the components; Cleanup stops them.
type NfsTestFramework struct {
	// t is the test this framework is bound to.
	t *testing.T
	// tempDir is the per-framework scratch directory, removed by Cleanup
	// unless skipCleanup is set.
	tempDir string
	// dataDir is the "data" subdirectory of tempDir.
	dataDir string
	// Process handles; Cleanup skips any that are still nil.
	masterProcess *os.Process
	volumeProcess *os.Process
	filerProcess *os.Process
	nfsProcess *os.Process
	// Each *Addr is a "127.0.0.1:<port>" string and each *Grpc is a bare
	// port number, all assigned by NewNfsTestFramework.
	masterAddr string
	masterGrpc int
	volumeAddr string
	volumeGrpc int
	filerAddr string
	filerGrpc int
	nfsAddr string
	// exportRoot is the filer path passed to Mount; "/" when the config
	// leaves it empty.
	exportRoot string
	// weedBinary is whatever findWeedBinary returned at construction time.
	weedBinary string
	// isSetup becomes true once Setup has completed; a second Setup call
	// is rejected.
	isSetup bool
	// skipCleanup is copied from TestConfig.SkipCleanup.
	skipCleanup bool
}
// TestConfig controls how the framework boots the cluster.
type TestConfig struct {
	// NumVolumes is presumably a volume count handed to the cluster's start
	// helpers — TODO confirm; its consumer is not visible here.
	NumVolumes int
	// EnableDebug is set from testing.Verbose() by setupFramework.
	// NOTE(review): presumably surfaces subprocess output; confirm against
	// the start helpers.
	EnableDebug bool
	SkipCleanup bool // keep the temp dir when Cleanup runs, for inspection
	// ExportRoot is the filer path the NFS server exports. An empty value is
	// treated as "/" by NewNfsTestFramework; DefaultTestConfig sets it to
	// "/nfs_export".
	ExportRoot string
}
// DefaultTestConfig returns a fresh configuration with the defaults most
// tests use: three volumes, debug output off, cleanup on, and the dedicated
// /nfs_export subtree as the export root rather than the filer's "/".
// NOTE(review): the dedicated subtree is presumably needed because the NFS
// server wants its export root to be a real filer entry — confirm.
func DefaultTestConfig() *TestConfig {
	cfg := new(TestConfig)
	cfg.NumVolumes = 3
	cfg.EnableDebug = false
	cfg.SkipCleanup = false
	cfg.ExportRoot = "/nfs_export"
	return cfg
}
// NewNfsTestFramework prepares a framework bound to t without starting any
// process: it creates the temp dir, reserves ports, and records the addresses
// each component will use. A nil config is replaced by DefaultTestConfig().
// Call Setup afterwards to actually start the cluster.
func NewNfsTestFramework(t *testing.T, config *TestConfig) *NfsTestFramework {
	if config == nil {
		config = DefaultTestConfig()
	}

	// An unset export root means the filer root itself is exported.
	root := config.ExportRoot
	if root == "" {
		root = "/"
	}

	workDir, err := os.MkdirTemp("", "seaweedfs_nfs_test_")
	require.NoError(t, err)

	// Seven ports are reserved in a single batch: a listen port for each of
	// master, volume, filer and nfs, plus a gRPC port for master, volume and
	// filer (nfs gets none). Batch reservation through testutil is relied on
	// to avoid handing out a port another component already holds.
	ports := testutil.MustAllocatePorts(t, 7)
	loopback := func(port int) string {
		return fmt.Sprintf("127.0.0.1:%d", port)
	}

	fw := &NfsTestFramework{
		t:           t,
		tempDir:     workDir,
		dataDir:     filepath.Join(workDir, "data"),
		exportRoot:  root,
		isSetup:     false,
		skipCleanup: config.SkipCleanup,
	}
	fw.masterAddr, fw.masterGrpc = loopback(ports[0]), ports[1]
	fw.volumeAddr, fw.volumeGrpc = loopback(ports[2]), ports[3]
	fw.filerAddr, fw.filerGrpc = loopback(ports[4]), ports[5]
	fw.nfsAddr = loopback(ports[6])
	fw.weedBinary = findWeedBinary()
	return fw
}
// Setup creates the data directories and then starts master, volume server,
// filer and NFS frontend in that order, waiting for each one's port to accept
// connections before starting the next. When the export root is not "/", it
// is pre-created in the filer before the NFS server starts.
//
// config is forwarded unchanged to each start helper. Setup returns an error
// if it has already completed once, or on the first step that fails;
// underlying errors are wrapped with %w so callers can inspect them with
// errors.Is / errors.As. On failure, processes that were already started are
// left running — the caller is expected to invoke Cleanup.
func (f *NfsTestFramework) Setup(config *TestConfig) error {
	if f.isSetup {
		return fmt.Errorf("framework already setup")
	}

	dirs := []string{
		f.dataDir,
		filepath.Join(f.dataDir, "master"),
		filepath.Join(f.dataDir, "volume"),
	}
	for _, dir := range dirs {
		if err := os.MkdirAll(dir, 0755); err != nil {
			return fmt.Errorf("failed to create directory %s: %w", dir, err)
		}
	}

	if err := f.startMaster(config); err != nil {
		return fmt.Errorf("failed to start master: %w", err)
	}
	if !testutil.WaitForPort(portFromAddr(f.masterAddr), testutil.SeaweedMiniStartupTimeout) {
		return fmt.Errorf("master not ready at %s", f.masterAddr)
	}

	if err := f.startVolumeServer(config); err != nil {
		return fmt.Errorf("failed to start volume server: %w", err)
	}
	if !testutil.WaitForPort(portFromAddr(f.volumeAddr), testutil.SeaweedMiniStartupTimeout) {
		return fmt.Errorf("volume server not ready at %s", f.volumeAddr)
	}

	if err := f.startFiler(config); err != nil {
		return fmt.Errorf("failed to start filer: %w", err)
	}
	if !testutil.WaitForPort(portFromAddr(f.filerAddr), testutil.SeaweedMiniStartupTimeout) {
		return fmt.Errorf("filer not ready at %s", f.filerAddr)
	}

	// The export directory must exist in the filer before the NFS server
	// starts. The filer root needs no such step, so it is skipped for "/".
	if f.exportRoot != "/" {
		if err := f.ensureExportRootExists(); err != nil {
			return fmt.Errorf("failed to pre-create export root %s: %w", f.exportRoot, err)
		}
	}

	if err := f.startNfsServer(config); err != nil {
		return fmt.Errorf("failed to start NFS server: %w", err)
	}
	if !testutil.WaitForPort(portFromAddr(f.nfsAddr), testutil.SeaweedMiniStartupTimeout) {
		return fmt.Errorf("NFS server not ready at %s", f.nfsAddr)
	}

	// An open port does not guarantee the NFS server has finished its own
	// startup; give it a short grace period before the first client call.
	time.Sleep(500 * time.Millisecond)

	f.isSetup = true
	return nil
}
// Cleanup sends SIGTERM to every child process that was started and waits for
// each one to exit, NFS frontend first and master last. The temp directory is
// then removed unless the framework's skipCleanup flag is set. Signal, wait
// and removal errors are deliberately ignored: teardown is best-effort.
func (f *NfsTestFramework) Cleanup() {
	children := []*os.Process{f.nfsProcess, f.filerProcess, f.volumeProcess, f.masterProcess}
	for _, child := range children {
		if child == nil {
			// Component was never started.
			continue
		}
		_ = child.Signal(syscall.SIGTERM)
		_, _ = child.Wait()
	}

	if f.skipCleanup {
		return
	}
	_ = os.RemoveAll(f.tempDir)
}
// NfsAddr reports the TCP address on which the NFS server listens.
func (f *NfsTestFramework) NfsAddr() string {
	return f.nfsAddr
}
// FilerAddr reports the TCP address of the filer.
func (f *NfsTestFramework) FilerAddr() string {
	return f.filerAddr
}
// ExportRoot reports the filer path that the NFS server exports.
func (f *NfsTestFramework) ExportRoot() string {
	return f.exportRoot
}
// Mount dials the running NFS server, performs an NFSv3 MOUNT of the export
// root with AUTH_NULL credentials, and returns the resulting Target together
// with a cleanup func. The caller must invoke the cleanup func, which unmounts
// and closes the TCP connection.
func (f *NfsTestFramework) Mount() (*nfsclient.Target, func(), error) {
	const (
		dialAttempts = 20
		dialBackoff  = 25 * time.Millisecond
	)

	// The server's TCP listener can accept connections slightly before its
	// RPC programs are registered, so the dial is retried a bounded number of
	// times to absorb that window.
	var (
		conn    *rpc.Client
		dialErr error
	)
	for i := 0; i < dialAttempts; i++ {
		if conn, dialErr = rpc.DialTCP("tcp", f.nfsAddr, false); dialErr == nil {
			break
		}
		time.Sleep(dialBackoff)
	}
	if dialErr != nil {
		return nil, nil, fmt.Errorf("dial NFS: %w", dialErr)
	}

	// Mount.Addr is intentionally left empty. With a non-empty Addr the
	// go-nfs client re-dials through portmapper and appends `:111`, which
	// yields "too many colons" for a raw `host:port` string. Handing over the
	// already-open RPC client sidesteps that path.
	mounter := &nfsclient.Mount{Client: conn}
	target, mountErr := mounter.Mount(f.exportRoot, rpc.AuthNull)
	if mountErr != nil {
		conn.Close()
		return nil, nil, fmt.Errorf("mount %s: %w", f.exportRoot, mountErr)
	}

	return target, func() {
		_ = mounter.Unmount()
		conn.Close()
	}, nil
}
// startMaster launches `weed master` bound to 127.0.0.1 on the port taken from
// f.masterAddr, with its metadata directory under the framework's data
// directory, and records the process in f.masterProcess.
func (f *NfsTestFramework) startMaster(config *TestConfig) error {
	_, httpPort := splitHostPort(f.masterAddr)
	metaDir := filepath.Join(f.dataDir, "master")

	return f.startProcess(&f.masterProcess, config, []string{
		"master",
		"-ip=127.0.0.1",
		fmt.Sprintf("-port=%d", httpPort),
		fmt.Sprintf("-port.grpc=%d", f.masterGrpc),
		"-mdir=" + metaDir,
		"-raftBootstrap",
		"-peers=none",
	})
}
// startVolumeServer launches `weed volume` bound to 127.0.0.1, pointed at the
// test master, storing data under the framework's data directory, and records
// the process in f.volumeProcess.
func (f *NfsTestFramework) startVolumeServer(config *TestConfig) error {
	_, httpPort := splitHostPort(f.volumeAddr)

	// pb.ServerAddress encodes a non-default gRPC port as `host:port.grpc`
	// (see weed/pb/server_address.go): a dot, not a colon, separates the HTTP
	// port from the gRPC port.
	masterTarget := fmt.Sprintf("%s.%d", f.masterAddr, f.masterGrpc)
	storageDir := filepath.Join(f.dataDir, "volume")

	return f.startProcess(&f.volumeProcess, config, []string{
		"volume",
		"-master=" + masterTarget,
		"-ip=127.0.0.1",
		fmt.Sprintf("-port=%d", httpPort),
		fmt.Sprintf("-port.grpc=%d", f.volumeGrpc),
		"-dir=" + storageDir,
		fmt.Sprintf("-max=%d", config.NumVolumes),
	})
}
// startFiler launches `weed filer` bound to 127.0.0.1, pointed at the test
// master using the `host:port.grpc` address form, and records the process in
// f.filerProcess.
func (f *NfsTestFramework) startFiler(config *TestConfig) error {
	_, httpPort := splitHostPort(f.filerAddr)
	masterTarget := fmt.Sprintf("%s.%d", f.masterAddr, f.masterGrpc)

	return f.startProcess(&f.filerProcess, config, []string{
		"filer",
		"-master=" + masterTarget,
		"-ip=127.0.0.1",
		fmt.Sprintf("-port=%d", httpPort),
		fmt.Sprintf("-port.grpc=%d", f.filerGrpc),
	})
}
// startNfsServer launches `weed nfs` bound to 127.0.0.1, exporting
// f.exportRoot from the test filer, and records the process in f.nfsProcess.
func (f *NfsTestFramework) startNfsServer(config *TestConfig) error {
	_, listenPort := splitHostPort(f.nfsAddr)

	// Same `host:port.grpc` encoding used for the master; see
	// pb/server_address.go.
	filerTarget := fmt.Sprintf("%s.%d", f.filerAddr, f.filerGrpc)

	return f.startProcess(&f.nfsProcess, config, []string{
		"nfs",
		"-filer=" + filerTarget,
		"-ip.bind=127.0.0.1",
		fmt.Sprintf("-port=%d", listenPort),
		"-filer.path=" + f.exportRoot,
	})
}
// startProcess runs the weed binary with args, using the framework's temp
// directory as working directory. When config.EnableDebug is set the child's
// stdout and stderr are forwarded to this process. On a successful start the
// child's *os.Process is stored through target; on failure target is left
// untouched and the start error is returned.
func (f *NfsTestFramework) startProcess(target **os.Process, config *TestConfig, args []string) error {
	child := exec.Command(f.weedBinary, args...)
	child.Dir = f.tempDir
	if config.EnableDebug {
		child.Stdout, child.Stderr = os.Stdout, os.Stderr
	}

	startErr := child.Start()
	if startErr == nil {
		*target = child.Process
	}
	return startErr
}
// portFromAddr extracts the numeric port from a `host:port` string, for
// callers such as testutil.WaitForPort that take an int port rather than a
// full address. It yields whatever splitHostPort yields for the port.
func portFromAddr(addr string) int {
	_, portNumber := splitHostPort(addr)
	return portNumber
}
// ensureExportRootExists makes sure the export root directory exists in the
// filer by uploading an empty placeholder file beneath it over the filer's
// HTTP API and then deleting that file; the directory stays behind. The plain
// HTTP client is used because it is simpler than weed/pb and needs no gRPC
// stubs.
//
// An export root that is empty after trimming trailing slashes is a no-op.
// The upload must return a 2xx status; the delete may also return 404.
func (f *NfsTestFramework) ensureExportRootExists() error {
	root := strings.TrimRight(f.exportRoot, "/")
	if root == "" {
		return nil
	}

	const placeholderName = ".nfs_test_init"
	placeholderURL := "http://" + f.filerAddr + root + "/" + placeholderName

	// Build a multipart form carrying a single zero-length file.
	var payload bytes.Buffer
	form := multipart.NewWriter(&payload)
	filePart, err := form.CreateFormFile("file", placeholderName)
	if err != nil {
		return err
	}
	if _, err = io.WriteString(filePart, ""); err != nil {
		return err
	}
	if err = form.Close(); err != nil {
		return err
	}

	client := &http.Client{Timeout: 10 * time.Second}

	uploadReq, err := http.NewRequest(http.MethodPost, placeholderURL, &payload)
	if err != nil {
		return err
	}
	uploadReq.Header.Set("Content-Type", form.FormDataContentType())
	uploadResp, err := client.Do(uploadReq)
	if err != nil {
		return err
	}
	// Drain before closing so the connection can be reused for the DELETE.
	_, _ = io.Copy(io.Discard, uploadResp.Body)
	uploadResp.Body.Close()
	if code := uploadResp.StatusCode; code < 200 || code > 299 {
		return fmt.Errorf("filer POST %s returned status %d", placeholderURL, code)
	}

	// Remove the placeholder again; only the directory is wanted.
	removeReq, err := http.NewRequest(http.MethodDelete, placeholderURL, nil)
	if err != nil {
		return err
	}
	removeResp, err := client.Do(removeReq)
	if err != nil {
		return err
	}
	_, _ = io.Copy(io.Discard, removeResp.Body)
	removeResp.Body.Close()

	code := removeResp.StatusCode
	succeeded := code >= 200 && code <= 299
	if !succeeded && code != http.StatusNotFound {
		return fmt.Errorf("filer DELETE %s returned status %d", placeholderURL, code)
	}
	return nil
}
// splitHostPort breaks a `host:port` address into its host and numeric port.
// An address that net.SplitHostPort rejects yields ("", 0). A port that does
// not start with a decimal integer yields the host with port 0.
func splitHostPort(addr string) (string, int) {
	hostPart, portPart, splitErr := net.SplitHostPort(addr)
	if splitErr != nil {
		return "", 0
	}

	portNum := 0
	// Best effort: a failed scan leaves portNum at zero.
	_, _ = fmt.Sscanf(portPart, "%d", &portNum)
	return hostPart, portNum
}
// findWeedBinary returns the path of the weed binary to run. Lookup order:
// a build located relative to this source file, then one located relative to
// the current working directory, then `weed` on $PATH, and finally the bare
// name "weed". Checkout-local builds come first so tests exercise the code
// under review rather than whatever happens to be installed.
func findWeedBinary() string {
	// firstExisting returns the absolute form of the first path that exists.
	firstExisting := func(paths ...string) (string, bool) {
		for _, p := range paths {
			if _, statErr := os.Stat(p); statErr != nil {
				continue
			}
			resolved, _ := filepath.Abs(p)
			return resolved, true
		}
		return "", false
	}

	if _, sourceFile, _, ok := runtime.Caller(0); ok {
		base := filepath.Dir(sourceFile)
		if found, hit := firstExisting(
			filepath.Join(base, "../../weed/weed"),
			filepath.Join(base, "../weed/weed"),
		); hit {
			return found
		}
	}

	workDir, _ := os.Getwd()
	if found, hit := firstExisting(
		filepath.Join(workDir, "../../weed/weed"),
		filepath.Join(workDir, "../weed/weed"),
		filepath.Join(workDir, "./weed"),
	); hit {
		return found
	}

	if onPath, lookErr := exec.LookPath("weed"); lookErr == nil {
		return onPath
	}
	return "weed"
}
-21
View File
@@ -1,21 +0,0 @@
module seaweedfs-nfs-tests
go 1.25.0
// test/testutil lives inside the main seaweedfs module; pull it in via a
// local replace so this integration suite can reuse the shared port
// allocator and readiness helpers instead of reinventing them.
replace github.com/seaweedfs/seaweedfs => ../..
require (
github.com/seaweedfs/seaweedfs v0.0.0-00010101000000-000000000000
github.com/stretchr/testify v1.11.1
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886
)
require (
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
-14
View File
@@ -1,14 +0,0 @@
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 h1:UVArwN/wkKjMVhh2EQGC0tEc1+FqiLlvYXY5mQ2f8Wg=
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93/go.mod h1:Nfe4efndBz4TibWycNE+lqyJZiMX4ycx+QKV8Ta0f/o=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886 h1:DtrBtkgTJk2XGt4T7eKdKVkd9A5NCevN2e4inLXtsqA=
github.com/willscott/go-nfs-client v0.0.0-20251022144359-801f10d98886/go.mod h1:Tq++Lr/FgiS3X48q5FETemXiSLGuYMQT2sPjYNPJSwA=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-193
View File
@@ -1,193 +0,0 @@
//go:build linux
package nfs
// End-to-end mount tests that drive the real Linux NFS client (mount.nfs +
// in-tree kernel) against a running `weed nfs` subprocess. These exist to
// catch regressions that the existing framework can't see, because the
// framework drives the server with willscott/go-nfs-client — the same RPC
// library the server uses internally — so any bug shared between the two
// (XDR layout, version dispatch, RPC framing) round-trips invisibly.
//
// Two real bugs hit recently were exactly that shape:
// 1. NFSv4 mis-routed to the v3 SETATTR handler (#9262). The client
// library never sends NFSv4, so the test suite never noticed; the
// Linux kernel mount path did notice, with EIO.
// 2. UDP MOUNT v3 missing. Only TCP MOUNT was advertised; the kernel
// defaults mountproto=udp in many setups, so the in-tree client
// surfaced EPROTONOSUPPORT during MOUNT setup.
//
// These tests mount over the actual loopback interface using mount.nfs and
// shell out to /bin/mount and /bin/umount. They require root (mount(2) is
// privileged) and Linux (the in-tree NFS client is what's being exercised);
// they t.Skip cleanly when either prerequisite is missing.
//
// Run locally with:
//
// cd test/nfs
// sudo go test -v -run TestKernelMount ./...
//
// CI runs them via .github/workflows/nfs-tests.yml after installing
// nfs-common (mount.nfs + helpers).
import (
"errors"
"fmt"
"net"
"os"
"os/exec"
"strings"
"testing"
)
// kernelMountSkipIfUnsupported skips the calling test unless the host can
// perform a real NFS mount: the process must run as root and mount.nfs must be
// on $PATH. Keeping both checks here lets the kernel-mount tests stay focused
// on what they verify.
func kernelMountSkipIfUnsupported(t *testing.T) {
	t.Helper()

	if euid := os.Geteuid(); euid != 0 {
		t.Skip("kernel mount test requires root; mount(2) is privileged")
	}

	_, lookErr := exec.LookPath("mount.nfs")
	if lookErr != nil {
		t.Skipf("mount.nfs not installed: %v (CI installs the nfs-common package)", lookErr)
	}
}
// kernelMount mounts the framework's NFS export on a fresh temp directory by
// invoking `mount -t nfs` and returns the mountpoint with a teardown closure.
// Every "{port}" in optsTemplate is replaced by the NFS server's port;
// templates are expected to set port=/mountport= explicitly so the kernel
// never queries portmap. That keeps the test about the NFS / MOUNT wire
// protocol and avoids clashing with a system rpcbind on shared CI runners.
// Any failure aborts the test via t.Fatalf.
func kernelMount(t *testing.T, fw *NfsTestFramework, optsTemplate string) (string, func()) {
	t.Helper()

	nfsAddr := fw.NfsAddr()
	serverHost, serverPort, err := net.SplitHostPort(nfsAddr)
	if err != nil {
		t.Fatalf("split nfs addr %q: %v", nfsAddr, err)
	}

	dir, err := os.MkdirTemp("", "weed-nfs-kmount-")
	if err != nil {
		t.Fatalf("mkdtemp: %v", err)
	}

	mountOpts := strings.ReplaceAll(optsTemplate, "{port}", serverPort)
	export := serverHost + ":" + fw.ExportRoot()

	output, mountErr := exec.Command("mount", "-t", "nfs", "-o", mountOpts, export, dir).CombinedOutput()
	if mountErr != nil {
		_ = os.RemoveAll(dir)
		t.Fatalf("mount %s -o %s failed: %v\nmount output:\n%s", export, mountOpts, mountErr, output)
	}

	return dir, func() {
		// Force the unmount so teardown bails out quickly when the server is
		// already gone.
		_ = exec.Command("umount", "-f", dir).Run()
		_ = os.RemoveAll(dir)
	}
}
// newKernelMountFramework builds an NfsTestFramework from the default test
// config and brings it up. If setup fails it tears down whatever was started
// and aborts the test; otherwise teardown is registered with t.Cleanup.
func newKernelMountFramework(t *testing.T) *NfsTestFramework {
	t.Helper()

	config := DefaultTestConfig()
	framework := NewNfsTestFramework(t, config)

	if setupErr := framework.Setup(config); setupErr != nil {
		framework.Cleanup()
		t.Fatalf("framework setup: %v", setupErr)
	}

	t.Cleanup(framework.Cleanup)
	return framework
}
// TestKernelMountV3TCP covers the most common mount form: NFSv3 and MOUNT v3,
// both over TCP. The go-nfs-client tests already cover this at the protocol
// layer; going through mount.nfs and the kernel additionally confirms that
// the emitted wire format decodes under an independent XDR/RPC parser.
func TestKernelMountV3TCP(t *testing.T) {
	kernelMountSkipIfUnsupported(t)

	const opts = "nfsvers=3,nolock,port={port},mountport={port},proto=tcp,mountproto=tcp"
	dir, unmount := kernelMount(t, newKernelMountFramework(t), opts)
	defer unmount()

	_, statErr := os.Stat(dir)
	if statErr != nil {
		t.Errorf("stat mountpoint: %v", statErr)
	}

	_, listErr := os.ReadDir(dir)
	if listErr != nil {
		t.Errorf("readdir mountpoint: %v", listErr)
	}
}
// TestKernelMountV3MountProtoUDP guards the UDP MOUNT v3 responder.
// mountproto=udp makes the kernel issue MOUNT over UDP only; before the
// responder existed MOUNT was advertised TCP-only, so the kernel reached
// nothing and reported EPROTONOSUPPORT during mount setup.
func TestKernelMountV3MountProtoUDP(t *testing.T) {
	kernelMountSkipIfUnsupported(t)

	const opts = "nfsvers=3,nolock,port={port},mountport={port},proto=tcp,mountproto=udp"
	dir, unmount := kernelMount(t, newKernelMountFramework(t), opts)
	defer unmount()

	_, statErr := os.Stat(dir)
	if statErr != nil {
		t.Errorf("stat mountpoint: %v", statErr)
	}
}
// TestKernelMountV4RejectsCleanly guards the NFSv4 PROG_MISMATCH path (#9262).
// The server speaks only NFSv3. It used to mis-route a v4 COMPOUND to the v3
// SETATTR handler and answer with garbage, so the kernel reported EIO rather
// than a version mismatch and, on some distros, did not fall back to v3. The
// version filter now replies PROG_MISMATCH.
//
// The test requires that:
//  1. mount.nfs exits non-zero (no silent success against a v3-only server);
//  2. its output mentions "protocol", "version" or "i/o", which is what the
//     kernel reports on PROG_MISMATCH. A pre-fix server produces only
//     "mount system call failed", so a regression fails this check.
func TestKernelMountV4RejectsCleanly(t *testing.T) {
	kernelMountSkipIfUnsupported(t)
	fw := newKernelMountFramework(t)

	serverHost, serverPort, err := net.SplitHostPort(fw.NfsAddr())
	if err != nil {
		t.Fatalf("split nfs addr: %v", err)
	}
	dir, err := os.MkdirTemp("", "weed-nfs-kmount-v4-")
	if err != nil {
		t.Fatalf("mkdtemp: %v", err)
	}
	defer os.RemoveAll(dir)

	export := serverHost + ":" + fw.ExportRoot()
	mountCmd := exec.Command("mount", "-t", "nfs", "-o", "vers=4,port="+serverPort, export, dir)
	output, mountErr := mountCmd.CombinedOutput()
	// Unmount unconditionally in case the mount unexpectedly went through.
	defer exec.Command("umount", "-f", dir).Run()
	if mountErr == nil {
		t.Fatalf("v4 mount unexpectedly succeeded against v3-only server\nmount output:\n%s", output)
	}

	// The exact wording differs between distros, so only look for a hint
	// that the kernel saw a protocol-level failure rather than the generic
	// "mount system call failed".
	text := strings.ToLower(string(output))
	sawProtocolHint := false
	for _, hint := range []string{"protocol", "version", "i/o"} {
		if strings.Contains(text, hint) {
			sawProtocolHint = true
			break
		}
	}
	if !sawProtocolHint {
		t.Errorf("v4 mount failure didn't mention protocol/version/io; output:\n%s", output)
	}

	// Independently of the wording, the failure must be a non-zero exit of
	// the mount command itself.
	var exitErr *exec.ExitError
	if !errors.As(mountErr, &exitErr) {
		t.Errorf("expected mount to exit non-zero with ExitError, got %v", mountErr)
	}
}
-1
View File
@@ -48,7 +48,6 @@ var Commands = []*Command{
cmdVolume,
cmdWebDav,
cmdSftp,
cmdNfs,
cmdWorker,
}
-127
View File
@@ -1,127 +0,0 @@
package command
import (
"fmt"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/security"
weed_server_nfs "github.com/seaweedfs/seaweedfs/weed/server/nfs"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/util/version"
)
// nfsStandaloneOptions holds the flag values registered on cmdNfs in init and
// read by runNfs.
var (
nfsStandaloneOptions NfsOptions
)
// NfsOptions groups the command-line flags of the nfs command. Every field is
// a pointer because it is bound to a flag registered on cmdNfs.Flag.
type NfsOptions struct {
// filer is the filer server address (-filer).
filer *string
// ipBind is the IP address the server binds to (-ip.bind).
ipBind *string
// port is the NFS listen port (-port).
port *int
// filerRootPath is the filer path to export (-filer.path); runNfs rejects
// an empty value.
filerRootPath *string
// readOnly exports the filer path read-only (-readOnly).
readOnly *bool
// allowedClients is a comma-separated list of client IPs, hostnames or
// CIDRs allowed to connect (-allowedClients).
allowedClients *string
// volumeServerAccess selects how volume servers are reached:
// direct, publicUrl or filerProxy (-volumeServerAccess).
volumeServerAccess *string
// portmapBind, when non-empty, is the IP on which a built-in portmap
// responder is bound (-portmap.bind).
portmapBind *string
}
// init wires the nfs command: it assigns the Run handler and registers every
// command-line flag on cmdNfs.Flag, storing the resulting pointers in
// nfsStandaloneOptions.
func init() {
// Run is assigned here rather than in the cmdNfs literal to avoid an
// initialization cycle between cmdNfs and runNfs.
cmdNfs.Run = runNfs // break init cycle
nfsStandaloneOptions.filer = cmdNfs.Flag.String("filer", "localhost:8888", "filer server address")
// Loopback by default: exposing the server to the network is opt-in.
nfsStandaloneOptions.ipBind = cmdNfs.Flag.String("ip.bind", "127.0.0.1", "ip address to bind to. Defaults to loopback; override explicitly to expose the experimental server to the network.")
nfsStandaloneOptions.port = cmdNfs.Flag.Int("port", 2049, "NFS server listen port")
// No default export path; runNfs refuses to start when this is empty.
nfsStandaloneOptions.filerRootPath = cmdNfs.Flag.String("filer.path", "", "remote path from filer server to export. Required: no default is provided so operators must opt in to exporting a namespace subtree.")
nfsStandaloneOptions.readOnly = cmdNfs.Flag.Bool("readOnly", false, "export the filer path as read only")
nfsStandaloneOptions.allowedClients = cmdNfs.Flag.String("allowedClients", "", "comma-separated client IPs, hostnames, or CIDRs allowed to connect")
nfsStandaloneOptions.volumeServerAccess = cmdNfs.Flag.String("volumeServerAccess", "direct", "access volume servers by [direct|publicUrl|filerProxy]")
// Empty means the built-in portmap responder stays disabled.
nfsStandaloneOptions.portmapBind = cmdNfs.Flag.String("portmap.bind", "", "when set, bind a built-in portmap v2 responder on <ip>:111 so plain `mount -t nfs` works without client-side portmap bypass. Empty disables it. Binding port 111 requires root or CAP_NET_BIND_SERVICE and must not conflict with a system rpcbind.")
}
// cmdNfs describes the nfs subcommand: its usage line, one-line summary and
// long help text. Its Run handler and flags are attached in init.
var cmdNfs = &Command{
UsageLine: "nfs -port=2049 -filer=<ip:port> -filer.path=<exported subtree>",
Short: "start an experimental NFSv3 server backed by a filer",
Long: `start an experimental NFSv3 server backed by a filer.
This command serves an experimental filer-native NFSv3 frontend with
deterministic filehandles, filer-backed metadata operations, and direct
volume-server data access for chunk reads and buffered writes.
Safer defaults (since export ACLs are still not implemented):
- ip.bind defaults to 127.0.0.1, so the server is not reachable from
other hosts unless you override it explicitly.
- filer.path has no default; you must pick the subtree to export.
Override -ip.bind to a routable address only after you have reviewed
-allowedClients and the readiness of the rest of your deployment.
Mounting from a Linux client
----------------------------
The server does not run portmap/rpcbind by default. That means Linux
mount.nfs, which queries portmap on port 111 first, will fail with
"portmap query failed" against the plain form:
mount -t nfs -o nfsvers=3,nolock <host>:/export /mnt
Either tell the client to bypass portmap:
mount -t nfs -o nfsvers=3,nolock,port=2049,mountport=2049,\
proto=tcp,mountproto=tcp <host>:/export /mnt
or enable the built-in portmap responder on the server:
weed nfs ... -portmap.bind=0.0.0.0
With the responder enabled MOUNT v3 is answered over both TCP and UDP,
so the plain mount form above just works no mountproto override is
required even on clients whose default mountproto is UDP.
Binding port 111 requires root or CAP_NET_BIND_SERVICE and must not
collide with a system rpcbind.
`,
}
// runNfs is the Run handler of the nfs command. It loads the security
// configuration, validates the export path, constructs the NFS server from
// the parsed flags and starts it. It returns false when -filer.path is empty
// or when the server cannot be created or started, and true once Start
// returns without error.
func runNfs(cmd *Command, args []string) bool {
	util.LoadSecurityConfiguration()

	opts := &nfsStandaloneOptions

	// An explicitly empty -ip.bind falls back to loopback.
	if *opts.ipBind == "" {
		*opts.ipBind = "127.0.0.1"
	}

	switch *opts.filerRootPath {
	case "":
		glog.Errorf("-filer.path is required: pick an explicit subtree to export; exporting \"/\" is not a default")
		return false
	case "/":
		// Allowed, but worth flagging: this exposes everything.
		glog.Warningf("-filer.path=/ exports the entire filer namespace; ensure -allowedClients or -ip.bind constrains access")
	}

	listenAddress := fmt.Sprintf("%s:%d", *opts.ipBind, *opts.port)
	glog.V(0).Infof("Starting Seaweed NFS Server %s at %s", version.Version(), listenAddress)

	dialOption := security.LoadClientTLS(util.GetViper(), "grpc.client")
	server, err := weed_server_nfs.NewServer(&weed_server_nfs.Option{
		Filer:              pb.ServerAddress(*opts.filer),
		BindIp:             *opts.ipBind,
		Port:               *opts.port,
		FilerRootPath:      *opts.filerRootPath,
		ReadOnly:           *opts.readOnly,
		AllowedClients:     util.StringSplit(*opts.allowedClients, ","),
		VolumeServerAccess: *opts.volumeServerAccess,
		GrpcDialOption:     dialOption,
		PortmapBind:        *opts.portmapBind,
	})
	if err == nil {
		err = server.Start()
	}
	if err != nil {
		// Construction and start failures are reported identically.
		glog.Errorf("NFS Server startup error: %v", err)
		return false
	}
	return true
}
-3
View File
@@ -13,7 +13,6 @@ import (
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
"github.com/seaweedfs/seaweedfs/weed/filer/empty_folder_cleanup"
"github.com/seaweedfs/seaweedfs/weed/sequence"
"github.com/seaweedfs/seaweedfs/weed/cluster"
"github.com/seaweedfs/seaweedfs/weed/pb"
@@ -64,7 +63,6 @@ type Filer struct {
DeletionRetryQueue *DeletionRetryQueue
EmptyFolderCleaner *empty_folder_cleanup.EmptyFolderCleaner
EmptyFolderCleanupDelay time.Duration
inodeSequencer sequence.Sequencer
}
func NewFiler(masters pb.ServerDiscovery, grpcDialOption grpc.DialOption, filerHost pb.ServerAddress, filerGroup string, collection string, replication string, dataCenter string, maxFilenameLength uint32, notifyFn func()) *Filer {
@@ -79,7 +77,6 @@ func NewFiler(masters pb.ServerDiscovery, grpcDialOption grpc.DialOption, filerH
MaxFilenameLength: maxFilenameLength,
deletionQuit: make(chan struct{}),
DeletionRetryQueue: NewDeletionRetryQueue(),
inodeSequencer: newInodeSequencer(filerHost),
}
if f.UniqueFilerId < 0 {
f.UniqueFilerId = -f.UniqueFilerId
+15 -40
View File
@@ -1,51 +1,26 @@
package filer
import (
"os"
"strconv"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/sequence"
"github.com/seaweedfs/seaweedfs/weed/util"
)
// newInodeSequencer builds the Snowflake-based sequencer that assigns inode
// numbers to filer entries. The node id comes from
// SEAWEEDFS_FILER_SNOWFLAKE_ID; when unset (0) it defaults to a masked hash of
// filerHost, which has only 1024 possible values, so multi-filer clusters
// should set an explicit per-filer id (1..1023) to avoid birthday-paradox
// collisions.
//
// A construction failure is fatal by design: a process-local fallback
// allocator would hand out the same inodes again after a restart, breaking
// the stable object identity that NFS filehandles and the inode secondary
// index depend on.
func newInodeSequencer(filerHost pb.ServerAddress) sequence.Sequencer {
	nodeId := parseSnowflakeIdFromEnv()

	sequencer, err := sequence.NewSnowflakeSequencer(string(filerHost), nodeId)
	if err == nil {
		return sequencer
	}

	glog.Fatalf("initialize inode sequencer for filer %s (snowflakeId=%d): %v", filerHost, nodeId, err)
	return sequencer
}
// parseSnowflakeIdFromEnv reads SEAWEEDFS_FILER_SNOWFLAKE_ID. An unset or
// empty variable yields 0. A value that is not an integer in [0,1023] is
// reported through glog.Fatalf.
func parseSnowflakeIdFromEnv() int {
	const envKey = "SEAWEEDFS_FILER_SNOWFLAKE_ID"

	value := os.Getenv(envKey)
	if value == "" {
		return 0
	}

	parsed, convErr := strconv.Atoi(value)
	outOfRange := parsed < 0 || parsed > 0x3ff
	if convErr != nil || outOfRange {
		glog.Fatalf("SEAWEEDFS_FILER_SNOWFLAKE_ID must be an integer in [0,1023], got %q", value)
	}
	return parsed
}
// ensureEntryInode derives a stable inode the same way the FUSE mount does, so
// the filer-stored value matches what a mount would otherwise compute and no
// per-object reverse index is required. Hard links hash their shared
// HardLinkId, so every link resolves to one inode; other entries hash the path
// and creation time.
func (f *Filer) ensureEntryInode(entry *Entry) {
if entry == nil || entry.Attr.Inode != 0 {
return
}
entry.Attr.Inode = f.nextInode()
}
func (f *Filer) nextInode() uint64 {
return f.inodeSequencer.NextFileId(1)
if entry.Attr.Crtime.IsZero() {
entry.Attr.Crtime = time.Now()
}
if len(entry.HardLinkId) > 0 {
entry.Attr.Inode = uint64(util.HashStringToLong(string(entry.HardLinkId)))
return
}
entry.Attr.Inode = entry.FullPath.AsInode(entry.Attr.Crtime.Unix())
}
-300
View File
@@ -1,300 +0,0 @@
package filer
import (
"context"
"encoding/binary"
"encoding/json"
"sort"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/util"
)
// inodeIndexKeyPrefix is the fixed prefix of every KV key built by
// InodeIndexKey; the 8-byte inode follows it.
const inodeIndexKeyPrefix = "filer.inode.path."
// InodeIndexInitialGeneration is the generation given to a record that has
// paths but no generation yet, including records decoded from the legacy
// raw-path format.
const InodeIndexInitialGeneration uint64 = 1
// inodeIndexEntry pairs an entry's full path with its inode. Values are
// gathered by the recursive directory walk in this file.
type inodeIndexEntry struct {
path util.FullPath
inode uint64
}
// InodeIndexRecord is the JSON value stored under an inode's index key. Paths
// lists the full paths recorded for the inode; normalize keeps it sorted,
// de-duplicated and free of empty strings.
type InodeIndexRecord struct {
Generation uint64 `json:"generation,omitempty"`
Paths []string `json:"paths,omitempty"`
}
// InodeIndexKey returns the KV key for inode: inodeIndexKeyPrefix followed by
// the inode encoded as 8 big-endian bytes.
func InodeIndexKey(inode uint64) []byte {
	key := make([]byte, 0, len(inodeIndexKeyPrefix)+8)
	key = append(key, inodeIndexKeyPrefix...)
	return binary.BigEndian.AppendUint64(key, inode)
}
// DecodeInodeIndexRecord parses a stored index value. An empty value yields
// an empty record. A value that does not start with '{' is treated as the
// legacy format, where the bytes are a single raw path; it is converted to a
// record at the initial generation so it is upgraded on the next write.
// Anything else is decoded as JSON and normalized; a JSON error is returned
// as-is with a nil record.
func DecodeInodeIndexRecord(value []byte) (*InodeIndexRecord, error) {
	switch {
	case len(value) == 0:
		return &InodeIndexRecord{}, nil
	case value[0] != '{':
		legacy := &InodeIndexRecord{Generation: InodeIndexInitialGeneration}
		legacy.addPath(util.FullPath(value))
		return legacy, nil
	}

	decoded := new(InodeIndexRecord)
	if err := json.Unmarshal(value, decoded); err != nil {
		return nil, err
	}
	decoded.normalize()
	return decoded, nil
}
// Encode normalizes the record in place and returns its JSON encoding.
func (record *InodeIndexRecord) Encode() ([]byte, error) {
	record.normalize()
	encoded, err := json.Marshal(record)
	return encoded, err
}
// normalize brings the record into canonical form: a record with paths gets
// the initial generation if it has none, empty paths are dropped, and the
// remainder is sorted and de-duplicated. A record without paths is left
// untouched; a record whose paths were all empty ends up with Paths == nil.
func (record *InodeIndexRecord) normalize() {
	if len(record.Paths) == 0 {
		return
	}
	if record.Generation == 0 {
		record.Generation = InodeIndexInitialGeneration
	}

	// Copy the non-empty paths into fresh storage.
	kept := make([]string, 0, len(record.Paths))
	for i := range record.Paths {
		if p := record.Paths[i]; p != "" {
			kept = append(kept, p)
		}
	}
	if len(kept) == 0 {
		record.Paths = nil
		return
	}

	sort.Strings(kept)

	// Compact runs of equal neighbours in place; unique counts the survivors.
	unique := 1
	for i := 1; i < len(kept); i++ {
		if kept[i] != kept[unique-1] {
			kept[unique] = kept[i]
			unique++
		}
	}
	record.Paths = kept[:unique]
}
// addPath inserts path into the sorted path list. It reports true when the
// path was added and false when path is empty or already present.
func (record *InodeIndexRecord) addPath(path util.FullPath) bool {
	if path == "" {
		return false
	}
	record.normalize()

	name := string(path)
	pos := sort.SearchStrings(record.Paths, name)
	alreadyPresent := pos < len(record.Paths) && record.Paths[pos] == name
	if alreadyPresent {
		return false
	}

	// Grow by one slot, shift the tail right, and drop the new path in.
	record.Paths = append(record.Paths, "")
	copy(record.Paths[pos+1:], record.Paths[pos:])
	record.Paths[pos] = name
	return true
}
// removePath deletes path from the sorted path list. It reports true when the
// path was found and removed, and false when path is empty or not recorded.
// Removing the last path leaves Paths == nil.
func (record *InodeIndexRecord) removePath(path util.FullPath) bool {
	if path == "" || len(record.Paths) == 0 {
		return false
	}
	record.normalize()

	name := string(path)
	pos := sort.SearchStrings(record.Paths, name)
	found := pos < len(record.Paths) && record.Paths[pos] == name
	if !found {
		return false
	}

	record.Paths = append(record.Paths[:pos], record.Paths[pos+1:]...)
	if len(record.Paths) == 0 {
		record.Paths = nil
	}
	return true
}
// CanonicalPath returns the first path of the normalized (sorted) path list,
// or the empty path when the record has none.
func (record *InodeIndexRecord) CanonicalPath() util.FullPath {
	record.normalize()
	if len(record.Paths) > 0 {
		return util.FullPath(record.Paths[0])
	}
	return ""
}
// FullPaths returns the normalized path list converted to util.FullPath
// values, or nil when the record has no paths.
func (record *InodeIndexRecord) FullPaths() []util.FullPath {
	record.normalize()

	count := len(record.Paths)
	if count == 0 {
		return nil
	}

	result := make([]util.FullPath, count)
	for i, p := range record.Paths {
		result[i] = util.FullPath(p)
	}
	return result
}
// lookupInodeIndex fetches and decodes the index record for inode. Inode 0
// is never indexed and yields ErrKvNotFound; KvGet and decode errors are
// returned unchanged.
func (fsw *FilerStoreWrapper) lookupInodeIndex(ctx context.Context, inode uint64) (*InodeIndexRecord, error) {
	if inode == 0 {
		return nil, ErrKvNotFound
	}

	raw, getErr := fsw.KvGet(ctx, InodeIndexKey(inode))
	if getErr != nil {
		return nil, getErr
	}
	return DecodeInodeIndexRecord(raw)
}
// storeInodeIndex adds path to the index record of inode, creating the record
// at the initial generation when none exists, and writes the record back. A
// zero inode or empty path is a no-op. Lookup errors other than ErrKvNotFound,
// encode errors and KvPut errors are returned.
func (fsw *FilerStoreWrapper) storeInodeIndex(ctx context.Context, path util.FullPath, inode uint64) error {
	if path == "" || inode == 0 {
		return nil
	}

	record, err := fsw.lookupInodeIndex(ctx, inode)
	switch err {
	case nil:
		// Existing record; extend it below.
	case ErrKvNotFound:
		record = &InodeIndexRecord{Generation: InodeIndexInitialGeneration}
	default:
		return err
	}

	record.addPath(path)

	encoded, err := record.Encode()
	if err != nil {
		return err
	}
	return fsw.KvPut(ctx, InodeIndexKey(inode), encoded)
}
// lookupInodePath returns the canonical (first sorted) path recorded for
// inode. It returns ErrKvNotFound when the record holds no path, and forwards
// any lookup error.
func (fsw *FilerStoreWrapper) lookupInodePath(ctx context.Context, inode uint64) (util.FullPath, error) {
	record, err := fsw.lookupInodeIndex(ctx, inode)
	if err != nil {
		return "", err
	}

	if canonical := record.CanonicalPath(); canonical != "" {
		return canonical, nil
	}
	return "", ErrKvNotFound
}
// lookupInodePaths returns every path recorded for inode. It returns
// ErrKvNotFound when the record holds no path, and forwards any lookup error.
func (fsw *FilerStoreWrapper) lookupInodePaths(ctx context.Context, inode uint64) ([]util.FullPath, error) {
	record, err := fsw.lookupInodeIndex(ctx, inode)
	if err != nil {
		return nil, err
	}

	if all := record.FullPaths(); len(all) > 0 {
		return all, nil
	}
	return nil, ErrKvNotFound
}
// removePathFromInodeIndex drops path from the index record of inode. The
// record is deleted when that was its last path and rewritten otherwise.
// Nothing happens for a zero inode, an empty path, a missing record, or a
// path the record does not contain.
func (fsw *FilerStoreWrapper) removePathFromInodeIndex(ctx context.Context, path util.FullPath, inode uint64) error {
	if path == "" || inode == 0 {
		return nil
	}

	record, err := fsw.lookupInodeIndex(ctx, inode)
	if err == ErrKvNotFound {
		return nil
	}
	if err != nil {
		return err
	}

	if removed := record.removePath(path); !removed {
		return nil
	}

	key := InodeIndexKey(inode)
	if len(record.Paths) == 0 {
		return fsw.KvDelete(ctx, key)
	}

	encoded, err := record.Encode()
	if err != nil {
		return err
	}
	return fsw.KvPut(ctx, key, encoded)
}
// collectInodeIndexEntries gathers a path/inode pair for every entry beneath
// dirPath that has a non-zero inode, descending into subdirectories.
//
// ctx is handed to every directory listing so that a walk over a pathological
// directory does not have to run to completion after the client has given up.
// On any error the partial result is discarded and nil is returned with the
// error; callers treat index cleanup as best-effort.
func (fsw *FilerStoreWrapper) collectInodeIndexEntries(ctx context.Context, dirPath util.FullPath) ([]inodeIndexEntry, error) {
	var entries []inodeIndexEntry

	err := fsw.collectInodeIndexEntriesRecursive(ctx, dirPath, &entries)
	if err != nil {
		return nil, err
	}
	return entries, nil
}
// collectInodeIndexEntriesRecursive walks dirPath depth-first, one page of
// PaginationSize entries at a time. For every entry with a non-zero inode it
// appends a path/inode pair to collected, and it descends into every
// directory entry.
//
// ctx is checked before each page is listed, so a cancelled caller aborts
// the walk promptly even if the underlying store does not observe ctx itself.
// The walk stops at the first ctx or listing error; pairs appended before the
// failure remain in collected.
func (fsw *FilerStoreWrapper) collectInodeIndexEntriesRecursive(ctx context.Context, dirPath util.FullPath, collected *[]inodeIndexEntry) error {
	actualStore := fsw.getActualStore(dirPath + "/")
	lastFileName := ""
	for {
		if err := ctx.Err(); err != nil {
			return err
		}

		// Buffer the page first so recursion happens outside the store's
		// listing callback.
		page := make([]*Entry, 0, PaginationSize)
		// The start file is never included: lastFileName is either empty or
		// the last entry already handled on the previous page.
		nextLastFileName, err := actualStore.ListDirectoryEntries(ctx, dirPath, lastFileName, false, PaginationSize, func(entry *Entry) (bool, error) {
			page = append(page, entry)
			return true, nil
		})
		if err != nil {
			return err
		}

		for _, entry := range page {
			if entry.Attr.Inode != 0 {
				*collected = append(*collected, inodeIndexEntry{path: entry.FullPath, inode: entry.Attr.Inode})
			}
			if entry.IsDirectory() {
				if err := fsw.collectInodeIndexEntriesRecursive(ctx, entry.FullPath, collected); err != nil {
					return err
				}
			}
		}

		// A short page means the directory is exhausted.
		if len(page) < PaginationSize {
			return nil
		}
		lastFileName = nextLastFileName
	}
}
// recordInodeIndexWrite adds path to the inode→path secondary index once the
// primary store write has already gone through.
//
// Index maintenance is deliberately best-effort. Returning an error here would
// make the caller report a failed create/update for an entry that was in fact
// persisted, and retrying would not repair the index (DeleteEntry returns
// early once the entry no longer exists). Failures are therefore only logged,
// with op naming the calling operation; a later write is expected to rebuild
// the record. Calls with a zero inode or an empty path are ignored.
func (fsw *FilerStoreWrapper) recordInodeIndexWrite(ctx context.Context, op string, path util.FullPath, inode uint64) {
	if path == "" || inode == 0 {
		return
	}
	err := fsw.storeInodeIndex(ctx, path, inode)
	if err == nil {
		return
	}
	glog.WarningfCtx(ctx, "%s: update inode index for %s (inode %d): %v", op, path, inode, err)
}
// recordInodeIndexRemoval is the removal counterpart of recordInodeIndexWrite:
// it removes path from the index record of inode and, like the write side,
// only logs a warning (tagged with op) when the index update fails. Calls with
// a zero inode or an empty path are ignored.
func (fsw *FilerStoreWrapper) recordInodeIndexRemoval(ctx context.Context, op string, path util.FullPath, inode uint64) {
	if path == "" || inode == 0 {
		return
	}
	err := fsw.removePathFromInodeIndex(ctx, path, inode)
	if err == nil {
		return
	}
	glog.WarningfCtx(ctx, "%s: clear inode index for %s (inode %d): %v", op, path, inode, err)
}
-206
View File
@@ -1,206 +0,0 @@
package filer
import (
"context"
"os"
"testing"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestFilerStoreWrapperMaintainsInodeIndexLifecycle drives a single entry
// through insert, update and delete and checks the inode index after each
// step: present with the initial generation after insert, still resolvable
// after update, and gone (ErrKvNotFound) after delete.
func TestFilerStoreWrapperMaintainsInodeIndexLifecycle(t *testing.T) {
	ctx := context.Background()
	fsw := NewFilerStoreWrapper(newStubFilerStore())

	original := &Entry{
		FullPath: util.FullPath("/docs/report.txt"),
		Attr:     Attr{Mode: 0o644, Inode: 42},
	}
	require.NoError(t, fsw.InsertEntry(ctx, original))

	// Insert: the inode resolves to exactly the inserted path.
	gotPath, err := fsw.lookupInodePath(ctx, original.Attr.Inode)
	require.NoError(t, err)
	assert.Equal(t, original.FullPath, gotPath)

	gotPaths, err := fsw.lookupInodePaths(ctx, original.Attr.Inode)
	require.NoError(t, err)
	assert.Equal(t, []util.FullPath{original.FullPath}, gotPaths)

	rec, err := fsw.lookupInodeIndex(ctx, original.Attr.Inode)
	require.NoError(t, err)
	assert.Equal(t, InodeIndexInitialGeneration, rec.Generation)

	// Update: same path and inode, different mode.
	modified := &Entry{
		FullPath: util.FullPath("/docs/report.txt"),
		Attr:     Attr{Mode: 0o600, Inode: 42},
	}
	require.NoError(t, fsw.UpdateEntry(ctx, modified))

	gotPath, err = fsw.lookupInodePath(ctx, modified.Attr.Inode)
	require.NoError(t, err)
	assert.Equal(t, modified.FullPath, gotPath)

	// Delete: the index record disappears with the entry.
	require.NoError(t, fsw.DeleteEntry(ctx, original.FullPath))

	_, err = fsw.lookupInodePath(ctx, original.Attr.Inode)
	require.ErrorIs(t, err, ErrKvNotFound)
}
// TestFilerStoreWrapperMaintainsMultiplePathsPerInode inserts two hard links
// that share one inode and checks that the index lists both paths, that the
// single-path lookup yields /links/a.txt while both exist, and that deleting
// /links/a.txt leaves /links/b.txt as the only indexed path.
func TestFilerStoreWrapperMaintainsMultiplePathsPerInode(t *testing.T) {
	fsw := NewFilerStoreWrapper(newStubFilerStore())
	ctx := context.Background()
	sharedInode := uint64(88)
	linkID := NewHardLinkId()

	// b is inserted before a on purpose: the expected listing below is in
	// a-before-b order regardless of insertion order.
	for _, linkPath := range []util.FullPath{"/links/b.txt", "/links/a.txt"} {
		require.NoError(t, fsw.InsertEntry(ctx, &Entry{
			FullPath:        linkPath,
			Attr:            Attr{Mode: 0o644, Inode: sharedInode},
			HardLinkId:      linkID,
			HardLinkCounter: 2,
		}))
	}

	gotPaths, err := fsw.lookupInodePaths(ctx, sharedInode)
	require.NoError(t, err)
	assert.Equal(t, []util.FullPath{"/links/a.txt", "/links/b.txt"}, gotPaths)

	rec, err := fsw.lookupInodeIndex(ctx, sharedInode)
	require.NoError(t, err)
	assert.Equal(t, InodeIndexInitialGeneration, rec.Generation)

	gotPath, err := fsw.lookupInodePath(ctx, sharedInode)
	require.NoError(t, err)
	assert.Equal(t, util.FullPath("/links/a.txt"), gotPath)

	// Removing one link must keep the record alive for the other.
	require.NoError(t, fsw.DeleteEntry(ctx, util.FullPath("/links/a.txt")))

	gotPaths, err = fsw.lookupInodePaths(ctx, sharedInode)
	require.NoError(t, err)
	assert.Equal(t, []util.FullPath{"/links/b.txt"}, gotPaths)

	gotPath, err = fsw.lookupInodePath(ctx, sharedInode)
	require.NoError(t, err)
	assert.Equal(t, util.FullPath("/links/b.txt"), gotPath)
}
// TestFilerStoreWrapperUpgradesLegacySinglePathInodeIndexRecords seeds the KV
// store with a legacy record whose value is just the raw path bytes, checks
// that lookups still understand it, and then verifies that adding a second
// path rewrites the record as JSON with a generation and a paths list.
func TestFilerStoreWrapperUpgradesLegacySinglePathInodeIndexRecords(t *testing.T) {
	ctx := context.Background()
	fsw := NewFilerStoreWrapper(newStubFilerStore())
	legacyInode := uint64(91)
	indexKey := InodeIndexKey(legacyInode)

	// Legacy format: the value is the bare path, not a JSON document.
	require.NoError(t, fsw.KvPut(ctx, indexKey, []byte("/legacy/path.txt")))

	gotPath, err := fsw.lookupInodePath(ctx, legacyInode)
	require.NoError(t, err)
	assert.Equal(t, util.FullPath("/legacy/path.txt"), gotPath)

	gotPaths, err := fsw.lookupInodePaths(ctx, legacyInode)
	require.NoError(t, err)
	assert.Equal(t, []util.FullPath{"/legacy/path.txt"}, gotPaths)

	// Storing another path upgrades the record in place.
	require.NoError(t, fsw.storeInodeIndex(ctx, util.FullPath("/legacy/second.txt"), legacyInode))

	gotPaths, err = fsw.lookupInodePaths(ctx, legacyInode)
	require.NoError(t, err)
	assert.Equal(t, []util.FullPath{"/legacy/path.txt", "/legacy/second.txt"}, gotPaths)

	stored, err := fsw.KvGet(ctx, indexKey)
	require.NoError(t, err)
	assert.JSONEq(t, `{"generation":1,"paths":["/legacy/path.txt","/legacy/second.txt"]}`, string(stored))
}
// TestFilerStoreWrapperKeepsInodeIndexWhenDeleteArrivesAfterRenameInsert
// replays a rename as "insert new path, then delete old path" for one inode
// and checks that the index ends up pointing only at the new path instead of
// being wiped by the trailing delete.
func TestFilerStoreWrapperKeepsInodeIndexWhenDeleteArrivesAfterRenameInsert(t *testing.T) {
	ctx := context.Background()
	fsw := NewFilerStoreWrapper(newStubFilerStore())
	renamedInode := uint64(77)

	// Old name first, then the new name under the same inode.
	for _, entryPath := range []util.FullPath{"/old/name.txt", "/new/name.txt"} {
		require.NoError(t, fsw.InsertEntry(ctx, &Entry{
			FullPath: entryPath,
			Attr:     Attr{Mode: 0o644, Inode: renamedInode},
		}))
	}

	require.NoError(t, fsw.DeleteEntry(ctx, util.FullPath("/old/name.txt")))

	gotPath, err := fsw.lookupInodePath(ctx, renamedInode)
	require.NoError(t, err)
	assert.Equal(t, util.FullPath("/new/name.txt"), gotPath)

	gotPaths, err := fsw.lookupInodePaths(ctx, renamedInode)
	require.NoError(t, err)
	assert.Equal(t, []util.FullPath{"/new/name.txt"}, gotPaths)
}
// TestRecursiveDeleteRemovesDescendantInodeIndexes builds a two-level tree,
// deletes its root recursively, and checks that neither the inode index
// records nor the entries themselves survive for any node of the tree.
func TestRecursiveDeleteRemovesDescendantInodeIndexes(t *testing.T) {
	f, store := newTestFilerWithStubStore()
	ctx := context.Background()

	// /tree
	// ├── file.txt
	// └── subdir/
	//     └── nested.txt
	tree := []*Entry{
		{FullPath: util.FullPath("/tree"), Attr: Attr{Mode: os.ModeDir | 0o755, Inode: 100}},
		{FullPath: util.FullPath("/tree/file.txt"), Attr: Attr{Mode: 0o644, Inode: 101}},
		{FullPath: util.FullPath("/tree/subdir"), Attr: Attr{Mode: os.ModeDir | 0o755, Inode: 102}},
		{FullPath: util.FullPath("/tree/subdir/nested.txt"), Attr: Attr{Mode: 0o644, Inode: 103}},
	}
	for i := range tree {
		require.NoError(t, f.Store.InsertEntry(ctx, tree[i]))
	}

	require.NoError(t, f.DeleteEntryMetaAndData(ctx, util.FullPath("/tree"), true, false, false, false, nil, 0))

	// Every inode of the tree must have lost its index record.
	fsw := f.Store.(*FilerStoreWrapper)
	for _, inode := range []uint64{100, 101, 102, 103} {
		_, err := fsw.lookupInodePath(ctx, inode)
		require.ErrorIs(t, err, ErrKvNotFound)
	}

	// And the entries themselves must be gone from the backing store.
	for _, removed := range []string{"/tree", "/tree/file.txt", "/tree/subdir", "/tree/subdir/nested.txt"} {
		_, err := store.FindEntry(ctx, util.FullPath(removed))
		require.Error(t, err)
	}
}
+42
View File
@@ -4,6 +4,7 @@ import (
"context"
"os"
"testing"
"time"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/util"
@@ -11,6 +12,47 @@ import (
"github.com/stretchr/testify/require"
)
// TestEnsureEntryInodeMatchesFuseDerivation checks that ensureEntryInode gives
// a non-hard-linked entry the inode FullPath.AsInode derives from its path and
// creation time, and that a second entry with the same path and creation time
// gets the same inode.
func TestEnsureEntryInodeMatchesFuseDerivation(t *testing.T) {
	created := time.Unix(1700000000, 0)
	f := &Filer{}

	first := &Entry{
		FullPath: util.FullPath("/dir/file.txt"),
		Attr:     Attr{Crtime: created},
	}
	f.ensureEntryInode(first)

	// The stored inode equals the path + crtime derivation.
	want := first.FullPath.AsInode(created.Unix())
	assert.Equal(t, want, first.Attr.Inode)

	// Same inputs, same inode: the derivation is deterministic.
	second := &Entry{FullPath: first.FullPath, Attr: Attr{Crtime: created}}
	f.ensureEntryInode(second)
	assert.Equal(t, first.Attr.Inode, second.Attr.Inode)
}
// TestEnsureEntryInodeSharesAcrossHardLinks checks that two entries with
// different paths and creation times but the same HardLinkId receive the same
// inode, and that this inode is the hash of the HardLinkId.
func TestEnsureEntryInodeSharesAcrossHardLinks(t *testing.T) {
	f := &Filer{}
	linkID := NewHardLinkId()

	linkA := &Entry{
		FullPath:   util.FullPath("/links/a.txt"),
		Attr:       Attr{Crtime: time.Unix(1700000000, 0)},
		HardLinkId: linkID,
	}
	linkB := &Entry{
		FullPath:   util.FullPath("/links/b.txt"),
		Attr:       Attr{Crtime: time.Unix(1800000000, 0)},
		HardLinkId: linkID,
	}
	for _, link := range []*Entry{linkA, linkB} {
		f.ensureEntryInode(link)
	}

	// The inode depends only on the shared hard link id, not on the path or
	// creation time of the individual link.
	wantInode := uint64(util.HashStringToLong(string(linkID)))
	assert.Equal(t, wantInode, linkA.Attr.Inode)
	assert.Equal(t, linkA.Attr.Inode, linkB.Attr.Inode)
}
func newTestFilerWithStubStore() (*Filer, *stubFilerStore) {
store := newStubFilerStore()
f := NewFiler(pb.ServerDiscovery{}, nil, "", "", "", "", "", 255, nil)
-1
View File
@@ -276,7 +276,6 @@ func newTestFiler(t *testing.T, store *stubFilerStore, rs *FilerRemoteStorage) *
FilerConf: NewFilerConf(),
MaxFilenameLength: 255,
MasterClient: mc,
inodeSequencer: newInodeSequencer("test-filer"),
fileIdDeletionQueue: util.NewUnboundedQueue(),
deletionQuit: make(chan struct{}),
LocalMetaLogBuffer: log_buffer.NewLogBuffer("test", time.Minute,
+6 -46
View File
@@ -132,7 +132,6 @@ func (fsw *FilerStoreWrapper) InsertEntry(ctx context.Context, entry *Entry) err
return err
}
ctx = context.WithoutCancel(ctx)
fullPath := entry.FullPath
actualStore := fsw.getActualStore(entry.FullPath)
stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "insert").Inc()
start := time.Now()
@@ -152,11 +151,7 @@ func (fsw *FilerStoreWrapper) InsertEntry(ctx context.Context, entry *Entry) err
return err
}
if err := actualStore.InsertEntry(ctx, entry); err != nil {
return err
}
fsw.recordInodeIndexWrite(ctx, "InsertEntry", fullPath, entry.Attr.Inode)
return nil
return actualStore.InsertEntry(ctx, entry)
}
// InsertEntryKnownAbsent skips the pre-insert FindEntry path when the caller has
@@ -166,7 +161,6 @@ func (fsw *FilerStoreWrapper) InsertEntryKnownAbsent(ctx context.Context, entry
return err
}
ctx = context.WithoutCancel(ctx)
fullPath := entry.FullPath
actualStore := fsw.getActualStore(entry.FullPath)
stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "insert").Inc()
start := time.Now()
@@ -185,11 +179,7 @@ func (fsw *FilerStoreWrapper) InsertEntryKnownAbsent(ctx context.Context, entry
}
}
if err := actualStore.InsertEntry(ctx, entry); err != nil {
return err
}
fsw.recordInodeIndexWrite(ctx, "InsertEntryKnownAbsent", fullPath, entry.Attr.Inode)
return nil
return actualStore.InsertEntry(ctx, entry)
}
func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) error {
@@ -197,7 +187,6 @@ func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) err
return err
}
ctx = context.WithoutCancel(ctx)
fullPath := entry.FullPath
actualStore := fsw.getActualStore(entry.FullPath)
stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "update").Inc()
start := time.Now()
@@ -217,11 +206,7 @@ func (fsw *FilerStoreWrapper) UpdateEntry(ctx context.Context, entry *Entry) err
return err
}
if err := actualStore.UpdateEntry(ctx, entry); err != nil {
return err
}
fsw.recordInodeIndexWrite(ctx, "UpdateEntry", fullPath, entry.Attr.Inode)
return nil
return actualStore.UpdateEntry(ctx, entry)
}
func normalizeEntryMimeForStore(entry *Entry) {
@@ -273,8 +258,6 @@ func (fsw *FilerStoreWrapper) DeleteEntry(ctx context.Context, fp util.FullPath)
if findErr == filer_pb.ErrNotFound || existingEntry == nil {
return nil
}
inode := existingEntry.Attr.Inode
fullPath := existingEntry.FullPath
if len(existingEntry.HardLinkId) != 0 {
// remove hard link
op := ctx.Value("OP")
@@ -289,11 +272,7 @@ func (fsw *FilerStoreWrapper) DeleteEntry(ctx context.Context, fp util.FullPath)
}
}
if err := actualStore.DeleteEntry(ctx, fp); err != nil {
return err
}
fsw.recordInodeIndexRemoval(ctx, "DeleteEntry", fullPath, inode)
return nil
return actualStore.DeleteEntry(ctx, fp)
}
func (fsw *FilerStoreWrapper) DeleteOneEntry(ctx context.Context, existingEntry *Entry) (err error) {
@@ -301,8 +280,6 @@ func (fsw *FilerStoreWrapper) DeleteOneEntry(ctx context.Context, existingEntry
return err
}
ctx = context.WithoutCancel(ctx)
fullPath := existingEntry.FullPath
inode := existingEntry.Attr.Inode
actualStore := fsw.getActualStore(existingEntry.FullPath)
stats.FilerStoreCounter.WithLabelValues(actualStore.GetName(), "delete").Inc()
start := time.Now()
@@ -325,11 +302,7 @@ func (fsw *FilerStoreWrapper) DeleteOneEntry(ctx context.Context, existingEntry
}
}
if err := actualStore.DeleteEntry(ctx, existingEntry.FullPath); err != nil {
return err
}
fsw.recordInodeIndexRemoval(ctx, "DeleteOneEntry", fullPath, inode)
return nil
return actualStore.DeleteEntry(ctx, existingEntry.FullPath)
}
func (fsw *FilerStoreWrapper) DeleteFolderChildren(ctx context.Context, fp util.FullPath) (err error) {
@@ -344,20 +317,7 @@ func (fsw *FilerStoreWrapper) DeleteFolderChildren(ctx context.Context, fp util.
stats.FilerStoreHistogram.WithLabelValues(actualStore.GetName(), "deleteFolderChildren").Observe(time.Since(start).Seconds())
}()
collected, err := fsw.collectInodeIndexEntries(ctx, fp)
if err != nil {
// Index collection is best-effort: a failure here only prevents inode
// index housekeeping, not the directory removal itself.
glog.WarningfCtx(ctx, "collectInodeIndexEntries %s: %v; deleting folder children without index cleanup", fp, err)
collected = nil
}
if err := actualStore.DeleteFolderChildren(ctx, fp); err != nil {
return err
}
for _, entry := range collected {
fsw.recordInodeIndexRemoval(ctx, "DeleteFolderChildren", entry.path, entry.inode)
}
return nil
return actualStore.DeleteFolderChildren(ctx, fp)
}
func (fsw *FilerStoreWrapper) ListDirectoryEntries(ctx context.Context, dirPath util.FullPath, startFileName string, includeStartFile bool, limit int64, eachEntryFunc ListEachEntryFunc) (string, error) {
-140
View File
@@ -1,140 +0,0 @@
package nfs
import (
"fmt"
"net"
"strings"
"github.com/seaweedfs/seaweedfs/weed/glog"
)
// clientAuthorizer decides whether a remote address may use the NFS service,
// based on an allowlist of IPs, hostnames and CIDR ranges.
type clientAuthorizer struct {
// exact is the set of literally allowed hosts: normalized IP strings, plus
// each hostname entry as written together with the IPs it resolved to when
// the authorizer was built.
exact map[string]struct{}
// cidrs maps each CIDR entry, as written in the configuration, to its
// parsed network.
cidrs map[string]*net.IPNet
// enabled becomes true once at least one non-empty entry has been accepted.
// While it is false, isAllowedAddr admits every address.
enabled bool
}
// newClientAuthorizer builds a clientAuthorizer from the configured allowlist.
//
// Each entry is trimmed and blank entries are skipped. An entry containing "/"
// must parse as a CIDR; an entry that parses as an IP (an optional "%zone"
// suffix is ignored) is stored in normalized form; anything else is treated as
// a hostname, resolved once via DNS, and stored together with every address it
// resolved to. A malformed CIDR, a failed lookup, or a lookup returning no
// addresses aborts construction with an error. The authorizer only starts
// enforcing once at least one entry has been accepted.
func newClientAuthorizer(allowed []string) (*clientAuthorizer, error) {
	a := &clientAuthorizer{
		exact: map[string]struct{}{},
		cidrs: map[string]*net.IPNet{},
	}

	for _, raw := range allowed {
		entry := strings.TrimSpace(raw)
		switch {
		case entry == "":
			// Blank entries neither allow anything nor enable enforcement.
			continue

		case strings.Contains(entry, "/"):
			_, network, err := net.ParseCIDR(entry)
			if err != nil {
				return nil, fmt.Errorf("parse allowed NFS client %q: %w", entry, err)
			}
			a.cidrs[entry] = network

		default:
			if literal := normalizeClientIP(entry); literal != nil {
				a.exact[literal.String()] = struct{}{}
			} else {
				// Not an IP literal: treat it as a hostname and pin the
				// addresses it resolves to right now.
				resolved, err := net.LookupIP(entry)
				if err != nil {
					return nil, fmt.Errorf("resolve allowed NFS client %q: %w", entry, err)
				}
				if len(resolved) == 0 {
					return nil, fmt.Errorf("resolve allowed NFS client %q: no addresses", entry)
				}
				a.exact[entry] = struct{}{}
				for _, addr := range resolved {
					if addr != nil {
						a.exact[addr.String()] = struct{}{}
					}
				}
			}
		}
		a.enabled = true
	}
	return a, nil
}
// isAllowedConn reports whether the peer of conn passes the allowlist. A nil
// conn is always allowed; otherwise the decision is delegated to isAllowedAddr
// on the connection's remote address.
func (a *clientAuthorizer) isAllowedConn(conn net.Conn) bool {
	return conn == nil || a.isAllowedAddr(conn.RemoteAddr())
}
// isAllowedAddr reports whether addr is covered by the allowlist.
//
// A nil or not-yet-enabled authorizer allows everything. Otherwise a nil
// address or one without a host part is rejected. The host is matched first
// verbatim against the exact set, then as a normalized IP against the exact
// set, and finally against every configured CIDR network.
func (a *clientAuthorizer) isAllowedAddr(addr net.Addr) bool {
	switch {
	case a == nil || !a.enabled:
		return true
	case addr == nil:
		return false
	}

	host := remoteHost(addr.String())
	if host == "" {
		return false
	}
	if _, listed := a.exact[host]; listed {
		return true
	}

	clientIP := normalizeClientIP(host)
	if clientIP == nil {
		// Not an IP and not listed verbatim: nothing left to match against.
		return false
	}
	if _, listed := a.exact[clientIP.String()]; listed {
		return true
	}
	for _, allowedNet := range a.cidrs {
		if allowedNet.Contains(clientIP) {
			return true
		}
	}
	return false
}
// remoteHost extracts the host part of a remote address string.
//
// Surrounding whitespace is ignored. When the input is a valid "host:port"
// (including "[v6]:port") the host is returned. Otherwise the trimmed input is
// returned as-is, except that a single pair of enclosing square brackets is
// stripped.
func remoteHost(remote string) string {
	trimmed := strings.TrimSpace(remote)
	if host, _, err := net.SplitHostPort(trimmed); err == nil {
		return host
	}
	// No port present: the whole string is the host, possibly a bracketed
	// IPv6 literal.
	bracketed := strings.HasPrefix(trimmed, "[") && strings.HasSuffix(trimmed, "]")
	if !bracketed {
		return trimmed
	}
	return trimmed[1 : len(trimmed)-1]
}
// normalizeClientIP parses host as an IP address after trimming whitespace and
// discarding everything from the last "%" onward (an IPv6 zone suffix). It
// returns nil when the remainder is not a valid IP literal.
func normalizeClientIP(host string) net.IP {
	candidate := strings.TrimSpace(host)
	zoneStart := strings.LastIndex(candidate, "%")
	if zoneStart >= 0 {
		candidate = candidate[:zoneStart]
	}
	return net.ParseIP(candidate)
}
// allowlistListener wraps a net.Listener so that its Accept method only hands
// out connections that the authorizer admits.
type allowlistListener struct {
net.Listener
// authorizer filters accepted connections; nil disables filtering.
authorizer *clientAuthorizer
}
// Accept blocks until the wrapped listener yields a connection that passes the
// allowlist, or until the wrapped listener fails. Connections from clients
// that are not allowed are logged, closed, and skipped.
func (l *allowlistListener) Accept() (net.Conn, error) {
	for {
		conn, err := l.Listener.Accept()
		if err != nil {
			return nil, err
		}

		rejected := l.authorizer != nil && !l.authorizer.isAllowedConn(conn)
		if !rejected {
			return conn, nil
		}

		glog.V(0).Infof("reject unauthorized nfs client %s", conn.RemoteAddr())
		// Best-effort close: the peer is being dropped anyway.
		_ = conn.Close()
	}
}
-29
View File
@@ -1,29 +0,0 @@
package nfs
import (
"net"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestClientAuthorizerResolvesHostnameEntries allowlists "localhost" by name
// and checks that at least one of the addresses it resolves to is admitted,
// while an unrelated address (192.0.2.10) is rejected.
func TestClientAuthorizerResolvesHostnameEntries(t *testing.T) {
	resolved, err := net.LookupIP("localhost")
	require.NoError(t, err)
	require.NotEmpty(t, resolved)

	authorizer, err := newClientAuthorizer([]string{"localhost"})
	require.NoError(t, err)

	// Stop at the first resolved address the authorizer admits.
	anyAllowed := false
	for i := 0; i < len(resolved) && !anyAllowed; i++ {
		anyAllowed = authorizer.isAllowedAddr(&net.TCPAddr{IP: resolved[i], Port: 2049})
	}
	assert.True(t, anyAllowed)

	outsider := &net.TCPAddr{IP: net.ParseIP("192.0.2.10"), Port: 2049}
	assert.False(t, authorizer.isAllowedAddr(outsider))
}
-251
View File
@@ -1,251 +0,0 @@
package nfs
import (
"context"
"encoding/binary"
"errors"
"fmt"
"hash/crc32"
"strings"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"google.golang.org/grpc"
)
// Wire-format parameters of an encoded NFS file handle.
const (
// fileHandleVersion is written to byte 0 by Encode and is the only version
// DecodeFileHandle accepts.
fileHandleVersion = 1
// fileHandleLength is the exact size in bytes of an encoded handle:
// version (1), kind (1), two bytes Encode leaves zero, export id (4),
// inode (8), generation (8) and a trailing CRC32 (4).
fileHandleLength = 28
)
// Errors reported while decoding or resolving file handles.
var (
// ErrInvalidHandle is wrapped by DecodeFileHandle for a wrong length,
// unsupported version, checksum mismatch, or zero generation.
ErrInvalidHandle = errors.New("invalid nfs filehandle")
// ErrHandleExportMismatch means the handle's export id differs from the
// export it is being resolved against.
ErrHandleExportMismatch = errors.New("nfs filehandle export mismatch")
// ErrStaleHandle means the handle no longer resolves: its index record is
// missing, its generation differs, or no indexed path matches it.
ErrStaleHandle = errors.New("stale nfs filehandle")
)
// FileHandleKind is the one-byte entry kind stored at byte 1 of an encoded
// handle.
type FileHandleKind uint8
const (
// FileHandleKindUnknown is the zero value.
FileHandleKindUnknown FileHandleKind = 0
// FileHandleKindFile marks a handle that ResolveHandle only matches against
// non-directory entries.
FileHandleKindFile FileHandleKind = 1
// FileHandleKindDirectory marks a handle that ResolveHandle only matches
// against directory entries.
FileHandleKindDirectory FileHandleKind = 2
)
// FileHandle is the decoded form of an NFS file handle.
type FileHandle struct {
// Kind says whether the handle refers to a file or a directory.
Kind FileHandleKind
// ExportID identifies the export the handle was issued for.
ExportID uint32
// Inode is the filer inode; zero is used for the synthetic export root.
Inode uint64
// Generation must equal the generation of the inode's index record for the
// handle to resolve. It is never zero in a decoded handle.
Generation uint64
}
// filerResolverClient is the subset of the filer client that Resolver needs:
// KV reads for the inode index and directory entry lookups.
type filerResolverClient interface {
KvGet(ctx context.Context, in *filer_pb.KvGetRequest, opts ...grpc.CallOption) (*filer_pb.KvGetResponse, error)
LookupDirectoryEntry(ctx context.Context, in *filer_pb.LookupDirectoryEntryRequest, opts ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error)
}
// Resolver turns encoded file handles back into filer paths and entries for
// one export.
type Resolver struct {
// exportRoot is the normalized root path of the export.
exportRoot util.FullPath
// exportID is derived from exportRoot and compared against each handle.
exportID uint32
// client performs the KV and entry lookups.
client filerResolverClient
}
// ResolvedHandle is the result of successfully resolving a file handle.
type ResolvedHandle struct {
// Handle is the decoded handle that was resolved.
Handle FileHandle
// Path is the filer path the handle resolved to.
Path util.FullPath
// Entry is the filer entry found at Path, or a synthetic root entry when
// the export root itself does not exist in the filer.
Entry *filer_pb.Entry
}
// NewFileHandle assembles a FileHandle from its parts. A zero generation is
// replaced with filer.InodeIndexInitialGeneration, so the returned handle
// never carries generation zero (which DecodeFileHandle would reject).
func NewFileHandle(exportID uint32, kind FileHandleKind, inode, generation uint64) FileHandle {
	handle := FileHandle{
		Kind:       kind,
		ExportID:   exportID,
		Inode:      inode,
		Generation: generation,
	}
	if handle.Generation == 0 {
		handle.Generation = filer.InodeIndexInitialGeneration
	}
	return handle
}
// Encode serializes the handle into its fixed fileHandleLength-byte wire form:
//
//	[0]      version
//	[1]      kind
//	[2:4]    left zero
//	[4:8]    export id, big endian
//	[8:16]   inode, big endian
//	[16:24]  generation, big endian
//	[24:28]  CRC32 (IEEE) of bytes [0:24], big endian
func (h FileHandle) Encode() []byte {
	encoded := make([]byte, fileHandleLength)
	encoded[0], encoded[1] = fileHandleVersion, byte(h.Kind)

	order := binary.BigEndian
	order.PutUint32(encoded[4:8], h.ExportID)
	order.PutUint64(encoded[8:16], h.Inode)
	order.PutUint64(encoded[16:24], h.Generation)

	// The checksum covers everything written above.
	checksum := crc32.ChecksumIEEE(encoded[:24])
	order.PutUint32(encoded[24:28], checksum)
	return encoded
}
// DecodeFileHandle parses the wire form produced by FileHandle.Encode.
//
// It returns an error wrapping ErrInvalidHandle when raw has the wrong length,
// carries an unsupported version byte, fails its CRC32 check, or encodes a
// zero generation. On error the returned FileHandle is the zero value.
func DecodeFileHandle(raw []byte) (FileHandle, error) {
	var none FileHandle

	if size := len(raw); size != fileHandleLength {
		return none, fmt.Errorf("%w: unexpected length %d", ErrInvalidHandle, size)
	}
	if version := raw[0]; version != fileHandleVersion {
		return none, fmt.Errorf("%w: unsupported version %d", ErrInvalidHandle, version)
	}

	// The trailing four bytes hold the checksum of everything before them.
	stored := binary.BigEndian.Uint32(raw[24:28])
	if computed := crc32.ChecksumIEEE(raw[:24]); stored != computed {
		return none, fmt.Errorf("%w: checksum mismatch", ErrInvalidHandle)
	}

	generation := binary.BigEndian.Uint64(raw[16:24])
	if generation == 0 {
		return none, fmt.Errorf("%w: empty generation", ErrInvalidHandle)
	}

	return FileHandle{
		Kind:       FileHandleKind(raw[1]),
		ExportID:   binary.BigEndian.Uint32(raw[4:8]),
		Inode:      binary.BigEndian.Uint64(raw[8:16]),
		Generation: generation,
	}, nil
}
// NewResolver creates a Resolver for the export rooted at exportRoot. The root
// is normalized first, and the export id is derived from the normalized root.
func NewResolver(exportRoot util.FullPath, client filerResolverClient) *Resolver {
	resolver := &Resolver{client: client}
	resolver.exportRoot = normalizeExportRoot(exportRoot)
	resolver.exportID = exportIDForRoot(resolver.exportRoot)
	return resolver
}
// ExportID returns the id of the export this resolver serves. It is safe to
// call on a nil Resolver, which reports 0.
func (r *Resolver) ExportID() uint32 {
	if r != nil {
		return r.exportID
	}
	return 0
}
// ResolveHandle decodes raw and maps it back to a filer path and entry.
//
// Resolution steps:
//  1. Decode the handle; malformed handles yield an ErrInvalidHandle error.
//  2. Reject handles issued for another export with ErrHandleExportMismatch.
//  3. Inode 0 denotes the synthetic export root and is resolved separately.
//  4. Load the inode's index record from the filer KV store. A missing record
//     or a generation mismatch yields ErrStaleHandle.
//  5. Walk the record's paths and return the first one that lies inside the
//     export, still exists, has a matching (or unset) inode, and whose
//     directory-ness agrees with the handle kind.
//
// If no path qualifies, ErrStaleHandle is returned. Lookup failures other than
// "not found" are returned to the caller as-is rather than being reported as a
// stale handle, so a transient filer error is not mistaken for a handle that
// has permanently gone away.
func (r *Resolver) ResolveHandle(ctx context.Context, raw []byte) (*ResolvedHandle, error) {
	if r == nil || r.client == nil {
		return nil, errors.New("nfs resolver is not configured")
	}

	handle, err := DecodeFileHandle(raw)
	if err != nil {
		return nil, err
	}
	if handle.ExportID != r.exportID {
		return nil, ErrHandleExportMismatch
	}
	if handle.Inode == 0 {
		return r.resolveSyntheticRoot(ctx, handle)
	}

	kvResp, err := r.client.KvGet(ctx, &filer_pb.KvGetRequest{Key: filer.InodeIndexKey(handle.Inode)})
	if err != nil {
		return nil, err
	}
	if kvResp.GetError() != "" {
		return nil, errors.New(kvResp.GetError())
	}
	if len(kvResp.GetValue()) == 0 {
		// No index record: the inode is unknown or was deleted.
		return nil, ErrStaleHandle
	}

	record, err := filer.DecodeInodeIndexRecord(kvResp.GetValue())
	if err != nil {
		return nil, err
	}
	if record.Generation != handle.Generation {
		return nil, ErrStaleHandle
	}

	for _, path := range record.FullPaths() {
		if !pathVisibleFromExport(path, r.exportRoot) {
			continue
		}

		dir, name := path.DirAndName()
		lookupResp, lookupErr := r.client.LookupDirectoryEntry(ctx, &filer_pb.LookupDirectoryEntryRequest{
			Directory: dir,
			Name:      name,
		})
		// Order matters: a failed call normally comes with a nil response, so
		// the error must be inspected before the response is checked for nil.
		if isLookupNotFound(lookupErr) {
			continue
		}
		if lookupErr != nil {
			return nil, lookupErr
		}
		if lookupResp == nil || lookupResp.Entry == nil {
			continue
		}

		// The path may have been reused by a different inode since the index
		// record was written.
		if attrs := lookupResp.Entry.Attributes; attrs != nil && attrs.Inode != 0 && attrs.Inode != handle.Inode {
			continue
		}
		if handle.Kind == FileHandleKindDirectory && !lookupResp.Entry.IsDirectory {
			continue
		}
		if handle.Kind == FileHandleKindFile && lookupResp.Entry.IsDirectory {
			continue
		}

		return &ResolvedHandle{
			Handle: handle,
			Path:   path,
			Entry:  lookupResp.Entry,
		}, nil
	}

	return nil, ErrStaleHandle
}
// resolveSyntheticRoot resolves the inode-0 handle that stands for the export
// root. Only a directory handle with the initial generation is accepted;
// anything else is stale.
//
// The export root is looked up in the filer. If it exists, its real entry is
// returned. If the lookup reports "not found", or succeeds without an entry, a
// synthetic root entry is returned instead. Any other lookup error is
// returned as-is.
func (r *Resolver) resolveSyntheticRoot(ctx context.Context, handle FileHandle) (*ResolvedHandle, error) {
	validRoot := handle.Kind == FileHandleKindDirectory && handle.Generation == filer.InodeIndexInitialGeneration
	if !validRoot {
		return nil, ErrStaleHandle
	}

	dir, name := r.exportRoot.DirAndName()
	req := &filer_pb.LookupDirectoryEntryRequest{Directory: dir, Name: name}
	lookupResp, err := r.client.LookupDirectoryEntry(ctx, req)

	switch {
	case isLookupNotFound(err):
		// Fall through to the synthetic entry below.
	case err != nil:
		return nil, err
	case lookupResp != nil && lookupResp.Entry != nil:
		return &ResolvedHandle{
			Handle: handle,
			Path:   r.exportRoot,
			Entry:  lookupResp.Entry,
		}, nil
	}

	// The root has no entry of its own in the filer; fabricate one.
	return &ResolvedHandle{
		Handle: handle,
		Path:   r.exportRoot,
		Entry:  syntheticRootEntry(),
	}, nil
}
// normalizeExportRoot returns root in the form produced by util.NormalizePath,
// or "/" when normalization yields an empty path.
func normalizeExportRoot(root util.FullPath) util.FullPath {
	normalized := util.NormalizePath(string(root))
	if normalized == "" {
		return "/"
	}
	return normalized
}
// exportIDForRoot derives the export id for root: the CRC32 (IEEE) checksum of
// the normalized root path.
func exportIDForRoot(root util.FullPath) uint32 {
	normalized := normalizeExportRoot(root)
	return crc32.ChecksumIEEE([]byte(normalized))
}
// pathVisibleFromExport reports whether path is the export root itself or lies
// underneath it.
func pathVisibleFromExport(path, exportRoot util.FullPath) bool {
	if path == exportRoot {
		return true
	}
	return path.IsUnder(exportRoot)
}
// isLookupNotFound reports whether err means "entry not found".
//
// It matches filer_pb.ErrNotFound anywhere in err's wrap chain and, as a
// fallback, any error whose message contains the sentinel's text. The text
// match is presumably there for errors that crossed an RPC boundary and lost
// their identity (confirm against the filer client). A nil err is never
// "not found".
func isLookupNotFound(err error) bool {
	if err == nil {
		return false
	}
	if errors.Is(err, filer_pb.ErrNotFound) {
		return true
	}
	return strings.Contains(err.Error(), filer_pb.ErrNotFound.Error())
}
-182
View File
@@ -1,182 +0,0 @@
package nfs
import (
"context"
"testing"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
)
// fakeResolverClient is an in-memory stand-in for the filer client used by the
// resolver tests.
type fakeResolverClient struct {
// kv backs KvGet, keyed by the request key converted to a string.
kv map[string][]byte
// entries backs LookupDirectoryEntry, keyed by full path.
entries map[util.FullPath]*filer_pb.Entry
}
// KvGet returns the value stored under the request key, or an empty response
// (no value, no error) when the key is absent.
func (f *fakeResolverClient) KvGet(_ context.Context, in *filer_pb.KvGetRequest, _ ...grpc.CallOption) (*filer_pb.KvGetResponse, error) {
	resp := &filer_pb.KvGetResponse{}
	if stored, ok := f.kv[string(in.Key)]; ok {
		resp.Value = stored
	}
	return resp, nil
}
// LookupDirectoryEntry returns the entry registered for directory/name, or a
// nil response with filer_pb.ErrNotFound when none is registered.
func (f *fakeResolverClient) LookupDirectoryEntry(_ context.Context, in *filer_pb.LookupDirectoryEntryRequest, _ ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error) {
	entry, ok := f.entries[util.NewFullPath(in.Directory, in.Name)]
	if !ok {
		return nil, filer_pb.ErrNotFound
	}
	return &filer_pb.LookupDirectoryEntryResponse{Entry: entry}, nil
}
// TestFileHandleEncodeDecodeRoundTrip checks that a handle survives
// Encode/DecodeFileHandle unchanged and that corrupting the last checksum byte
// makes decoding fail with ErrInvalidHandle.
func TestFileHandleEncodeDecodeRoundTrip(t *testing.T) {
	original := NewFileHandle(1234, FileHandleKindDirectory, 5678, 9)
	encoded := original.Encode()

	roundTripped, err := DecodeFileHandle(encoded)
	require.NoError(t, err)
	assert.Equal(t, original, roundTripped)

	// Flip every bit of the final byte, which is part of the checksum.
	last := len(encoded) - 1
	encoded[last] ^= 0xff

	_, err = DecodeFileHandle(encoded)
	require.ErrorIs(t, err, ErrInvalidHandle)
}
// TestResolverUsesPathVisibleFromExportRoot indexes one inode under two paths,
// only one of which lies inside the export, and checks that the resolver
// returns the in-export path.
func TestResolverUsesPathVisibleFromExportRoot(t *testing.T) {
	const (
		inode      = uint64(101)
		generation = uint64(7)
	)
	client := &fakeResolverClient{
		kv:      map[string][]byte{},
		entries: map[util.FullPath]*filer_pb.Entry{},
	}
	resolver := NewResolver("/exports", client)

	// The first path is outside /exports and must be skipped.
	indexed := &filer.InodeIndexRecord{
		Generation: generation,
		Paths:      []string{"/a/other.txt", "/exports/demo/link.txt"},
	}
	encoded, err := indexed.Encode()
	require.NoError(t, err)
	client.kv[string(filer.InodeIndexKey(inode))] = encoded
	client.entries["/exports/demo/link.txt"] = &filer_pb.Entry{
		Name:       "link.txt",
		Attributes: &filer_pb.FuseAttributes{Inode: inode},
	}

	raw := NewFileHandle(resolver.ExportID(), FileHandleKindFile, inode, generation).Encode()
	resolved, err := resolver.ResolveHandle(context.Background(), raw)
	require.NoError(t, err)
	assert.Equal(t, util.FullPath("/exports/demo/link.txt"), resolved.Path)
	require.NotNil(t, resolved.Entry)
	assert.Equal(t, uint64(101), resolved.Entry.Attributes.Inode)
}
// TestResolverRejectsGenerationMismatch checks that a handle whose generation
// (4) differs from the index record's generation (3) is reported as stale even
// though the path and inode still match.
func TestResolverRejectsGenerationMismatch(t *testing.T) {
	const inode = uint64(44)
	client := &fakeResolverClient{
		kv:      map[string][]byte{},
		entries: map[util.FullPath]*filer_pb.Entry{},
	}
	resolver := NewResolver("/", client)

	indexed := &filer.InodeIndexRecord{
		Generation: 3,
		Paths:      []string{"/data/file.txt"},
	}
	encoded, err := indexed.Encode()
	require.NoError(t, err)
	client.kv[string(filer.InodeIndexKey(inode))] = encoded
	client.entries["/data/file.txt"] = &filer_pb.Entry{
		Name:       "file.txt",
		Attributes: &filer_pb.FuseAttributes{Inode: inode},
	}

	// The handle is one generation ahead of the record.
	raw := NewFileHandle(resolver.ExportID(), FileHandleKindFile, inode, 4).Encode()
	_, err = resolver.ResolveHandle(context.Background(), raw)
	require.ErrorIs(t, err, ErrStaleHandle)
}
// TestResolverKeepsHandleValidAcrossRename models the state after a rename:
// the index record lists only the new name. A handle carrying the same inode
// and generation must resolve to that new path.
func TestResolverKeepsHandleValidAcrossRename(t *testing.T) {
	const (
		inode      = uint64(88)
		generation = uint64(5)
	)
	client := &fakeResolverClient{
		kv:      map[string][]byte{},
		entries: map[util.FullPath]*filer_pb.Entry{},
	}
	resolver := NewResolver("/exports", client)

	indexed := &filer.InodeIndexRecord{
		Generation: generation,
		Paths:      []string{"/exports/new-name.txt"},
	}
	encoded, err := indexed.Encode()
	require.NoError(t, err)
	client.kv[string(filer.InodeIndexKey(inode))] = encoded
	client.entries["/exports/new-name.txt"] = &filer_pb.Entry{
		Name:       "new-name.txt",
		Attributes: &filer_pb.FuseAttributes{Inode: inode},
	}

	raw := NewFileHandle(resolver.ExportID(), FileHandleKindFile, inode, generation).Encode()
	resolved, err := resolver.ResolveHandle(context.Background(), raw)
	require.NoError(t, err)
	assert.Equal(t, util.FullPath("/exports/new-name.txt"), resolved.Path)
	require.NotNil(t, resolved.Entry)
	assert.Equal(t, uint64(88), resolved.Entry.Attributes.Inode)
}
// TestResolverRejectsHandleAfterDeleteRecreateWithNewInode models a path that
// was deleted and recreated: the index record for the old inode (77) still
// names the path, but the entry now at that path carries inode 999. The old
// handle must be reported as stale.
func TestResolverRejectsHandleAfterDeleteRecreateWithNewInode(t *testing.T) {
	const (
		oldInode   = uint64(77)
		generation = uint64(4)
	)
	client := &fakeResolverClient{
		kv:      map[string][]byte{},
		entries: map[util.FullPath]*filer_pb.Entry{},
	}
	resolver := NewResolver("/exports", client)

	// The path now belongs to a different inode.
	client.entries["/exports/file.txt"] = &filer_pb.Entry{
		Name:       "file.txt",
		Attributes: &filer_pb.FuseAttributes{Inode: 999},
	}

	staleRecord := &filer.InodeIndexRecord{
		Generation: generation,
		Paths:      []string{"/exports/file.txt"},
	}
	encoded, err := staleRecord.Encode()
	require.NoError(t, err)
	client.kv[string(filer.InodeIndexKey(oldInode))] = encoded

	raw := NewFileHandle(resolver.ExportID(), FileHandleKindFile, oldInode, generation).Encode()
	_, err = resolver.ResolveHandle(context.Background(), raw)
	require.ErrorIs(t, err, ErrStaleHandle)
}
// TestResolverSupportsSyntheticRootHandle resolves the inode-0 directory
// handle against a filer that has no entry for the export root and checks that
// a directory entry for "/" is still returned.
func TestResolverSupportsSyntheticRootHandle(t *testing.T) {
	// Both maps are empty: the root has neither an index record nor an entry.
	client := &fakeResolverClient{
		kv:      map[string][]byte{},
		entries: map[util.FullPath]*filer_pb.Entry{},
	}
	resolver := NewResolver("/", client)

	rootHandle := NewFileHandle(resolver.ExportID(), FileHandleKindDirectory, 0, filer.InodeIndexInitialGeneration)
	resolved, err := resolver.ResolveHandle(context.Background(), rootHandle.Encode())
	require.NoError(t, err)
	assert.Equal(t, util.FullPath("/"), resolved.Path)
	require.NotNil(t, resolved.Entry)
	assert.True(t, resolved.Entry.IsDirectory)
}
func TestNewServerNormalizesExportRootAndExportID(t *testing.T) {
server, err := NewServer(&Option{
FilerRootPath: "/export/path/",
Port: 2049,
})
require.NoError(t, err)
assert.Equal(t, util.FullPath("/export/path"), server.exportRoot)
assert.Equal(t, exportIDForRoot("/export/path"), server.exportID)
}
File diff suppressed because it is too large Load Diff
-182
View File
@@ -1,182 +0,0 @@
package nfs
import (
"context"
"net"
"os"
"strings"
billy "github.com/go-git/go-billy/v5"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
gonfs "github.com/willscott/go-nfs"
)
// Handler implements the go-nfs Handler interface on top of a SeaweedFS filer.
type Handler struct {
// server provides the export configuration and filer client access.
server *Server
// rootFS is the filesystem served for the export root.
rootFS *seaweedFileSystem
}
// Compile-time check that *Handler satisfies gonfs.Handler.
var _ gonfs.Handler = (*Handler)(nil)
// Mount handles a MOUNT request. Clients rejected by the allowlist get
// MountStatusErrAcces. Otherwise the requested directory is resolved to a
// filesystem; on failure the resolution status is returned with no filesystem.
// Failed mounts advertise only the null auth flavor; successful mounts
// advertise both the null and unix flavors.
func (h *Handler) Mount(ctx context.Context, conn net.Conn, req gonfs.MountRequest) (gonfs.MountStatus, billy.Filesystem, []gonfs.AuthFlavor) {
	nullOnly := []gonfs.AuthFlavor{gonfs.AuthFlavorNull}

	if authorizer := h.server.clientAuthorizer; authorizer != nil && !authorizer.isAllowedConn(conn) {
		return gonfs.MountStatusErrAcces, nil, nullOnly
	}

	mounted, status := h.resolveMountFilesystem(ctx, string(req.Dirpath))
	if status != gonfs.MountStatusOk {
		return status, nil, nullOnly
	}
	return gonfs.MountStatusOk, mounted, []gonfs.AuthFlavor{gonfs.AuthFlavorNull, gonfs.AuthFlavorUnix}
}
// resolveMountFilesystem maps the directory path of a MOUNT3 request to the
// filesystem to serve:
//
//   - the export root itself serves the root filesystem;
//   - a path outside the export also serves the root filesystem, with an INFO
//     log noting the fallback;
//   - a path strictly under the export is mounted at that subdirectory, or
//     fails with NoEnt / NotDir when it is missing or not a directory, and
//     with ServerFault when the lookup itself fails.
//
// The UDP MOUNT path mirrors this in mount_udp.go.
func (h *Handler) resolveMountFilesystem(ctx context.Context, requestedPath string) (*seaweedFileSystem, gonfs.MountStatus) {
	exportRoot := h.server.exportRoot
	requested := normalizeExportRoot(util.FullPath(requestedPath))

	// Common case: the client asked for the export root itself.
	if requested == exportRoot {
		return h.rootFS, h.lstatExportStatus(ctx)
	}
	// Anything outside the export falls back to the configured export.
	if !requested.IsUnder(exportRoot) {
		glog.V(0).Infof("nfs mount: client requested %q (outside export %q); serving configured export", requestedPath, exportRoot)
		return h.rootFS, h.lstatExportStatus(ctx)
	}

	entry, err := h.lookupSubexportEntry(ctx, requested)
	if err != nil {
		if isLookupNotFound(err) {
			return nil, gonfs.MountStatusErrNoEnt
		}
		glog.Errorf("nfs mount: lookup %q under export %q failed: %v", requested, exportRoot, err)
		return nil, gonfs.MountStatusErrServerFault
	}
	if entry == nil {
		return nil, gonfs.MountStatusErrNoEnt
	}
	if !entry.IsDirectory {
		return nil, gonfs.MountStatusErrNotDir
	}

	glog.V(1).Infof("nfs mount: client requested %q under export %q; mounting at subdirectory", requestedPath, exportRoot)
	return newSeaweedFileSystem(h.server, requested, h.server.sharedReaderCache), gonfs.MountStatusOk
}
// lstatExportStatus stats the root of the export filesystem and translates the
// outcome into a mount status: Ok on success, NoEnt when the root does not
// exist, ServerFault for any other error.
func (h *Handler) lstatExportStatus(ctx context.Context) gonfs.MountStatus {
	_, err := h.rootFS.fileInfoForVirtualPath(ctx, "/")
	switch {
	case err == nil:
		return gonfs.MountStatusOk
	case os.IsNotExist(err):
		return gonfs.MountStatusErrNoEnt
	default:
		return gonfs.MountStatusErrServerFault
	}
}
// lookupSubexportEntry fetches the filer entry at p using the server's
// internal filer client. It returns the lookup error, if any; on success the
// entry may still be nil when the filer answered without one.
func (h *Handler) lookupSubexportEntry(ctx context.Context, p util.FullPath) (*filer_pb.Entry, error) {
	var found *filer_pb.Entry

	lookup := func(client nfsFilerClient) error {
		dir, name := p.DirAndName()
		req := &filer_pb.LookupDirectoryEntryRequest{Directory: dir, Name: name}
		resp, err := client.LookupDirectoryEntry(ctx, req)
		if err != nil {
			return err
		}
		if resp != nil {
			found = resp.Entry
		}
		return nil
	}

	err := h.server.withInternalClient(false, lookup)
	return found, err
}
// Change returns the billy.Change implementation of filesystem, or nil when
// the server is configured read-only or the filesystem does not implement
// billy.Change.
func (h *Handler) Change(filesystem billy.Filesystem) billy.Change {
	readOnly := h.server != nil && h.server.option != nil && h.server.option.ReadOnly
	if readOnly {
		return nil
	}
	// A failed assertion leaves changer as the nil interface value.
	changer, _ := filesystem.(billy.Change)
	return changer
}
// FSStat fills stat from the filer's Statistics RPC.
//
// TotalSize and TotalFiles are copied from the response. FreeSize and
// AvailableSize are both set to TotalSize-UsedSize, but only when UsedSize
// does not exceed TotalSize; otherwise they are left as the caller provided
// them. A nil response leaves stat untouched and is not an error.
func (h *Handler) FSStat(ctx context.Context, _ billy.Filesystem, stat *gonfs.FSStat) error {
	fill := func(client nfsFilerClient) error {
		usage, err := client.Statistics(ctx, &filer_pb.StatisticsRequest{})
		if err != nil {
			return err
		}
		if usage == nil {
			return nil
		}
		stat.TotalSize = usage.TotalSize
		if usage.UsedSize <= usage.TotalSize {
			remaining := usage.TotalSize - usage.UsedSize
			stat.FreeSize = remaining
			stat.AvailableSize = remaining
		}
		stat.TotalFiles = usage.FileCount
		return nil
	}
	return h.server.withInternalClient(false, fill)
}
// ToHandle encodes a file handle for the entry at path within filesystem.
//
// A filesystem that is not a *seaweedFileSystem is treated as the handler's
// root filesystem. When the path cannot be resolved the result is nil. The
// export root directory, when it carries no inode (inode 0), gets a directory
// handle with inode 0 and the initial index generation; every other entry
// gets a handle built from its own kind, inode and generation.
func (h *Handler) ToHandle(filesystem billy.Filesystem, path []string) []byte {
	var target *seaweedFileSystem
	if typed, ok := filesystem.(*seaweedFileSystem); ok {
		target = typed
	} else {
		target = h.rootFS
	}

	info, err := target.fileInfoForVirtualPath(context.Background(), target.Join(path...))
	if err != nil {
		return nil
	}

	inode := info.entry.GetAttributes().GetInode()
	inodelessExportRoot := inode == 0 && info.actualPath == h.server.exportRoot && info.entry.IsDirectory
	if !inodelessExportRoot {
		return NewFileHandle(h.server.exportID, fileHandleKindForEntry(info.entry), inode, info.generation).Encode()
	}
	return NewFileHandle(h.server.exportID, FileHandleKindDirectory, 0, filer.InodeIndexInitialGeneration).Encode()
}
// FromHandle resolves an encoded file handle back to a filesystem and the
// path components of the entry relative to the export root.
//
// The handle is resolved through a Resolver bound to the export root. A
// handle that resolves to the export root itself yields the root filesystem
// with a nil path. A path that is not visible from the export yields
// ErrHandleExportMismatch. Otherwise the export-root prefix is trimmed
// (unless the export root is "/") and the normalized remainder is split into
// components.
func (h *Handler) FromHandle(raw []byte) (billy.Filesystem, []string, error) {
	var resolved *ResolvedHandle
	resolve := func(client nfsFilerClient) error {
		handle, resolveErr := NewResolver(h.server.exportRoot, client).ResolveHandle(context.Background(), raw)
		resolved = handle
		return resolveErr
	}
	if err := h.server.withInternalClient(false, resolve); err != nil {
		return nil, nil, err
	}

	switch {
	case resolved.Path == h.server.exportRoot:
		return h.rootFS, nil, nil
	case !pathVisibleFromExport(resolved.Path, h.server.exportRoot):
		return nil, nil, ErrHandleExportMismatch
	}

	relative := string(resolved.Path)
	if h.server.exportRoot != "/" {
		relative = strings.TrimPrefix(relative, string(h.server.exportRoot))
	}
	return h.rootFS, util.NormalizePath(relative).Split(), nil
}
// InvalidateHandle is a no-op: both arguments are ignored and it always
// returns nil.
func (h *Handler) InvalidateHandle(billy.Filesystem, []byte) error {
return nil
}
// HandleLimit returns the handle limit configured on the owning server.
func (h *Handler) HandleLimit() int {
return h.server.handleLimit
}
// fileHandleKindForEntry classifies an entry for handle encoding: directory
// entries map to FileHandleKindDirectory, everything else (including a nil
// entry) maps to FileHandleKindFile.
func fileHandleKindForEntry(entry *filer_pb.Entry) FileHandleKind {
	if entry == nil || !entry.IsDirectory {
		return FileHandleKindFile
	}
	return FileHandleKindDirectory
}
-880
View File
@@ -1,880 +0,0 @@
package nfs
import (
"bytes"
"context"
"crypto/md5"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"math/rand"
"mime/multipart"
"net"
"net/http"
"net/http/httptest"
"path"
"strconv"
"strings"
"sync"
"testing"
"time"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
"github.com/seaweedfs/seaweedfs/weed/wdclient"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
gonfs "github.com/willscott/go-nfs"
nfsclient "github.com/willscott/go-nfs-client/nfs"
"github.com/willscott/go-nfs-client/nfs/rpc"
"github.com/willscott/go-nfs-client/nfs/xdr"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// fakeVolumeBlob is a single uploaded payload stored by fakeVolumeServer.
type fakeVolumeBlob struct {
// data is the uploaded body.
data []byte
// contentEncoding is the Content-Encoding header of the uploaded part; it is
// echoed back on GET when non-empty.
contentEncoding string
}
// fakeVolumeServer is an in-memory HTTP stand-in for a volume server: POST
// stores a blob under the request path, GET serves it back.
type fakeVolumeServer struct {
// mu guards blobs.
mu sync.Mutex
// blobs maps a file id (the URL path without its leading slash) to the stored upload.
blobs map[string]fakeVolumeBlob
// server is the httptest server whose handler is serveHTTP.
server *httptest.Server
}
// fakeVolumeControlPlane is a gRPC filer stub that implements AssignVolume and
// LookupVolume only, always pointing callers at a single host, and records the
// requests it receives. All other RPCs fall through to the embedded
// UnimplementedSeaweedFilerServer.
type fakeVolumeControlPlane struct {
filer_pb.UnimplementedSeaweedFilerServer
// mu guards nextID, assigns and lookups.
mu sync.Mutex
// host is returned as the location URL for every assignment and lookup.
host string
// nextID is incremented per assignment and formatted into the returned file id.
nextID int
// assigns records every AssignVolume request in arrival order.
assigns []*filer_pb.AssignVolumeRequest
// lookups records every LookupVolume request in arrival order.
lookups []*filer_pb.LookupVolumeRequest
}
// initIntegrationHTTPClient ensures the one-time HTTP client initialisation
// needed by the chunked-write test runs at most once per test binary.
var initIntegrationHTTPClient sync.Once
// nfsProc3Link is the NFSv3 procedure number used for LINK calls in nfsLink.
const nfsProc3Link = 15
// newFakeVolumeServer starts an in-memory fake volume server backed by an
// httptest server and registers its shutdown with t.Cleanup.
func newFakeVolumeServer(t *testing.T) *fakeVolumeServer {
	t.Helper()

	fake := new(fakeVolumeServer)
	fake.blobs = map[string]fakeVolumeBlob{}
	fake.server = httptest.NewServer(http.HandlerFunc(fake.serveHTTP))
	t.Cleanup(fake.server.Close)

	return fake
}
// host returns the fake server's URL with a leading "http://" removed.
func (f *fakeVolumeServer) host() string {
	const scheme = "http://"
	return strings.TrimPrefix(f.server.URL, scheme)
}
// serveHTTP implements the fake volume server's upload/download protocol.
//
// The file id is the request path without its leading slash; an empty id is a
// 404. POST reads the multipart part named "file", stores a copy of its bytes
// together with the part's Content-Encoding, and answers with Content-MD5 and
// ETag headers (base64 of the MD5 digest) plus a JSON body carrying the name
// and size. GET serves the stored bytes, replaying the stored
// Content-Encoding, or 404 when the id is unknown. Any other method is a 405.
func (f *fakeVolumeServer) serveHTTP(w http.ResponseWriter, r *http.Request) {
	fileID := strings.TrimPrefix(r.URL.Path, "/")
	if len(fileID) == 0 {
		http.NotFound(w, r)
		return
	}

	if r.Method == http.MethodPost {
		upload, err := firstMultipartFile(r)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		defer upload.Close()

		body, err := io.ReadAll(upload)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}

		stored := fakeVolumeBlob{
			data:            bytes.Clone(body),
			contentEncoding: upload.Header.Get("Content-Encoding"),
		}
		digest := md5.Sum(body)
		encodedDigest := base64.StdEncoding.EncodeToString(digest[:])

		f.mu.Lock()
		f.blobs[fileID] = stored
		f.mu.Unlock()

		header := w.Header()
		header.Set("Content-MD5", encodedDigest)
		header.Set("ETag", `"`+encodedDigest+`"`)
		header.Set("Content-Type", "application/json")
		// Encoding errors are deliberately ignored in this test double.
		_ = json.NewEncoder(w).Encode(map[string]any{
			"name": path.Base(fileID),
			"size": len(body),
		})
		return
	}

	if r.Method == http.MethodGet {
		f.mu.Lock()
		stored, known := f.blobs[fileID]
		f.mu.Unlock()
		if !known {
			http.NotFound(w, r)
			return
		}
		if stored.contentEncoding != "" {
			w.Header().Set("Content-Encoding", stored.contentEncoding)
		}
		http.ServeContent(w, r, fileID, time.Unix(0, 0), bytes.NewReader(stored.data))
		return
	}

	http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
}
// firstMultipartFile scans the multipart body of r and returns the first part
// whose form name is "file". Parts with any other form name are closed and
// skipped. If the body ends without such a part the error is
// io.ErrUnexpectedEOF; errors from obtaining the multipart reader or from
// advancing to the next part are returned unchanged. The caller owns the
// returned part and is responsible for closing it.
func firstMultipartFile(r *http.Request) (*multipart.Part, error) {
	parts, err := r.MultipartReader()
	if err != nil {
		return nil, err
	}
	for {
		candidate, nextErr := parts.NextPart()
		switch {
		case nextErr == io.EOF:
			return nil, io.ErrUnexpectedEOF
		case nextErr != nil:
			return nil, nextErr
		case candidate.FormName() == "file":
			return candidate, nil
		}
		candidate.Close()
	}
}
// AssignVolume records the request and hands out a fresh file id of the form
// "7,<counter as 8 hex digits>" located at the configured host. It never
// fails.
func (f *fakeVolumeControlPlane) AssignVolume(_ context.Context, req *filer_pb.AssignVolumeRequest) (*filer_pb.AssignVolumeResponse, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	f.assigns = append(f.assigns, req)
	f.nextID++

	assigned := &filer_pb.AssignVolumeResponse{
		FileId:   fmt.Sprintf("7,%08x", f.nextID),
		Count:    1,
		Location: &filer_pb.Location{Url: f.host},
	}
	return assigned, nil
}
// LookupVolume records the request and answers that every requested volume id
// lives at the single configured host. It never fails.
func (f *fakeVolumeControlPlane) LookupVolume(_ context.Context, req *filer_pb.LookupVolumeRequest) (*filer_pb.LookupVolumeResponse, error) {
	f.mu.Lock()
	f.lookups = append(f.lookups, req)
	f.mu.Unlock()

	volumeIDs := req.GetVolumeIds()
	byVolume := make(map[string]*filer_pb.Locations, len(volumeIDs))
	for i := range volumeIDs {
		only := &filer_pb.Location{Url: f.host}
		byVolume[volumeIDs[i]] = &filer_pb.Locations{
			Locations: []*filer_pb.Location{only},
		}
	}
	return &filer_pb.LookupVolumeResponse{LocationsMap: byVolume}, nil
}
// startFakeVolumeControlPlane serves controlPlane as a gRPC filer on an
// ephemeral loopback port and returns the listen address.
//
// A cleanup registered on t stops the gRPC server, closes the listener, and
// waits up to one second for Serve to return, reporting a test error on
// timeout or on any Serve error other than a closed-network error.
func startFakeVolumeControlPlane(t *testing.T, controlPlane *fakeVolumeControlPlane) string {
	t.Helper()

	listener, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)

	grpcServer := grpc.NewServer()
	filer_pb.RegisterSeaweedFilerServer(grpcServer, controlPlane)

	serveResult := make(chan error, 1)
	go func() {
		serveResult <- grpcServer.Serve(listener)
	}()

	shutdown := func() {
		grpcServer.Stop()
		_ = listener.Close()
		select {
		case <-time.After(time.Second):
			t.Errorf("timed out waiting for fake control plane shutdown")
		case serveErr := <-serveResult:
			if serveErr == nil || isClosedNetworkErr(serveErr) {
				return
			}
			t.Errorf("fake control plane exited with error: %v", serveErr)
		}
	}
	t.Cleanup(shutdown)

	return listener.Addr().String()
}
// mountTestTarget serves server over NFS on an ephemeral loopback port, dials
// it (up to ten attempts, 10ms apart), and mounts server.exportRoot with
// AUTH_NULL credentials.
//
// It returns the mounted target and a cleanup function. The cleanup unmounts,
// closes the RPC client and the listener, and then waits up to one second for
// the serve goroutine to exit, reporting a test error on timeout or on any
// Serve error other than a closed-network error. The caller decides when to
// run cleanup (it is not registered with t.Cleanup).
//
// If any setup step fails, the require assertion aborts the test before the
// cleanup function can be handed to the caller; the deferred guard below
// releases the listener (which also ends the serve goroutine's accept loop)
// and the RPC client in that case so they do not leak into later tests.
func mountTestTarget(t *testing.T, server *Server) (*nfsclient.Target, func()) {
	t.Helper()

	listener, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)

	var client *rpc.Client
	handedOff := false
	defer func() {
		// Runs on normal return and when a require assertion aborts the test.
		if handedOff {
			return
		}
		if client != nil {
			client.Close()
		}
		_ = listener.Close()
	}()

	handler, err := server.newHandler()
	require.NoError(t, err)

	done := make(chan error, 1)
	go func() {
		done <- gonfs.Serve(listener, handler)
	}()

	// Retry the dial briefly; the last error is asserted once after the loop.
	const dialAttempts = 10
	for attempt := 1; ; attempt++ {
		client, err = rpc.DialTCP(listener.Addr().Network(), listener.Addr().String(), false)
		if err == nil || attempt == dialAttempts {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}
	require.NoError(t, err)

	mounter := &nfsclient.Mount{Client: client}
	target, err := mounter.Mount(string(server.exportRoot), rpc.AuthNull)
	require.NoError(t, err)

	cleanup := func() {
		_ = mounter.Unmount()
		client.Close()
		_ = listener.Close()
		select {
		case err := <-done:
			if err != nil && !isClosedNetworkErr(err) {
				t.Errorf("nfs server exited with error: %v", err)
			}
		case <-time.After(time.Second):
			t.Errorf("timed out waiting for nfs server shutdown")
		}
	}

	handedOff = true
	return target, cleanup
}
// isClosedNetworkErr reports whether err's message indicates that a listener
// or connection was closed, i.e. it contains "use of closed network
// connection" or "listener closed". A nil error is never a match.
func isClosedNetworkErr(err error) bool {
	if err == nil {
		return false
	}
	message := err.Error()
	for _, marker := range [...]string{
		"use of closed network connection",
		"listener closed",
	} {
		if strings.Contains(message, marker) {
			return true
		}
	}
	return false
}
// nfsLink issues a raw NFSv3 LINK call that creates linkPath as a hard link to
// the file identified by sourceHandle.
//
// linkPath is cleaned and split into a parent directory and a final name; an
// empty final name is rejected, and an empty parent defaults to "/". The
// parent's handle is obtained with a Lookup on the target. The status word of
// the reply is converted with nfsclient.NFS3Error and returned.
func nfsLink(target *nfsclient.Target, sourceHandle []byte, linkPath string) error {
parentDir, linkName := path.Split(path.Clean(linkPath))
if linkName == "" {
return fmt.Errorf("invalid hard link path %q", linkPath)
}
if parentDir == "" {
parentDir = "/"
}
_, parentHandle, err := target.Lookup(parentDir)
if err != nil {
return err
}
// Field layout matches the go-nfs server's onLink handler
// (vendor: github.com/willscott/go-nfs/nfs_onlink.go), which reads
// DirOpArg + SetFileAttributes + opaque target handle. That wire
// order differs from RFC 1813 §3.3.15 LINK3args {nfs_fh3 file;
// diropargs3 link;} — the go-nfs library is not strictly compliant
// here, and we mirror its layout so the integration test exercises
// the same parser the server uses. Do not reorder fields to match
// the RFC: the test would then fail against a correctly-functioning
// server.
type LinkArgs struct {
rpc.Header
Link nfsclient.Diropargs3
Sattr nfsclient.Sattr3
Target []byte
}
// Sattr is left at its zero value; only the link name and source handle are set.
res, err := target.Call(&LinkArgs{
Header: rpc.Header{
Rpcvers: 2,
Prog: nfsclient.Nfs3Prog,
Vers: nfsclient.Nfs3Vers,
Proc: nfsProc3Link,
Cred: rpc.AuthNull,
Verf: rpc.AuthNull,
},
Link: nfsclient.Diropargs3{
FH: parentHandle,
Filename: linkName,
},
Target: sourceHandle,
})
if err != nil {
return err
}
// The first word of the reply is the NFS status code.
status, err := xdr.ReadUint32(res)
if err != nil {
return err
}
return nfsclient.NFS3Error(status)
}
// TestSeaweedNFSAcceptsAnyMountPathOverRPC serves an export rooted at
// /buckets/data and verifies that MOUNT requests for each of the listed paths
// (including paths outside the export and the export root itself, with and
// without a trailing slash) succeed and land on the empty export root.
func TestSeaweedNFSAcceptsAnyMountPathOverRPC(t *testing.T) {
const exportRoot = "/buckets/data"
// The fake filer holds only the export root and its parent, so the export is empty.
client := &fakeNFSFilerClient{
entries: map[util.FullPath]*filer_pb.Entry{
"/buckets": testEntry("buckets", true, 100, uint32(0755), nil),
"/buckets/data": testEntry("data", true, 101, uint32(0755), nil),
},
kv: map[string][]byte{
string(filer.InodeIndexKey(100)): testIndexRecord(t, 100, 1, "/buckets"),
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/buckets/data"),
},
}
server := newTestServer(t, exportRoot, client)
listener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
handler, err := server.newHandler()
require.NoError(t, err)
serveDone := make(chan error, 1)
go func() {
serveDone <- gonfs.Serve(listener, handler)
}()
// Closing the listener ends Serve; wait briefly for the goroutine to report back.
t.Cleanup(func() {
_ = listener.Close()
select {
case err := <-serveDone:
if err != nil && !isClosedNetworkErr(err) {
t.Errorf("nfs server exited with error: %v", err)
}
case <-time.After(time.Second):
t.Errorf("timed out waiting for nfs server shutdown")
}
})
dirpaths := []string{
"/",
"/buckets",
"/buckets/other",
"/wrong/path",
exportRoot,
exportRoot + "/",
}
for _, dirpath := range dirpaths {
t.Run(dirpath, func(t *testing.T) {
// Each subtest uses its own RPC connection, retrying the dial a few times.
var rpcClient *rpc.Client
var dialErr error
for attempt := 0; attempt < 10; attempt++ {
rpcClient, dialErr = rpc.DialTCP(listener.Addr().Network(), listener.Addr().String(), false)
if dialErr == nil {
break
}
time.Sleep(10 * time.Millisecond)
}
require.NoError(t, dialErr)
defer rpcClient.Close()
mounter := &nfsclient.Mount{Client: rpcClient}
target, err := mounter.Mount(dirpath, rpc.AuthNull)
require.NoErrorf(t, err, "Mount(%q)", dirpath)
defer target.Close()
entries, err := target.ReadDirPlus("/")
require.NoError(t, err)
assert.Empty(t, entries, "Mount(%q) should land at the empty export root", dirpath)
})
}
}
// TestSeaweedNFSSubexportMountOverRPC serves an export rooted at /buckets and
// checks MOUNT requests for paths below it: an existing directory mounts at
// that subdirectory, while a missing entry and a regular file are rejected.
func TestSeaweedNFSSubexportMountOverRPC(t *testing.T) {
const exportRoot = "/buckets"
client := &fakeNFSFilerClient{
entries: map[util.FullPath]*filer_pb.Entry{
"/buckets": testEntry("buckets", true, 100, uint32(0755), nil),
"/buckets/data": testEntry("data", true, 101, uint32(0755), nil),
"/buckets/data/inner": testEntry("inner", false, 104, uint32(0644), []byte("payload")),
"/buckets/other": testEntry("other", true, 105, uint32(0755), nil),
"/buckets/file.txt": testEntry("file.txt", false, 103, uint32(0644), []byte("hi")),
},
kv: map[string][]byte{
string(filer.InodeIndexKey(100)): testIndexRecord(t, 100, 1, "/buckets"),
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/buckets/data"),
string(filer.InodeIndexKey(103)): testIndexRecord(t, 103, 1, "/buckets/file.txt"),
string(filer.InodeIndexKey(104)): testIndexRecord(t, 104, 1, "/buckets/data/inner"),
string(filer.InodeIndexKey(105)): testIndexRecord(t, 105, 1, "/buckets/other"),
},
}
server := newTestServer(t, exportRoot, client)
listener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
handler, err := server.newHandler()
require.NoError(t, err)
serveDone := make(chan error, 1)
go func() {
serveDone <- gonfs.Serve(listener, handler)
}()
// Closing the listener ends Serve; wait briefly for the goroutine to report back.
t.Cleanup(func() {
_ = listener.Close()
select {
case err := <-serveDone:
if err != nil && !isClosedNetworkErr(err) {
t.Errorf("nfs server exited with error: %v", err)
}
case <-time.After(time.Second):
t.Errorf("timed out waiting for nfs server shutdown")
}
})
// dial opens a fresh RPC connection for a subtest and closes it when that subtest ends.
dial := func(t *testing.T) *rpc.Client {
t.Helper()
var rpcClient *rpc.Client
var dialErr error
for attempt := 0; attempt < 10; attempt++ {
rpcClient, dialErr = rpc.DialTCP(listener.Addr().Network(), listener.Addr().String(), false)
if dialErr == nil {
break
}
time.Sleep(10 * time.Millisecond)
}
require.NoError(t, dialErr)
t.Cleanup(func() { rpcClient.Close() })
return rpcClient
}
t.Run("mounts_under_export_at_subdirectory", func(t *testing.T) {
mounter := &nfsclient.Mount{Client: dial(t)}
target, err := mounter.Mount("/buckets/data", rpc.AuthNull)
require.NoError(t, err)
defer target.Close()
// The mount root is /buckets/data, so only "inner" is visible and it is addressed as /inner.
entries, err := target.ReadDirPlus("/")
require.NoError(t, err)
require.Len(t, entries, 1)
assert.Equal(t, "inner", entries[0].Name())
readFile, err := target.Open("/inner")
require.NoError(t, err)
defer readFile.Close()
data, err := io.ReadAll(readFile)
require.NoError(t, err)
assert.Equal(t, []byte("payload"), data)
})
t.Run("missing_entry_under_export_rejects", func(t *testing.T) {
mounter := &nfsclient.Mount{Client: dial(t)}
_, err := mounter.Mount("/buckets/missing", rpc.AuthNull)
require.Error(t, err)
})
t.Run("regular_file_under_export_rejects", func(t *testing.T) {
mounter := &nfsclient.Mount{Client: dial(t)}
_, err := mounter.Mount("/buckets/file.txt", rpc.AuthNull)
require.Error(t, err)
})
}
// TestSeaweedNFSServesInlineRoundTripOverRPC exercises a small-file lifecycle
// over RPC: mkdir, create and write, read back, list, rename, and remove. It
// also checks that the written payload is stored inline on the filer entry
// (Content set, no chunks) and that a handle obtained before the rename still
// answers GetAttr afterwards.
func TestSeaweedNFSServesInlineRoundTripOverRPC(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
server := newTestServer(t, "/exports", client)
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
_, err := target.Mkdir("/docs", 0o755)
require.NoError(t, err)
file, err := target.OpenFile("/docs/note.txt", 0o644)
require.NoError(t, err)
payload := []byte("hello over rpc")
_, err = file.Write(payload)
require.NoError(t, err)
require.NoError(t, file.Close())
readFile, err := target.Open("/docs/note.txt")
require.NoError(t, err)
defer readFile.Close()
data, err := io.ReadAll(readFile)
require.NoError(t, err)
assert.Equal(t, payload, data)
// Inspect the fake filer directly: the payload must be inline, not chunked.
entry := client.entries["/exports/docs/note.txt"]
require.NotNil(t, entry)
assert.Equal(t, payload, entry.Content)
assert.Empty(t, entry.Chunks)
_, beforeRenameHandle, err := target.Lookup("/docs/note.txt")
require.NoError(t, err)
entries, err := target.ReadDirPlus("/docs")
require.NoError(t, err)
require.Len(t, entries, 1)
assert.Equal(t, "note.txt", entries[0].Name())
require.NoError(t, target.Rename("/docs/note.txt", "/docs/final.txt"))
// The handle captured before the rename must remain usable.
_, err = target.GetAttr(beforeRenameHandle)
require.NoError(t, err)
_, _, err = target.Lookup("/docs/final.txt")
require.NoError(t, err)
_, _, err = target.Lookup("/docs/note.txt")
require.Error(t, err)
require.NoError(t, target.Remove("/docs/final.txt"))
_, _, err = target.Lookup("/docs/final.txt")
require.Error(t, err)
}
// TestSeaweedNFSReadOnlyRejectsMutations mounts a server with ReadOnly set and
// verifies that creating a file and writing to an existing file both fail
// with NFS3ErrROFS, while reading the existing file still returns its seeded
// content.
func TestSeaweedNFSReadOnlyRejectsMutations(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
string(filer.InodeIndexKey(202)): testIndexRecord(t, 202, 3, "/exports/existing.txt"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
"/exports/existing.txt": testEntry("existing.txt", false, 202, uint32(0644), []byte("seed")),
},
}
server := newTestServer(t, "/exports", client)
server.option.ReadOnly = true
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
// Creating a new file must be refused.
_, err := target.OpenFile("/created.txt", 0o644)
require.Error(t, err)
nfsErr, ok := err.(*nfsclient.Error)
require.True(t, ok)
assert.Equal(t, uint32(nfsclient.NFS3ErrROFS), nfsErr.ErrorNum)
// Opening an existing file succeeds, but writing to it must be refused.
file, err := target.Open("/existing.txt")
require.NoError(t, err)
_, err = file.Write([]byte("mutate"))
require.Error(t, err)
nfsErr, ok = err.(*nfsclient.Error)
require.True(t, ok)
assert.Equal(t, uint32(nfsclient.NFS3ErrROFS), nfsErr.ErrorNum)
_ = file.Close()
// Reads are unaffected and the content is unchanged.
readFile, err := target.Open("/existing.txt")
require.NoError(t, err)
defer readFile.Close()
data, err := io.ReadAll(readFile)
require.NoError(t, err)
assert.Equal(t, []byte("seed"), data)
}
// TestSeaweedNFSServesSymlinkRoundTripOverRPC creates a file and a symlink to
// it over RPC, then verifies the link is reported with type NF3Lnk, that
// Readlink returns the original target string, and that the filer entry
// stores the same symlink target.
func TestSeaweedNFSServesSymlinkRoundTripOverRPC(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
server := newTestServer(t, "/exports", client)
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
file, err := target.OpenFile("/target.txt", 0o644)
require.NoError(t, err)
_, err = file.Write([]byte("payload"))
require.NoError(t, err)
require.NoError(t, file.Close())
require.NoError(t, target.Symlink("target.txt", "/target.link"))
info, _, err := target.Lookup("/target.link")
require.NoError(t, err)
attr, ok := info.(*nfsclient.Fattr)
require.True(t, ok)
assert.Equal(t, uint32(nfsclient.NF3Lnk), attr.Type)
linkFile, err := target.Open("/target.link")
require.NoError(t, err)
defer linkFile.Close()
linkTarget, err := linkFile.Readlink()
require.NoError(t, err)
assert.Equal(t, "target.txt", linkTarget)
// Confirm what was persisted on the fake filer.
entry := client.entries["/exports/target.link"]
require.NotNil(t, entry)
assert.Equal(t, "target.txt", entry.GetAttributes().GetSymlinkTarget())
}
// TestSeaweedNFSServesHardLinkRoundTripOverRPC creates a file, hard-links it
// via a raw LINK call, and verifies that both names share the same handle,
// file id and link count of 2, and that the filer entries share a hard link id.
// It then removes the names one at a time: after the first removal the handle
// still resolves with link count 1 and the data is readable through the
// remaining name; after the second removal the handle is stale.
func TestSeaweedNFSServesHardLinkRoundTripOverRPC(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
server := newTestServer(t, "/exports", client)
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
file, err := target.OpenFile("/source.txt", 0o644)
require.NoError(t, err)
payload := []byte("shared content")
_, err = file.Write(payload)
require.NoError(t, err)
require.NoError(t, file.Close())
_, sourceHandle, err := target.Lookup("/source.txt")
require.NoError(t, err)
require.NoError(t, nfsLink(target, sourceHandle, "/linked.txt"))
// Look both names up again after linking and compare what the server reports.
sourceInfo, sourceHandle, err := target.Lookup("/source.txt")
require.NoError(t, err)
linkedInfo, linkedHandle, err := target.Lookup("/linked.txt")
require.NoError(t, err)
sourceAttr, ok := sourceInfo.(*nfsclient.Fattr)
require.True(t, ok)
linkAttr, ok := linkedInfo.(*nfsclient.Fattr)
require.True(t, ok)
assert.Equal(t, sourceHandle, linkedHandle)
assert.Equal(t, sourceAttr.Fileid, linkAttr.Fileid)
assert.Equal(t, uint32(2), sourceAttr.Nlink)
assert.Equal(t, uint32(2), linkAttr.Nlink)
linkedFile, err := target.Open("/linked.txt")
require.NoError(t, err)
defer linkedFile.Close()
data, err := io.ReadAll(linkedFile)
require.NoError(t, err)
assert.Equal(t, payload, data)
// Filer-side view: both entries carry the same hard link id and a counter of 2.
sourceEntry := client.entries["/exports/source.txt"]
linkedEntry := client.entries["/exports/linked.txt"]
require.NotNil(t, sourceEntry)
require.NotNil(t, linkedEntry)
assert.Equal(t, sourceEntry.GetHardLinkId(), linkedEntry.GetHardLinkId())
assert.Equal(t, int32(2), sourceEntry.GetHardLinkCounter())
assert.Equal(t, int32(2), linkedEntry.GetHardLinkCounter())
// Removing one name keeps the shared handle alive with a single remaining link.
require.NoError(t, target.Remove("/source.txt"))
remainingAttr, err := target.GetAttr(sourceHandle)
require.NoError(t, err)
assert.Equal(t, uint32(1), remainingAttr.Nlink)
_, _, err = target.Lookup("/source.txt")
require.Error(t, err)
linkedFile, err = target.Open("/linked.txt")
require.NoError(t, err)
data, err = io.ReadAll(linkedFile)
require.NoError(t, err)
require.NoError(t, linkedFile.Close())
assert.Equal(t, payload, data)
// Removing the last name must make the handle stale.
require.NoError(t, target.Remove("/linked.txt"))
_, err = target.GetAttr(linkedHandle)
require.Error(t, err)
nfsErr, ok := err.(*nfsclient.Error)
require.True(t, ok)
assert.Equal(t, uint32(nfsclient.NFS3ErrStale), nfsErr.ErrorNum)
}
// TestSeaweedNFSServesLargeChunkRoundTripOverRPC writes a payload 4096 bytes
// larger than maxInlineWriteSize and verifies that it is stored as exactly one
// chunk (no inline content) through the fake volume server, that it reads back
// intact, and that the fake control plane saw one assignment for the file's
// path and at least one volume lookup.
func TestSeaweedNFSServesLargeChunkRoundTripOverRPC(t *testing.T) {
initIntegrationHTTPClient.Do(util_http.InitGlobalHttpClient)
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
// Stand up the fake volume server and the gRPC control plane that points at it.
volumeServer := newFakeVolumeServer(t)
controlPlane := &fakeVolumeControlPlane{host: volumeServer.host()}
controlPlaneAddr := startFakeVolumeControlPlane(t, controlPlane)
_, grpcPortString, err := net.SplitHostPort(controlPlaneAddr)
require.NoError(t, err)
grpcPort, err := strconv.Atoi(grpcPortString)
require.NoError(t, err)
server := newTestServer(t, "/exports", client)
// Rewire the test server so volume assignment and lookup go to the fake control plane.
server.option.Filer = pb.NewServerAddressWithGrpcPort(controlPlaneAddr, grpcPort)
server.option.GrpcDialOption = grpc.WithTransportCredentials(insecure.NewCredentials())
if server.filerClient != nil {
server.filerClient.Close()
}
server.filerClient = wdclient.NewFilerClient([]pb.ServerAddress{server.option.Filer}, server.option.GrpcDialOption, "")
server.withFilerClient = func(_ bool, fn func(filer_pb.SeaweedFilerClient) error) error {
conn, err := grpc.NewClient(controlPlaneAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
return err
}
defer conn.Close()
return fn(filer_pb.NewSeaweedFilerClient(conn))
}
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
// A fixed seed keeps the payload reproducible across runs.
payload := make([]byte, maxInlineWriteSize+4096)
_, err = rand.New(rand.NewSource(1)).Read(payload)
require.NoError(t, err)
file, err := target.OpenFile("/big.bin", 0o644)
require.NoError(t, err)
_, err = file.Write(payload)
require.NoError(t, err)
require.NoError(t, file.Close())
entry := client.entries["/exports/big.bin"]
require.NotNil(t, entry)
require.Len(t, entry.GetChunks(), 1)
assert.Nil(t, entry.Content)
assert.Equal(t, uint64(len(payload)), entry.GetAttributes().GetFileSize())
readFile, err := target.Open("/big.bin")
require.NoError(t, err)
defer readFile.Close()
data, err := io.ReadAll(readFile)
require.NoError(t, err)
assert.Equal(t, payload, data)
controlPlane.mu.Lock()
defer controlPlane.mu.Unlock()
require.Len(t, controlPlane.assigns, 1)
assert.Equal(t, "/exports/big.bin", controlPlane.assigns[0].GetPath())
assert.NotEmpty(t, controlPlane.lookups)
}
// TestSeaweedNFSRejectsStaleHandleAfterDeleteRecreate verifies that a handle
// obtained for a file becomes stale (NFS3ErrStale) once that file is removed
// and a new file is created at the same path, while a handle looked up for the
// new file works.
func TestSeaweedNFSRejectsStaleHandleAfterDeleteRecreate(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
server := newTestServer(t, "/exports", client)
target, cleanup := mountTestTarget(t, server)
defer cleanup()
defer target.Close()
file, err := target.OpenFile("/stale.txt", 0o644)
require.NoError(t, err)
_, err = file.Write([]byte("old"))
require.NoError(t, err)
require.NoError(t, file.Close())
_, oldHandle, err := target.Lookup("/stale.txt")
require.NoError(t, err)
// Delete and recreate at the same path.
require.NoError(t, target.Remove("/stale.txt"))
file, err = target.OpenFile("/stale.txt", 0o644)
require.NoError(t, err)
_, err = file.Write([]byte("new"))
require.NoError(t, err)
require.NoError(t, file.Close())
// The pre-delete handle must not resolve to the recreated file.
_, err = target.GetAttr(oldHandle)
require.Error(t, err)
nfsErr, ok := err.(*nfsclient.Error)
require.True(t, ok)
assert.Equal(t, uint32(nfsclient.NFS3ErrStale), nfsErr.ErrorNum)
_, newHandle, err := target.Lookup("/stale.txt")
require.NoError(t, err)
_, err = target.GetAttr(newHandle)
require.NoError(t, err)
}
// TestSeaweedNFSFileHandleSurvivesServerRestart verifies that a file handle
// issued by one server instance is still valid on a second instance built over
// the same fake filer state: GetAttr on the old handle succeeds and reports the
// entry's inode as the file id, a fresh Lookup returns an identical handle, and
// the file content reads back unchanged.
func TestSeaweedNFSFileHandleSurvivesServerRestart(t *testing.T) {
client := &fakeNFSFilerClient{
kv: map[string][]byte{
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/exports"),
},
entries: map[util.FullPath]*filer_pb.Entry{
"/exports": testEntry("exports", true, 101, uint32(0755), nil),
},
}
server := newTestServer(t, "/exports", client)
target, cleanup := mountTestTarget(t, server)
file, err := target.OpenFile("/restart.txt", 0o644)
require.NoError(t, err)
payload := []byte("survives restart")
_, err = file.Write(payload)
require.NoError(t, err)
require.NoError(t, file.Close())
_, handle, err := target.Lookup("/restart.txt")
require.NoError(t, err)
// Tear the first server down explicitly (not deferred) to simulate a restart.
target.Close()
cleanup()
// The second server shares the same fake filer client, i.e. the same stored state.
restartedServer := newTestServer(t, "/exports", client)
restartedTarget, restartedCleanup := mountTestTarget(t, restartedServer)
defer restartedCleanup()
defer restartedTarget.Close()
attr, err := restartedTarget.GetAttr(handle)
require.NoError(t, err)
assert.Equal(t, uint64(client.entries["/exports/restart.txt"].GetAttributes().GetInode()), attr.Fileid)
_, restartedHandle, err := restartedTarget.Lookup("/restart.txt")
require.NoError(t, err)
assert.Equal(t, handle, restartedHandle)
readFile, err := restartedTarget.Open("/restart.txt")
require.NoError(t, err)
defer readFile.Close()
data, err := io.ReadAll(readFile)
require.NoError(t, err)
assert.Equal(t, payload, data)
}
-88
View File
@@ -1,88 +0,0 @@
package nfs
import (
"context"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"google.golang.org/grpc"
)
// filerClientExecutor runs fn with a full generated filer gRPC client and
// returns the resulting error. streamingMode is forwarded to the underlying
// connection helper.
type filerClientExecutor func(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error
// internalClientExecutor is the same shape as filerClientExecutor but hands fn
// the narrower nfsFilerClient interface.
type internalClientExecutor func(streamingMode bool, fn func(nfsFilerClient) error) error
// nfsListEntriesClient is the receive-only view of a ListEntries stream.
type nfsListEntriesClient interface {
Recv() (*filer_pb.ListEntriesResponse, error)
}
// nfsSubscribeMetadataClient is the receive-only view of a SubscribeMetadata
// stream.
type nfsSubscribeMetadataClient interface {
Recv() (*filer_pb.SubscribeMetadataResponse, error)
}
// nfsFilerClient lists the filer RPCs this package calls. The two streaming
// calls return the receive-only stream interfaces declared in this file
// rather than the generated gRPC stream types.
type nfsFilerClient interface {
KvGet(ctx context.Context, in *filer_pb.KvGetRequest, opts ...grpc.CallOption) (*filer_pb.KvGetResponse, error)
LookupDirectoryEntry(ctx context.Context, in *filer_pb.LookupDirectoryEntryRequest, opts ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error)
ListEntries(ctx context.Context, in *filer_pb.ListEntriesRequest, opts ...grpc.CallOption) (nfsListEntriesClient, error)
SubscribeMetadata(ctx context.Context, in *filer_pb.SubscribeMetadataRequest, opts ...grpc.CallOption) (nfsSubscribeMetadataClient, error)
CreateEntry(ctx context.Context, in *filer_pb.CreateEntryRequest, opts ...grpc.CallOption) (*filer_pb.CreateEntryResponse, error)
UpdateEntry(ctx context.Context, in *filer_pb.UpdateEntryRequest, opts ...grpc.CallOption) (*filer_pb.UpdateEntryResponse, error)
DeleteEntry(ctx context.Context, in *filer_pb.DeleteEntryRequest, opts ...grpc.CallOption) (*filer_pb.DeleteEntryResponse, error)
AtomicRenameEntry(ctx context.Context, in *filer_pb.AtomicRenameEntryRequest, opts ...grpc.CallOption) (*filer_pb.AtomicRenameEntryResponse, error)
Statistics(ctx context.Context, in *filer_pb.StatisticsRequest, opts ...grpc.CallOption) (*filer_pb.StatisticsResponse, error)
}
// grpcNFSFilerClient adapts a generated filer gRPC client to nfsFilerClient.
// Every method forwards its arguments unchanged to the wrapped client.
type grpcNFSFilerClient struct {
// client is the generated gRPC client all calls are delegated to.
client filer_pb.SeaweedFilerClient
}
// KvGet forwards to the wrapped client's KvGet.
func (c grpcNFSFilerClient) KvGet(ctx context.Context, in *filer_pb.KvGetRequest, opts ...grpc.CallOption) (*filer_pb.KvGetResponse, error) {
return c.client.KvGet(ctx, in, opts...)
}
// LookupDirectoryEntry forwards to the wrapped client's LookupDirectoryEntry.
func (c grpcNFSFilerClient) LookupDirectoryEntry(ctx context.Context, in *filer_pb.LookupDirectoryEntryRequest, opts ...grpc.CallOption) (*filer_pb.LookupDirectoryEntryResponse, error) {
return c.client.LookupDirectoryEntry(ctx, in, opts...)
}
// ListEntries forwards to the wrapped client's ListEntries and returns the
// stream as the receive-only nfsListEntriesClient.
func (c grpcNFSFilerClient) ListEntries(ctx context.Context, in *filer_pb.ListEntriesRequest, opts ...grpc.CallOption) (nfsListEntriesClient, error) {
return c.client.ListEntries(ctx, in, opts...)
}
// SubscribeMetadata forwards to the wrapped client's SubscribeMetadata and
// returns the stream as the receive-only nfsSubscribeMetadataClient.
func (c grpcNFSFilerClient) SubscribeMetadata(ctx context.Context, in *filer_pb.SubscribeMetadataRequest, opts ...grpc.CallOption) (nfsSubscribeMetadataClient, error) {
return c.client.SubscribeMetadata(ctx, in, opts...)
}
// CreateEntry forwards to the wrapped client's CreateEntry.
func (c grpcNFSFilerClient) CreateEntry(ctx context.Context, in *filer_pb.CreateEntryRequest, opts ...grpc.CallOption) (*filer_pb.CreateEntryResponse, error) {
return c.client.CreateEntry(ctx, in, opts...)
}
// UpdateEntry forwards to the wrapped client's UpdateEntry.
func (c grpcNFSFilerClient) UpdateEntry(ctx context.Context, in *filer_pb.UpdateEntryRequest, opts ...grpc.CallOption) (*filer_pb.UpdateEntryResponse, error) {
return c.client.UpdateEntry(ctx, in, opts...)
}
// DeleteEntry forwards to the wrapped client's DeleteEntry.
func (c grpcNFSFilerClient) DeleteEntry(ctx context.Context, in *filer_pb.DeleteEntryRequest, opts ...grpc.CallOption) (*filer_pb.DeleteEntryResponse, error) {
return c.client.DeleteEntry(ctx, in, opts...)
}
// AtomicRenameEntry forwards to the wrapped client's AtomicRenameEntry.
func (c grpcNFSFilerClient) AtomicRenameEntry(ctx context.Context, in *filer_pb.AtomicRenameEntryRequest, opts ...grpc.CallOption) (*filer_pb.AtomicRenameEntryResponse, error) {
return c.client.AtomicRenameEntry(ctx, in, opts...)
}
// Statistics forwards to the wrapped client's Statistics.
func (c grpcNFSFilerClient) Statistics(ctx context.Context, in *filer_pb.StatisticsRequest, opts ...grpc.CallOption) (*filer_pb.StatisticsResponse, error) {
return c.client.Statistics(ctx, in, opts...)
}
// newFilerClientExecutor builds a filerClientExecutor that, on each call,
// obtains a gRPC connection to the filer address in option via
// pb.WithGrpcClient and invokes fn with a generated filer client bound to
// that connection.
func newFilerClientExecutor(option *Option, signature int32) filerClientExecutor {
	return func(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error {
		withConn := func(conn *grpc.ClientConn) error {
			filerClient := filer_pb.NewSeaweedFilerClient(conn)
			return fn(filerClient)
		}
		return pb.WithGrpcClient(streamingMode, signature, withConn, option.Filer.ToGrpcAddress(), false, option.GrpcDialOption)
	}
}
// newInternalClientExecutor builds an internalClientExecutor that, on each
// call, obtains a gRPC connection to the filer address in option via
// pb.WithGrpcClient and invokes fn with a grpcNFSFilerClient wrapping a
// generated filer client bound to that connection.
func newInternalClientExecutor(option *Option, signature int32) internalClientExecutor {
	return func(streamingMode bool, fn func(nfsFilerClient) error) error {
		withConn := func(conn *grpc.ClientConn) error {
			adapter := grpcNFSFilerClient{client: filer_pb.NewSeaweedFilerClient(conn)}
			return fn(adapter)
		}
		return pb.WithGrpcClient(streamingMode, signature, withConn, option.Filer.ToGrpcAddress(), false, option.GrpcDialOption)
	}
}
-147
View File
@@ -1,147 +0,0 @@
package nfs
import (
"context"
"errors"
"io"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
)
// chunkInvalidator is the cache hook used by the metadata invalidation code:
// UnCache is called with the file id of each chunk that should be dropped.
type chunkInvalidator interface {
UnCache(fileID string)
}
// metadataInvalidation pairs an entry from a metadata event with the full path
// it was reported at.
type metadataInvalidation struct {
// path is the full path (directory plus entry name) of the affected entry.
path util.FullPath
// entry is the old or new entry carried by the event.
entry *filer_pb.Entry
}
// runMetadataInvalidationLoop keeps a metadata subscription running until ctx
// is done or the stream ends without error.
//
// It is a no-op when the server, its chunk invalidator, or its internal
// client executor is nil. After a stream failure it logs, waits, and
// resubscribes. The wait starts at one second and grows by half of itself
// after each failure while it is still below util.RetryWaitTime. The loop
// returns as soon as the stream finishes with a nil error, the error is
// context.Canceled, or ctx is done (including while waiting).
func (s *Server) runMetadataInvalidationLoop(ctx context.Context) {
	if s == nil || s.chunkInvalidator == nil || s.withInternalClient == nil {
		return
	}
	for backoff := time.Second; ctx.Err() == nil; {
		streamErr := s.followMetadataStream(ctx)
		finished := streamErr == nil || errors.Is(streamErr, context.Canceled) || ctx.Err() != nil
		if finished {
			return
		}
		glog.V(0).Infof("retry nfs metadata invalidation stream for %s in %v: %v", s.exportRoot, backoff, streamErr)

		pause := time.NewTimer(backoff)
		select {
		case <-pause.C:
			// Waited the full backoff; fall through and resubscribe.
		case <-ctx.Done():
			// Stop the timer, draining its channel if it already fired.
			if !pause.Stop() {
				<-pause.C
			}
			return
		}

		if backoff < util.RetryWaitTime {
			backoff += backoff / 2
		}
	}
}
// followMetadataStream subscribes to filer metadata events under the export
// root using a streaming internal client and applies every received response
// to the chunk cache. It returns nil when the stream ends with io.EOF and the
// underlying error for any subscribe or receive failure.
func (s *Server) followMetadataStream(ctx context.Context) error {
	subscription := &filer_pb.SubscribeMetadataRequest{
		ClientName:             "nfs",
		PathPrefix:             string(s.exportRoot),
		ClientId:               s.signature,
		ClientEpoch:            1,
		ClientSupportsBatching: true,
	}
	consume := func(client nfsFilerClient) error {
		stream, err := client.SubscribeMetadata(ctx, subscription)
		if err != nil {
			return err
		}
		for {
			resp, recvErr := stream.Recv()
			switch {
			case recvErr == io.EOF:
				return nil
			case recvErr != nil:
				return recvErr
			}
			s.applyMetadataInvalidationResponse(resp)
		}
	}
	return s.withInternalClient(true, consume)
}
// applyMetadataInvalidationResponse un-caches every chunk referenced by the
// entries in resp and in its batched resp.Events. Entries whose path is not
// visible from the export root are skipped, and each file ID is reported to
// the chunk invalidator at most once per response.
func (s *Server) applyMetadataInvalidationResponse(resp *filer_pb.SubscribeMetadataResponse) {
	if s == nil || s.chunkInvalidator == nil || resp == nil {
		return
	}

	// The response itself comes first, followed by its batched events in order.
	events := make([]*filer_pb.SubscribeMetadataResponse, 0, 1+len(resp.Events))
	events = append(events, resp)
	events = append(events, resp.Events...)

	reported := make(map[string]struct{})
	for _, event := range events {
		for _, inv := range metadataInvalidationsForEvent(event) {
			if inv.entry == nil {
				continue
			}
			if !pathVisibleFromExport(inv.path, s.exportRoot) {
				continue
			}
			for _, chunk := range inv.entry.GetChunks() {
				id := chunk.GetFileIdString()
				if id == "" {
					continue
				}
				if _, dup := reported[id]; dup {
					continue
				}
				reported[id] = struct{}{}
				s.chunkInvalidator.UnCache(id)
			}
		}
	}
}
// metadataInvalidationsForEvent turns one metadata event into the list of
// (path, entry) pairs it touches. It returns nil when the event carries no
// notification.
//
//   - old and new entry present: the old path always, plus the new path
//     when the name or the directory changed;
//   - only a new entry: the new path;
//   - only an old entry: the old path.
//
// The new path uses NewParentPath when it is set, otherwise resp.Directory.
func metadataInvalidationsForEvent(resp *filer_pb.SubscribeMetadataResponse) []metadataInvalidation {
	note := resp.GetEventNotification()
	if note == nil {
		return nil
	}
	oldEntry, newEntry := note.OldEntry, note.NewEntry

	// Directory the new entry lives in.
	targetDir := resp.Directory
	if note.NewParentPath != "" {
		targetDir = note.NewParentPath
	}

	var result []metadataInvalidation
	switch {
	case oldEntry != nil && newEntry != nil:
		result = append(result, metadataInvalidation{
			path:  util.NewFullPath(resp.Directory, oldEntry.Name),
			entry: oldEntry,
		})
		if oldEntry.Name != newEntry.Name || resp.Directory != targetDir {
			result = append(result, metadataInvalidation{
				path:  util.NewFullPath(targetDir, newEntry.Name),
				entry: newEntry,
			})
		}
	case newEntry != nil:
		result = append(result, metadataInvalidation{
			path:  util.NewFullPath(targetDir, newEntry.Name),
			entry: newEntry,
		})
	case oldEntry != nil:
		result = append(result, metadataInvalidation{
			path:  util.NewFullPath(resp.Directory, oldEntry.Name),
			entry: oldEntry,
		})
	}
	return result
}
-343
View File
@@ -1,343 +0,0 @@
package nfs
import (
"context"
"encoding/binary"
"fmt"
"net"
"os"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/util"
)
// The upstream willscott/go-nfs library only serves the MOUNT protocol over
// TCP. Linux's mount.nfs and the in-kernel NFS client default `mountproto` to
// UDP in many configurations, so against a stock `weed nfs` deployment the
// kernel queries portmap for "MOUNT v3 UDP", gets port=0 ("not registered"),
// and either falls back inconsistently or surfaces EPROTONOSUPPORT
// ("requested NFS version or transport protocol is not supported"). The user
// either has to add `mountproto=tcp` / `mountport=2049` to their mount
// options or guess that their distro happens to fall back to TCP on its own.
//
// This responder closes that gap. It speaks just enough of MOUNT v3 to handle
// MOUNT_NULL / MOUNT_MNT / MOUNT_UMNT over UDP — the only procedures the
// kernel actually invokes during mount setup and teardown — so plain
// `mount -t nfs <host>:<export> /mnt` works without any client-side protocol
// hints. The protocol layout is intentionally identical to the TCP MOUNT
// handler in handler.go's Mount() so the two paths return the same
// filehandle and the same set of auth flavors for the same export.
//
// References: RFC 1813 §5 (NFSv3/MOUNTv3), RFC 5531 (RPC).
// Tunables and wire constants for the UDP MOUNT v3 responder.
const (
// mountUDPMaxRecord is the size of the receive buffer serve() allocates
// for a single datagram.
mountUDPMaxRecord = 32 * 1024
// mountUDPRetryBackoff mirrors portmapRetryBackoff so the two
// listening goroutines back off identically under host pressure.
mountUDPRetryBackoff = 50 * time.Millisecond
// mountUDPLookupTimeout bounds any filer round-trip the UDP MOUNT
// path makes (export-root existence check, subexport lookup). The
// UDP serve loop is single-threaded, so a stalled filer call would
// otherwise block every later MOUNT packet.
mountUDPLookupTimeout = 5 * time.Second
// mountVersion is the only MOUNT protocol version handleCall accepts;
// it is also the low and high bound advertised on PROG_MISMATCH.
mountVersion = 3
// MOUNT procedure numbers dispatched by handleCall.
mountProcNull = 0
mountProcMnt = 1
mountProcUmnt = 3
// MOUNT v3 status codes (mountstat3 in RFC 1813 §5.1.1).
mnt3StatOK uint32 = 0
mnt3ErrAcces uint32 = 13
mnt3ErrNoEnt uint32 = 2
mnt3ErrNotDir uint32 = 20
mnt3ErrServerFault uint32 = 10006
// XDR opaque length cap for dirpath. RFC 1813 §5.1 limits MNTPATHLEN
// to 1024; cap a bit higher for headroom and reject anything beyond.
mountUDPMaxPathLen = 4096
// AuthFlavor numeric IDs (matches go-nfs and RFC 5531 §8).
authFlavorNull = 0
authFlavorUnix = 1
)
// mountUDPServer answers MOUNT v3 RPCs over UDP. It listens on the same port
// the NFS TCP server uses (2049 by default), since that's what we advertise
// via portmap, and shares the parent Server's exportRoot, exportID, and
// client allowlist so the UDP MOUNT path applies the same access policy as
// the TCP path.
type mountUDPServer struct {
// bindIP and port are joined into the UDP listen address by Start.
bindIP string
port int
// server is the parent NFS Server whose export settings and client
// authorizer are consulted for every request.
server *Server
// udpConn is the listening socket; Start assigns it and Close closes it.
udpConn *net.UDPConn
// mu guards closed.
mu sync.Mutex
closed bool
// done is closed by Close so serve can abandon a retry back-off early.
done chan struct{}
// wg tracks the serve goroutine; Close waits on it.
wg sync.WaitGroup
}
// newMountUDPServer returns an unstarted UDP MOUNT responder for server that
// will listen on bindIP:port once Start is called.
func newMountUDPServer(bindIP string, port int, server *Server) *mountUDPServer {
	m := new(mountUDPServer)
	m.bindIP = bindIP
	m.port = port
	m.server = server
	m.done = make(chan struct{})
	return m
}
// Start binds the UDP socket on bindIP:port and launches the serve loop in a
// goroutine tracked by m.wg. It returns a wrapped error if the address cannot
// be resolved or the socket cannot be opened.
func (m *mountUDPServer) Start() error {
	hostPort := net.JoinHostPort(m.bindIP, fmt.Sprintf("%d", m.port))

	resolved, err := net.ResolveUDPAddr("udp", hostPort)
	if err != nil {
		return fmt.Errorf("mount udp resolve %s: %w", hostPort, err)
	}

	sock, err := net.ListenUDP("udp", resolved)
	if err != nil {
		return fmt.Errorf("mount udp listen %s: %w", hostPort, err)
	}
	m.udpConn = sock

	m.wg.Add(1)
	go func() {
		defer m.wg.Done()
		m.serve()
	}()
	return nil
}
// Close shuts the responder down: it marks the server closed, closes done,
// closes the UDP socket (if any) and waits for the serve goroutine to exit.
// It is idempotent and always returns nil.
func (m *mountUDPServer) Close() error {
	m.mu.Lock()
	alreadyClosed := m.closed
	if !alreadyClosed {
		m.closed = true
		close(m.done)
	}
	m.mu.Unlock()

	if alreadyClosed {
		return nil
	}

	// Closing the socket unblocks a pending ReadFromUDP in serve.
	if sock := m.udpConn; sock != nil {
		_ = sock.Close()
	}
	m.wg.Wait()
	return nil
}
// isClosed reports, under m.mu, whether Close has been called.
func (m *mountUDPServer) isClosed() bool {
	m.mu.Lock()
	closed := m.closed
	m.mu.Unlock()
	return closed
}
// serve is the single-goroutine receive loop. It reads one datagram at a
// time, drops packets from clients the parent server's authorizer rejects,
// dispatches the rest to handleCall, and writes back any non-nil reply. It
// returns once the server has been closed.
func (m *mountUDPServer) serve() {
	packet := make([]byte, mountUDPMaxRecord)
	for {
		size, peer, readErr := m.udpConn.ReadFromUDP(packet)
		if readErr != nil {
			if m.isClosed() {
				return
			}
			// Transient read failure: log, back off, and keep the responder
			// alive — same pattern as portmap UDP.
			glog.V(1).Infof("mount udp read: %v", readErr)
			select {
			case <-time.After(mountUDPRetryBackoff):
				continue
			case <-m.done:
				return
			}
		}

		// Enforce the parent server's client allowlist before looking at the
		// RPC bytes, mirroring the TCP path's allowlistListener wrapping.
		if srv := m.server; srv != nil && srv.clientAuthorizer != nil && !srv.clientAuthorizer.isAllowedAddr(peer) {
			glog.V(1).Infof("mount udp: rejecting unauthorized client %s", peer)
			continue
		}

		// A nil reply means the call is dropped silently.
		if response := m.handleCall(packet[:size], peer); response != nil {
			if _, writeErr := m.udpConn.WriteToUDP(response, peer); writeErr != nil {
				glog.V(1).Infof("mount udp write to %s: %v", peer, writeErr)
			}
		}
	}
}
// handleCall decodes a single RPC CALL datagram and produces the encoded
// reply. It returns nil when the call cannot be parsed, so the caller drops
// it without answering.
//
// Replies by case: a foreign program gets PROG_UNAVAIL; a MOUNT version other
// than mountVersion gets PROG_MISMATCH advertising the v3..v3 range; NULL and
// UMNT are acknowledged with an empty SUCCESS; MNT is delegated to
// handleMount; every other procedure gets PROC_UNAVAIL.
func (m *mountUDPServer) handleCall(callBuf []byte, addr *net.UDPAddr) []byte {
	xid, prog, vers, proc, args, err := parseRPCCall(callBuf)
	switch {
	case err != nil:
		return nil
	case prog != mountProgram:
		return encodeAcceptedReply(xid, rpcAcceptProgUnavail, nil)
	case vers != mountVersion:
		// Mismatch body is the (low, high) version pair we support.
		var supported [8]byte
		binary.BigEndian.PutUint32(supported[:4], mountVersion)
		binary.BigEndian.PutUint32(supported[4:], mountVersion)
		return encodeAcceptedReply(xid, rpcAcceptProgMismatch, supported[:])
	}

	if proc == mountProcMnt {
		return m.handleMount(xid, args, addr)
	}
	if proc == mountProcNull || proc == mountProcUmnt {
		// The server is stateless, so UMNT has nothing to forget and its
		// dirpath argument does not need validating; both procedures return
		// no data.
		return encodeAcceptedReply(xid, rpcAcceptSuccess, nil)
	}
	// MOUNT v3 also defines DUMP / EXPORT / UMNTALL, but the kernel mount
	// path doesn't invoke them; PROC_UNAVAIL is the protocol-correct answer.
	return encodeAcceptedReply(xid, rpcAcceptProcUnavail, nil)
}
// handleMount implements MOUNT v3 MNT (RFC 1813 §5.1.4):
//
//	MOUNT3args { dirpath3 dirpath; }   // XDR opaque
//	MOUNT3res  { mountstat3 status; if OK { handle, auth_flavors[] } }
//
// args must hold a 4-byte length followed by the dirpath padded to a multiple
// of four bytes; anything shorter, or a length above mountUDPMaxPathLen,
// yields GARBAGE_ARGS. A dirpath strictly under the export root resolves to
// that subdirectory's handle. The export root itself, and any path outside
// it, are answered with the synthetic root handle after checking that the
// root still exists.
func (m *mountUDPServer) handleMount(xid uint32, args []byte, addr *net.UDPAddr) []byte {
	garbage := func() []byte {
		return encodeAcceptedReply(xid, rpcAcceptGarbageArgs, nil)
	}

	if len(args) < 4 {
		return garbage()
	}
	nameLen := binary.BigEndian.Uint32(args[:4])
	if nameLen > mountUDPMaxPathLen {
		return garbage()
	}
	// XDR pads opaque data to a 4-byte boundary.
	if need := 4 + ((nameLen + 3) &^ 3); uint32(len(args)) < need {
		return garbage()
	}

	dirpath := string(args[4 : 4+nameLen])
	requested := normalizeExportRoot(util.FullPath(dirpath))
	flavors := []uint32{authFlavorNull, authFlavorUnix}

	ctx, cancel := context.WithTimeout(context.Background(), mountUDPLookupTimeout)
	defer cancel()

	exportRoot := m.server.exportRoot
	isRoot := requested == exportRoot

	if !isRoot && requested.IsUnder(exportRoot) {
		handle, status := m.resolveSubexportFileHandle(ctx, requested)
		if status != mnt3StatOK {
			return encodeMountStatus(xid, status)
		}
		glog.V(1).Infof("mount udp: client %s requested %q under export %q; mounting at subdirectory", addr, dirpath, exportRoot)
		return encodeMountSuccess(xid, handle, flavors)
	}

	// Exact match and outside-export both serve the synthetic root handle.
	// Only the outside-export case logs; the exact match is the common path.
	if !isRoot {
		glog.V(0).Infof("mount udp: client %s requested %q (outside export %q); serving configured export", addr, dirpath, exportRoot)
	}
	if status := m.rootMountStatus(ctx); status != mnt3StatOK {
		return encodeMountStatus(xid, status)
	}
	return encodeMountSuccess(xid, syntheticRootHandle(m.server), flavors)
}
// rootMountStatus checks that the configured export root still exists in the
// filer, so MNT cannot hand out a handle for a deleted directory. It looks up
// "/" on the Server's shared root filesystem rather than building a wrapper
// per request.
//
// Returns mnt3StatOK when the lookup succeeds or no internal client is
// configured, mnt3ErrNoEnt when the root is missing, and mnt3ErrServerFault
// (after logging) for any other error.
func (m *mountUDPServer) rootMountStatus(ctx context.Context) uint32 {
	if m.server.withInternalClient == nil {
		return mnt3StatOK
	}

	_, err := m.server.rootFilesystem().fileInfoForVirtualPath(ctx, "/")
	if err == nil {
		return mnt3StatOK
	}
	if os.IsNotExist(err) {
		return mnt3ErrNoEnt
	}
	glog.Errorf("mount udp: export root %q lookup failed: %v", m.server.exportRoot, err)
	return mnt3ErrServerFault
}
// resolveSubexportFileHandle looks up a directory below the export root and
// returns its encoded file handle together with a mountstat3 code. The UDP
// path encodes the handle itself, so the inode and generation are read here
// explicitly.
//
// The root filesystem is resolved first and its reader cache is passed to
// the sub-filesystem, so the lookup shares that cache even if this MOUNT
// arrives before the TCP handler has been set up.
//
// Status codes: mnt3ErrServerFault when no internal client is configured or
// the lookup fails unexpectedly, mnt3ErrNoEnt when the path does not exist,
// mnt3ErrNotDir when it is not a directory, mnt3StatOK otherwise.
func (m *mountUDPServer) resolveSubexportFileHandle(ctx context.Context, requested util.FullPath) ([]byte, uint32) {
	if m.server.withInternalClient == nil {
		return nil, mnt3ErrServerFault
	}

	sharedCache := m.server.rootFilesystem().readerCache
	info, err := newSeaweedFileSystem(m.server, requested, sharedCache).fileInfoForVirtualPath(ctx, "/")
	if err != nil {
		if os.IsNotExist(err) {
			return nil, mnt3ErrNoEnt
		}
		glog.Errorf("mount udp: subexport lookup %q failed: %v", requested, err)
		return nil, mnt3ErrServerFault
	}
	if !info.entry.IsDirectory {
		return nil, mnt3ErrNotDir
	}

	handle := NewFileHandle(m.server.exportID, FileHandleKindDirectory, info.entry.GetAttributes().GetInode(), info.generation)
	return handle.Encode(), mnt3StatOK
}
// syntheticRootHandle encodes the directory file handle used for the export
// root: inode 0 at filer.InodeIndexInitialGeneration under s.exportID.
func syntheticRootHandle(s *Server) []byte {
	root := NewFileHandle(s.exportID, FileHandleKindDirectory, 0, filer.InodeIndexInitialGeneration)
	return root.Encode()
}
// encodeMountStatus builds a MOUNT MNT reply whose body is only the 4-byte
// big-endian status. Per RFC 1813 §5.1.4 a non-OK status ends the response:
// no handle or auth flavors follow.
func encodeMountStatus(xid, status uint32) []byte {
	var statusOnly [4]byte
	binary.BigEndian.PutUint32(statusOnly[:], status)
	return encodeAcceptedReply(xid, rpcAcceptSuccess, statusOnly[:])
}
// encodeMountSuccess builds the OK MOUNT MNT reply. The body is, in order:
// status OK, the file handle as an XDR opaque (length, bytes, zero padding to
// a 4-byte boundary), the flavor count, and one word per auth flavor.
func encodeMountSuccess(xid uint32, handle []byte, flavors []uint32) []byte {
	handleLen := uint32(len(handle))
	handlePadded := (handleLen + 3) &^ 3
	body := make([]byte, 4+4+handlePadded+4+4*uint32(len(flavors)))

	// putWord writes one big-endian word at the cursor and advances it.
	cursor := uint32(0)
	putWord := func(v uint32) {
		binary.BigEndian.PutUint32(body[cursor:cursor+4], v)
		cursor += 4
	}

	putWord(mnt3StatOK)
	putWord(handleLen)
	copy(body[cursor:cursor+handleLen], handle)
	// Skipping the padded length leaves the pad bytes zero, as make() left them.
	cursor += handlePadded

	putWord(uint32(len(flavors)))
	for _, flavor := range flavors {
		putWord(flavor)
	}
	return encodeAcceptedReply(xid, rpcAcceptSuccess, body)
}
-431
View File
@@ -1,431 +0,0 @@
package nfs
import (
"context"
"encoding/binary"
"net"
"testing"
"time"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
gonfs "github.com/willscott/go-nfs"
)
// buildMountCallFrame assembles a MOUNT RPC CALL datagram carrying dirpath as
// its single XDR opaque argument. The layout follows RFC 5531 §9:
//
//	xid | msg_type=CALL | rpcvers=2 | prog | vers | proc   (24 bytes)
//	cred AUTH_NONE | verf AUTH_NONE                        (16 bytes, all zero)
//	dirpath length | dirpath bytes padded to 4             (4 + padded bytes)
func buildMountCallFrame(xid, prog, vers, proc uint32, dirpath string) []byte {
	const (
		callHeaderLen = 24
		authLen       = 16
		argStart      = callHeaderLen + authLen
	)
	pathLen := uint32(len(dirpath))
	paddedLen := (pathLen + 3) &^ 3
	frame := make([]byte, argStart+4+paddedLen)

	// Six consecutive big-endian words make up the call header.
	for i, word := range [...]uint32{xid, rpcMsgCall, 2, prog, vers, proc} {
		binary.BigEndian.PutUint32(frame[4*i:4*i+4], word)
	}

	// cred and verf stay zero-filled: AUTH_NONE with length 0.
	binary.BigEndian.PutUint32(frame[argStart:argStart+4], pathLen)
	copy(frame[argStart+4:], dirpath)
	return frame
}
// newMountUDPTestServer starts a UDP MOUNT responder for exportPath with no
// filer client wired in, and returns it along with its listening socket.
func newMountUDPTestServer(t *testing.T, exportPath string) (*mountUDPServer, *net.UDPConn) {
	t.Helper()
	var noClient *fakeNFSFilerClient
	return newMountUDPTestServerWithClient(t, exportPath, noClient)
}
// newMountUDPTestServerWithClient starts a UDP MOUNT responder on an
// ephemeral loopback port, backed by a minimal Server for exportPath. When
// client is non-nil it is installed as Server.withInternalClient, so the
// under-export lookup branch in handleMount can find directory entries. The
// responder is closed automatically through t.Cleanup.
func newMountUDPTestServerWithClient(t *testing.T, exportPath string, client *fakeNFSFilerClient) (*mountUDPServer, *net.UDPConn) {
	t.Helper()

	root := normalizeExportRoot(util.FullPath(exportPath))
	authorizer, err := newClientAuthorizer(nil)
	if err != nil {
		t.Fatal(err)
	}

	parent := &Server{
		option:           &Option{},
		exportRoot:       root,
		exportID:         exportIDForRoot(root),
		clientAuthorizer: authorizer,
	}
	if client != nil {
		parent.withInternalClient = func(_ bool, fn func(nfsFilerClient) error) error {
			return fn(client)
		}
	}

	// Port 0 lets the kernel pick a free port.
	loopback, err := net.ResolveUDPAddr("udp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	sock, err := net.ListenUDP("udp", loopback)
	if err != nil {
		t.Fatal(err)
	}

	responder := &mountUDPServer{
		bindIP:  "127.0.0.1",
		port:    sock.LocalAddr().(*net.UDPAddr).Port,
		server:  parent,
		udpConn: sock,
		done:    make(chan struct{}),
	}
	responder.wg.Add(1)
	go func() {
		defer responder.wg.Done()
		responder.serve()
	}()
	t.Cleanup(func() {
		_ = responder.Close()
	})
	return responder, sock
}
// sendMountUDP sends payload to target from a fresh UDP socket and returns
// the first datagram received in response. It fails the test on a dial,
// write, or read error, including no reply within two seconds.
func sendMountUDP(t *testing.T, target *net.UDPAddr, payload []byte) []byte {
	t.Helper()

	client, dialErr := net.DialUDP("udp", nil, target)
	if dialErr != nil {
		t.Fatal(dialErr)
	}
	defer client.Close()

	if _, writeErr := client.Write(payload); writeErr != nil {
		t.Fatal(writeErr)
	}

	_ = client.SetReadDeadline(time.Now().Add(2 * time.Second))
	reply := make([]byte, 4096)
	got, readErr := client.Read(reply)
	if readErr != nil {
		t.Fatalf("read reply: %v", readErr)
	}
	return reply[:got]
}
// parseRPCReply extracts xid, accept_stat, and the bytes following
// accept_stat from a MSG_ACCEPTED reply. Unlike the TCP path there is no
// fragment marker: the whole UDP datagram is the reply. The test fails if the
// reply is shorter than 24 bytes, is not a REPLY, or was not accepted.
func parseRPCReply(t *testing.T, reply []byte) (xid, acceptStat uint32, body []byte) {
	t.Helper()

	const headerLen = 24
	if len(reply) < headerLen {
		t.Fatalf("reply too short: %d bytes", len(reply))
	}

	// word returns the i-th big-endian 32-bit word of the reply.
	word := func(i int) uint32 {
		return binary.BigEndian.Uint32(reply[4*i : 4*i+4])
	}

	if msgType := word(1); msgType != rpcMsgReply {
		t.Fatalf("msg_type=%d want REPLY(1)", msgType)
	}
	if replyStat := word(2); replyStat != rpcMsgAccepted {
		t.Fatalf("reply_stat=%d want MSG_ACCEPTED(0)", replyStat)
	}
	// accept_stat is read at byte offset 20.
	return word(0), word(5), reply[headerLen:]
}
// TestMountUDPNullReturnsSuccess checks that MOUNT NULL over UDP echoes the
// xid and returns SUCCESS with an empty body.
func TestMountUDPNullReturnsSuccess(t *testing.T) {
	_, conn := newMountUDPTestServer(t, "/exports")
	serverAddr := conn.LocalAddr().(*net.UDPAddr)

	call := buildMountCallFrame(7, mountProgram, 3, mountProcNull, "")
	xid, acceptStat, body := parseRPCReply(t, sendMountUDP(t, serverAddr, call))

	if xid != 7 {
		t.Errorf("xid=%d want 7", xid)
	}
	if acceptStat != rpcAcceptSuccess {
		t.Errorf("accept_stat=%d want SUCCESS(0)", acceptStat)
	}
	if len(body) != 0 {
		t.Errorf("NULL reply body should be empty, got %d bytes", len(body))
	}
}
// TestMountUDPMntReturnsHandleAndFlavors sends MNT for the export root and
// checks that the reply carries status OK, a file handle that decodes, and
// exactly the AUTH_NULL and AUTH_UNIX flavors in that order.
//
// Every length is validated before the matching slice expression, so a short
// or malformed reply fails the test instead of panicking on an out-of-range
// slice.
func TestMountUDPMntReturnsHandleAndFlavors(t *testing.T) {
	_, conn := newMountUDPTestServer(t, "/exports")
	target := conn.LocalAddr().(*net.UDPAddr)

	reply := sendMountUDP(t, target, buildMountCallFrame(42, mountProgram, 3, mountProcMnt, "/exports"))
	xid, astat, body := parseRPCReply(t, reply)
	if xid != 42 {
		t.Errorf("xid=%d want 42", xid)
	}
	if astat != rpcAcceptSuccess {
		t.Fatalf("accept_stat=%d want SUCCESS(0)", astat)
	}
	if len(body) < 4 {
		t.Fatalf("body too short: %d bytes", len(body))
	}
	status := binary.BigEndian.Uint32(body[0:4])
	if status != mnt3StatOK {
		t.Fatalf("mountstat3=%d want OK(0)", status)
	}

	// fhandle3: uint32 length + padded opaque bytes.
	if len(body) < 8 {
		t.Fatalf("body missing handle length: %d bytes", len(body))
	}
	handleLen := binary.BigEndian.Uint32(body[4:8])
	// Bound handleLen first so the padding arithmetic below cannot wrap.
	if handleLen > uint32(len(body)) {
		t.Fatalf("handle length %d exceeds body size %d", handleLen, len(body))
	}
	handlePadded := (handleLen + 3) &^ 3
	if uint32(len(body)) < 8+handlePadded+4 {
		t.Fatalf("body truncated: have %d, need at least %d", len(body), 8+handlePadded+4)
	}
	handle := body[8 : 8+handleLen]
	if _, err := DecodeFileHandle(handle); err != nil {
		t.Fatalf("returned handle does not decode: %v", err)
	}

	flavorOff := 8 + handlePadded
	count := binary.BigEndian.Uint32(body[flavorOff : flavorOff+4])
	if count != 2 {
		// Fatal: the two flavor words read below only exist when count is 2.
		t.Fatalf("flavor count=%d want 2 (NULL + UNIX)", count)
	}
	if need := flavorOff + 4 + 4*count; uint32(len(body)) < need {
		t.Fatalf("body truncated: have %d, need at least %d", len(body), need)
	}
	got := []uint32{
		binary.BigEndian.Uint32(body[flavorOff+4 : flavorOff+8]),
		binary.BigEndian.Uint32(body[flavorOff+8 : flavorOff+12]),
	}
	if got[0] != authFlavorNull || got[1] != authFlavorUnix {
		t.Errorf("flavors=%v want [%d %d]", got, authFlavorNull, authFlavorUnix)
	}
}
// TestMountUDPMntAcceptsAnyPath runs MNT without a filer client for a range
// of dirpaths — the export root, its parents, unrelated paths, an empty
// string, a relative path, and the root with a trailing slash — and expects
// each to succeed with a non-empty success body.
func TestMountUDPMntAcceptsAnyPath(t *testing.T) {
	const exportRoot = "/buckets/data"
	_, conn := newMountUDPTestServer(t, exportRoot)
	target := conn.LocalAddr().(*net.UDPAddr)

	for i, dirpath := range []string{
		"/",
		"/buckets",
		"/buckets/other",
		"/wrong/path",
		"",
		"buckets/data",
		exportRoot,
		exportRoot + "/",
	} {
		xid := uint32(1000 + i)
		t.Run(dirpath, func(t *testing.T) {
			frame := buildMountCallFrame(xid, mountProgram, 3, mountProcMnt, dirpath)
			_, acceptStat, body := parseRPCReply(t, sendMountUDP(t, target, frame))
			if acceptStat != rpcAcceptSuccess {
				t.Fatalf("accept_stat=%d want SUCCESS(0)", acceptStat)
			}
			if len(body) < 4 {
				t.Fatalf("body too short: %d bytes", len(body))
			}
			if status := binary.BigEndian.Uint32(body[0:4]); status != mnt3StatOK {
				t.Errorf("MNT(%q): mountstat3=%d want OK(0)", dirpath, status)
			}
			if len(body) <= 4 {
				t.Errorf("MNT(%q) success body must include handle and flavors", dirpath)
			}
		})
	}
}
// TestMountUDPSubexportMount drives MNT for paths below the export root
// against a fake filer client. It checks the mountstat3 returned for existing
// directories, missing paths, and a regular file, and for successful mounts
// verifies that the UDP file handle matches, byte for byte, the handle the
// TCP Handler produces for the same dirpath.
func TestMountUDPSubexportMount(t *testing.T) {
const exportRoot = "/buckets"
// Fixture: three directories and one regular file, each with a matching
// inode-index record in the fake KV store.
client := &fakeNFSFilerClient{
entries: map[util.FullPath]*filer_pb.Entry{
"/buckets": testEntry("buckets", true, 100, uint32(0755), nil),
"/buckets/data": testEntry("data", true, 101, uint32(0755), nil),
"/buckets/data/nested": testEntry("nested", true, 102, uint32(0755), nil),
"/buckets/file.txt": testEntry("file.txt", false, 103, uint32(0644), []byte("hi")),
},
kv: map[string][]byte{
string(filer.InodeIndexKey(100)): testIndexRecord(t, 100, 1, "/buckets"),
string(filer.InodeIndexKey(101)): testIndexRecord(t, 101, 1, "/buckets/data"),
string(filer.InodeIndexKey(102)): testIndexRecord(t, 102, 1, "/buckets/data/nested"),
string(filer.InodeIndexKey(103)): testIndexRecord(t, 103, 1, "/buckets/file.txt"),
},
}
m, conn := newMountUDPTestServerWithClient(t, exportRoot, client)
target := conn.LocalAddr().(*net.UDPAddr)
// Build a TCP Handler from the same Server so we can compare the
// raw FH bytes both transports produce for the same subdirectory.
tcpHandler, err := m.server.newHandler()
require.NoError(t, err)
// wantInode is only checked for cases that expect mnt3StatOK.
cases := []struct {
name string
dirpath string
wantStatus uint32
wantInode uint64
}{
{name: "subdirectory_one_level", dirpath: "/buckets/data", wantStatus: mnt3StatOK, wantInode: 101},
{name: "subdirectory_two_levels", dirpath: "/buckets/data/nested", wantStatus: mnt3StatOK, wantInode: 102},
{name: "subdirectory_trailing_slash", dirpath: "/buckets/data/", wantStatus: mnt3StatOK, wantInode: 101},
{name: "missing_under_export", dirpath: "/buckets/missing", wantStatus: mnt3ErrNoEnt},
{name: "deep_missing_under_export", dirpath: "/buckets/data/no-such-thing", wantStatus: mnt3ErrNoEnt},
{name: "regular_file_not_directory", dirpath: "/buckets/file.txt", wantStatus: mnt3ErrNotDir},
}
for i, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
xid := uint32(2000 + i)
reply := sendMountUDP(t, target, buildMountCallFrame(xid, mountProgram, 3, mountProcMnt, tc.dirpath))
_, astat, body := parseRPCReply(t, reply)
// MOUNT-level failures still arrive as an accepted RPC; the error is
// carried in the mountstat3 word of the body.
if astat != rpcAcceptSuccess {
t.Fatalf("accept_stat=%d want SUCCESS(0)", astat)
}
if len(body) < 4 {
t.Fatalf("body too short: %d bytes", len(body))
}
got := binary.BigEndian.Uint32(body[0:4])
if got != tc.wantStatus {
t.Fatalf("MNT(%q) status=%d want %d", tc.dirpath, got, tc.wantStatus)
}
// Error replies must consist of the status word alone.
if tc.wantStatus != mnt3StatOK {
if len(body) != 4 {
t.Errorf("MNT(%q) error body should carry only the status; got %d trailing bytes", tc.dirpath, len(body)-4)
}
return
}
if len(body) < 8 {
t.Fatalf("MNT(%q) success body missing handle length", tc.dirpath)
}
handleLen := binary.BigEndian.Uint32(body[4:8])
if uint32(len(body)) < 8+handleLen {
t.Fatalf("MNT(%q) success body truncated", tc.dirpath)
}
udpHandleBytes := body[8 : 8+handleLen]
handle, err := DecodeFileHandle(udpHandleBytes)
if err != nil {
t.Fatalf("MNT(%q) handle decode: %v", tc.dirpath, err)
}
if handle.Inode != tc.wantInode {
t.Errorf("MNT(%q) FH inode=%d want %d", tc.dirpath, handle.Inode, tc.wantInode)
}
if handle.Kind != FileHandleKindDirectory {
t.Errorf("MNT(%q) FH kind=%d want directory", tc.dirpath, handle.Kind)
}
// Transport parity: drive the TCP Handler with the same dirpath
// and confirm the bytes go-nfs's onMount would write match the
// UDP responder's bytes exactly. A regression that drifts the
// generation, exportID, or kind on one transport would fail here.
tcpStatus, tcpFS, _ := tcpHandler.Mount(context.Background(), nil, gonfs.MountRequest{Dirpath: []byte(tc.dirpath)})
require.Equal(t, gonfs.MountStatusOk, tcpStatus, "TCP Mount(%q)", tc.dirpath)
tcpHandleBytes := tcpHandler.ToHandle(tcpFS, nil)
require.NotEmpty(t, tcpHandleBytes, "TCP Mount(%q) ToHandle returned empty", tc.dirpath)
assert.Equal(t, tcpHandleBytes, udpHandleBytes, "TCP/UDP FH bytes diverge for %q", tc.dirpath)
})
}
}
// TestMountUDPRejectsWrongVersion sends a MOUNT v4 NULL call and expects
// PROG_MISMATCH with an 8-byte body advertising the supported range 3..3, so
// the client knows to retry with v3. This is the same defence-in-depth as the
// TCP version filter.
func TestMountUDPRejectsWrongVersion(t *testing.T) {
	_, conn := newMountUDPTestServer(t, "/exports")
	target := conn.LocalAddr().(*net.UDPAddr)

	call := buildMountCallFrame(1, mountProgram, 4, mountProcNull, "")
	_, acceptStat, body := parseRPCReply(t, sendMountUDP(t, target, call))
	if acceptStat != rpcAcceptProgMismatch {
		t.Fatalf("accept_stat=%d want PROG_MISMATCH(2)", acceptStat)
	}
	if len(body) != 8 {
		t.Fatalf("PROG_MISMATCH body=%d bytes want 8", len(body))
	}

	low, high := binary.BigEndian.Uint32(body[0:4]), binary.BigEndian.Uint32(body[4:8])
	if low != 3 || high != 3 {
		t.Errorf("supported range=(%d,%d) want (3,3)", low, high)
	}
}
// TestMountUDPRejectsWrongProgram sends a call for a program other than MOUNT
// and expects PROG_UNAVAIL.
func TestMountUDPRejectsWrongProgram(t *testing.T) {
	_, conn := newMountUDPTestServer(t, "/exports")
	target := conn.LocalAddr().(*net.UDPAddr)

	// 100021 is NLM, which we don't run here.
	const nlmProgram = 100021
	call := buildMountCallFrame(1, nlmProgram, 4, mountProcNull, "")
	_, acceptStat, _ := parseRPCReply(t, sendMountUDP(t, target, call))
	if acceptStat != rpcAcceptProgUnavail {
		t.Errorf("accept_stat=%d want PROG_UNAVAIL(1)", acceptStat)
	}
}
// TestMountUDPUmntAcknowledges checks that UMNT is answered with SUCCESS and
// an empty body. UMNT carries a dirpath, but the server is stateless and
// ignores it.
func TestMountUDPUmntAcknowledges(t *testing.T) {
	_, conn := newMountUDPTestServer(t, "/exports")
	target := conn.LocalAddr().(*net.UDPAddr)

	call := buildMountCallFrame(8, mountProgram, 3, mountProcUmnt, "/exports")
	_, acceptStat, body := parseRPCReply(t, sendMountUDP(t, target, call))
	if acceptStat != rpcAcceptSuccess {
		t.Errorf("accept_stat=%d want SUCCESS(0)", acceptStat)
	}
	if len(body) != 0 {
		t.Errorf("UMNT reply body should be empty, got %d bytes", len(body))
	}
}
// TestMountUDPRejectsTruncatedMntArgs sends an MNT call whose dirpath length
// field claims 16 bytes while no dirpath bytes follow, and expects
// GARBAGE_ARGS.
func TestMountUDPRejectsTruncatedMntArgs(t *testing.T) {
	_, conn := newMountUDPTestServer(t, "/exports")
	target := conn.LocalAddr().(*net.UDPAddr)

	// The frame is hand-built because buildMountCallFrame would append the
	// padded dirpath bytes; this case needs exactly "length, no body".
	// Layout: 24-byte call header, 16 bytes of auth, 4-byte length.
	frame := make([]byte, 24+16+4)
	header := []uint32{
		1,            // xid
		rpcMsgCall,   // msg_type
		2,            // rpcvers
		mountProgram, // prog
		3,            // mount vers
		mountProcMnt, // proc
	}
	for i, word := range header {
		binary.BigEndian.PutUint32(frame[4*i:], word)
	}
	// The auth area stays zero: two AUTH_NONE stanzas of length 0.
	binary.BigEndian.PutUint32(frame[40:], 16) // dirpath length=16, no bytes follow

	_, acceptStat, _ := parseRPCReply(t, sendMountUDP(t, target, frame))
	if acceptStat != rpcAcceptGarbageArgs {
		t.Errorf("accept_stat=%d want GARBAGE_ARGS(4)", acceptStat)
	}
}
// TestMountUDPCloseStopsServing confirms the responder answers before Close
// and that no reply arrives for a call sent afterwards.
func TestMountUDPCloseStopsServing(t *testing.T) {
	m, conn := newMountUDPTestServer(t, "/exports")
	target := conn.LocalAddr().(*net.UDPAddr)

	// Sanity: NULL works before close.
	_ = sendMountUDP(t, target, buildMountCallFrame(1, mountProgram, 3, mountProcNull, ""))

	if closeErr := m.Close(); closeErr != nil {
		t.Fatalf("Close: %v", closeErr)
	}

	// After Close the socket is shut, so a fresh send should fail to read a
	// reply within the deadline rather than get a well-formed response.
	probe, dialErr := net.DialUDP("udp", nil, target)
	if dialErr != nil {
		// Some platforms refuse the dial outright after Close — also
		// acceptable: the server is gone either way.
		return
	}
	defer probe.Close()

	_, _ = probe.Write(buildMountCallFrame(2, mountProgram, 3, mountProcNull, ""))
	_ = probe.SetReadDeadline(time.Now().Add(200 * time.Millisecond))
	scratch := make([]byte, 1024)
	_, readErr := probe.Read(scratch)
	if readErr == nil {
		t.Error("Close should have stopped the responder, but a reply still arrived")
	}
}
-447
View File
@@ -1,447 +0,0 @@
package nfs
import (
"encoding/binary"
"errors"
"fmt"
"io"
"net"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
)
// Minimal PORTMAP v2 responder.
//
// The upstream willscott/go-nfs library serves NFSv3 and MOUNT on a single TCP
// port and deliberately does not register with portmap (RPC program 100000).
// Linux mount.nfs, however, queries portmap on port 111 before sending the
// MOUNT RPC, so the plain `mount -t nfs host:/export /mnt` command fails
// against a default `weed nfs` deployment.
//
// When enabled, this responder binds the privileged port 111 (RFC 1833) on
// both TCP and UDP and answers the subset of PORTMAP v2 calls that standard
// Linux clients make: PMAP_NULL, PMAP_GETPORT and PMAP_DUMP. It refuses
// registration from third parties (PMAP_SET / PMAP_UNSET return false) and
// only exposes the programs that weed itself serves.
//
// References: RFC 1833 (Portmap v2), RFC 5531 (RPC).
// Wire constants and defensive limits for the PORTMAP v2 responder.
const (
// RPC program number, version and default port of the portmap service itself.
portmapProgram = 100000
portmapVersion = 2
portmapPort = 111
// PORTMAP v2 procedure numbers.
pmapProcNull = 0
pmapProcSet = 1
pmapProcUnset = 2
pmapProcGetPort = 3
pmapProcDump = 4
// IP protocol numbers used in the Protocol field of a portmap entry.
ipProtoTCP = 6
ipProtoUDP = 17
// RPC program numbers of the services advertised by newPortmapServer.
nfsProgram = 100003
mountProgram = 100005
// RPC message types, reply status, accept_stat values and auth flavor
// (RFC 5531).
rpcMsgCall = 0
rpcMsgReply = 1
rpcMsgAccepted = 0
rpcAcceptSuccess = 0
rpcAcceptProgUnavail = 1
rpcAcceptProgMismatch = 2
rpcAcceptProcUnavail = 3
rpcAcceptGarbageArgs = 4
rpcAuthNone = 0
// Defensive limits. Portmap messages are tiny in practice; these caps
// protect the responder from large or slow reads.
portmapMaxRecord = 64 * 1024
// Per-connection read/write deadlines on the TCP listener. The idle
// timeout bounds how long we wait for the next request on an otherwise
// quiet connection; the IO timeout bounds a single read or write once
// one is in flight. Both guard against slowloris-style stalls on the
// privileged port 111.
portmapTCPIdleTimeout = 30 * time.Second
portmapTCPIOTimeout = 10 * time.Second
// Back-off applied before retrying after a non-fatal listener error
// (e.g. EMFILE on TCP Accept, or a transient UDP read failure) so we
// don't busy-loop when the host is under pressure.
portmapRetryBackoff = 50 * time.Millisecond
)
// portmapEntry is one advertised mapping: an RPC program and version,
// reachable over the given IP protocol on Port.
type portmapEntry struct {
// Program is the RPC program number (e.g. nfsProgram, mountProgram).
Program uint32
// Version is the program version being advertised.
Version uint32
// Protocol is the IP protocol number (ipProtoTCP or ipProtoUDP).
Protocol uint32
// Port is the port the program is served on.
Port uint32
}
// portmapServer is the PORTMAP responder. Start opens a TCP listener and a
// UDP socket on bindIP:port; Close shuts both down and waits for every
// goroutine tracked by wg.
type portmapServer struct {
// bindIP and port form the listen address used for both TCP and UDP.
bindIP string
port int
// entries is the set of mappings built by newPortmapServer.
entries []portmapEntry
// tcpListener and udpConn are assigned by Start and closed by Close.
tcpListener net.Listener
udpConn *net.UDPConn
// mu guards closed and conns. It is held only for bookkeeping, never
// across network IO.
mu sync.Mutex
closed bool
// conns holds the accepted TCP connections still being handled, so Close
// can evict them.
conns map[net.Conn]struct{}
// done is closed exactly once by Close() so that background loops can
// interrupt a retry-backoff sleep instead of waiting it out.
done chan struct{}
// wg tracks the TCP and UDP serve loops and every per-connection handler.
wg sync.WaitGroup
}
// newPortmapServer builds an unstarted responder that advertises the services
// running on nfsPort. A non-positive port falls back to portmapPort (111).
//
// NFS itself is advertised over TCP only (the upstream go-nfs library doesn't
// speak NFS over UDP). MOUNT is advertised over both TCP (served by go-nfs)
// and UDP (served by mountUDPServer in mount_udp.go). Advertising both is
// what lets a plain `mount -t nfs <host>:<export> /mnt` work for Linux
// clients whose default mountproto is UDP, without mountproto=tcp /
// mountport=2049 mount options.
func newPortmapServer(bindIP string, port int, nfsPort uint32) *portmapServer {
	listenPort := port
	if listenPort <= 0 {
		listenPort = portmapPort
	}

	advertised := []portmapEntry{
		{Program: nfsProgram, Version: 3, Protocol: ipProtoTCP, Port: nfsPort},
		{Program: mountProgram, Version: 3, Protocol: ipProtoTCP, Port: nfsPort},
		{Program: mountProgram, Version: 3, Protocol: ipProtoUDP, Port: nfsPort},
	}

	return &portmapServer{
		bindIP:  bindIP,
		port:    listenPort,
		entries: advertised,
		done:    make(chan struct{}),
	}
}
// Start opens the TCP listener and the UDP socket on bindIP:port and launches
// the two serve loops, both tracked by ps.wg. If the UDP side cannot be set
// up, the TCP listener that was already opened is closed before the error is
// returned.
func (ps *portmapServer) Start() error {
	addr := net.JoinHostPort(ps.bindIP, fmt.Sprintf("%d", ps.port))

	listener, err := net.Listen("tcp", addr)
	if err != nil {
		return fmt.Errorf("portmap tcp listen %s: %w", addr, err)
	}

	// openUDP resolves and binds the UDP side, returning a wrapped error.
	openUDP := func() (*net.UDPConn, error) {
		resolved, resolveErr := net.ResolveUDPAddr("udp", addr)
		if resolveErr != nil {
			return nil, fmt.Errorf("portmap udp resolve %s: %w", addr, resolveErr)
		}
		sock, listenErr := net.ListenUDP("udp", resolved)
		if listenErr != nil {
			return nil, fmt.Errorf("portmap udp listen %s: %w", addr, listenErr)
		}
		return sock, nil
	}
	packetConn, err := openUDP()
	if err != nil {
		_ = listener.Close()
		return err
	}

	ps.tcpListener, ps.udpConn = listener, packetConn

	loops := []func(){ps.serveTCP, ps.serveUDP}
	ps.wg.Add(len(loops))
	for _, loop := range loops {
		go func(run func()) {
			defer ps.wg.Done()
			run()
		}(loop)
	}
	return nil
}
// Close stops the responder and waits for every goroutine tracked by ps.wg.
// Only the first call does any work; later calls return nil immediately.
// The returned error is the first non-nil error from closing the TCP
// listener or the UDP socket.
func (ps *portmapServer) Close() error {
	var tracked map[net.Conn]struct{}

	ps.mu.Lock()
	alreadyClosed := ps.closed
	if !alreadyClosed {
		ps.closed = true
		// Detach the conn set so it can be closed without holding mu.
		tracked, ps.conns = ps.conns, nil
		close(ps.done)
	}
	ps.mu.Unlock()

	if alreadyClosed {
		return nil
	}

	var firstErr error
	if ln := ps.tcpListener; ln != nil {
		firstErr = ln.Close()
	}
	if uc := ps.udpConn; uc != nil {
		if err := uc.Close(); firstErr == nil {
			firstErr = err
		}
	}

	// Closing the registered TCP conns makes their handlers' blocked reads
	// fail, so the wait below is not held up by idle clients.
	for c := range tracked {
		_ = c.Close()
	}

	ps.wg.Wait()
	return firstErr
}
// isClosed reports, under ps.mu, whether Close() has already been called.
func (ps *portmapServer) isClosed() bool {
	ps.mu.Lock()
	closed := ps.closed
	ps.mu.Unlock()
	return closed
}
// addConn records c so that Close() can evict it. A false result means
// shutdown has already begun and c was not recorded; the caller is then
// responsible for closing c itself.
func (ps *portmapServer) addConn(c net.Conn) bool {
	ps.mu.Lock()
	defer ps.mu.Unlock()

	switch {
	case ps.closed:
		return false
	case ps.conns == nil:
		// The set is created lazily on the first registration.
		ps.conns = map[net.Conn]struct{}{c: {}}
	default:
		ps.conns[c] = struct{}{}
	}
	return true
}
// removeConn forgets c. It is safe after Close() has set ps.conns to nil,
// because deleting from a nil map is a no-op.
func (ps *portmapServer) removeConn(c net.Conn) {
	ps.mu.Lock()
	delete(ps.conns, c)
	ps.mu.Unlock()
}
// serveTCP is the TCP accept loop. Every accepted conn that can still be
// registered gets its own handler goroutine (counted in ps.wg); conns that
// arrive after shutdown has started are closed on the spot. The loop exits
// once Accept fails and the server is closed.
func (ps *portmapServer) serveTCP() {
	for {
		conn, err := ps.tcpListener.Accept()
		if err == nil {
			if !ps.addConn(conn) {
				_ = conn.Close()
				continue
			}
			ps.wg.Add(1)
			go func() {
				defer ps.wg.Done()
				defer ps.removeConn(conn)
				ps.handleTCPConn(conn)
			}()
			continue
		}

		if ps.isClosed() {
			return
		}
		// Anything else (e.g. EMFILE, EINTR) is treated as a transient
		// blip: log it and retry after a short pause, unless Close()
		// fires first.
		glog.V(1).Infof("portmap tcp accept: %v", err)
		select {
		case <-ps.done:
			return
		case <-time.After(portmapRetryBackoff):
		}
	}
}
// handleTCPConn serves record-marked RPC calls on conn until a read or write
// fails, a deadline expires, or a record is rejected; conn is always closed
// on return. Only single-fragment records with a length in
// 1..portmapMaxRecord are accepted. Calls for which handleCall returns nil
// get no reply and the connection stays open.
func (ps *portmapServer) handleTCPConn(conn net.Conn) {
	defer conn.Close()

	// Bit 31 of the record mark is the last-fragment flag.
	const lastFragBit = uint32(1) << 31

	var marker [4]byte
	for {
		// Waiting for the next record mark uses the idle timeout.
		_ = conn.SetReadDeadline(time.Now().Add(portmapTCPIdleTimeout))
		if _, err := io.ReadFull(conn, marker[:]); err != nil {
			return
		}

		word := binary.BigEndian.Uint32(marker[:])
		size := word &^ lastFragBit
		// Portmap messages are single-fragment in practice, so a record
		// without the flag, or with an empty/oversized length, drops the
		// connection.
		if word&lastFragBit == 0 || size == 0 || size > portmapMaxRecord {
			return
		}

		record := make([]byte, size)
		_ = conn.SetReadDeadline(time.Now().Add(portmapTCPIOTimeout))
		if _, err := io.ReadFull(conn, record); err != nil {
			return
		}

		reply := ps.handleCall(record)
		if reply == nil {
			continue
		}

		frame := make([]byte, 4, 4+len(reply))
		binary.BigEndian.PutUint32(frame, uint32(len(reply))|lastFragBit)
		frame = append(frame, reply...)

		_ = conn.SetWriteDeadline(time.Now().Add(portmapTCPIOTimeout))
		if _, err := conn.Write(frame); err != nil {
			return
		}
	}
}
// serveUDP answers portmap calls that arrive as UDP datagrams, one datagram
// per call, using a single receive buffer of portmapMaxRecord bytes. It
// returns once a read fails and the server is closed; other read failures
// are logged and retried after a pause.
func (ps *portmapServer) serveUDP() {
	datagram := make([]byte, portmapMaxRecord)
	for {
		n, peer, err := ps.udpConn.ReadFromUDP(datagram)
		if err != nil {
			if ps.isClosed() {
				return
			}
			// Keep the UDP responder alive across a transient read
			// failure; Close() can cut the pause short.
			glog.V(1).Infof("portmap udp read: %v", err)
			select {
			case <-ps.done:
				return
			case <-time.After(portmapRetryBackoff):
			}
			continue
		}

		if reply := ps.handleCall(datagram[:n]); reply != nil {
			if _, werr := ps.udpConn.WriteToUDP(reply, peer); werr != nil {
				glog.V(1).Infof("portmap udp write to %s: %v", peer, werr)
			}
		}
	}
}
// handleCall decodes a single RPC CALL and produces the encoded reply body
// (without any TCP record mark). It returns nil when parseRPCCall rejects the
// message, in which case the call is dropped without a reply.
//
// Replies by case:
//   - program other than portmapProgram: PROG_UNAVAIL
//   - version other than portmapVersion: PROG_MISMATCH with the range
//     portmapVersion..portmapVersion
//   - NULL: empty SUCCESS
//   - GETPORT: SUCCESS with the advertised port, or 0 when nothing matches;
//     GARBAGE_ARGS when fewer than 16 argument bytes are present
//   - DUMP: SUCCESS with the full mapping list
//   - SET / UNSET: SUCCESS with bool FALSE (registrations are refused)
//   - any other procedure: PROC_UNAVAIL
func (ps *portmapServer) handleCall(callBuf []byte) []byte {
	xid, prog, vers, proc, args, err := parseRPCCall(callBuf)
	switch {
	case err != nil:
		return nil
	case prog != portmapProgram:
		return encodeAcceptedReply(xid, rpcAcceptProgUnavail, nil)
	case vers != portmapVersion:
		// RFC 5531 wants the supported range here; we support exactly one
		// version, so low and high are the same.
		var supported [8]byte
		binary.BigEndian.PutUint32(supported[0:4], portmapVersion)
		binary.BigEndian.PutUint32(supported[4:8], portmapVersion)
		return encodeAcceptedReply(xid, rpcAcceptProgMismatch, supported[:])
	}

	switch proc {
	case pmapProcNull:
		return encodeAcceptedReply(xid, rpcAcceptSuccess, nil)

	case pmapProcGetPort:
		if len(args) < 16 {
			return encodeAcceptedReply(xid, rpcAcceptGarbageArgs, nil)
		}
		wantProg := binary.BigEndian.Uint32(args[0:4])
		wantVers := binary.BigEndian.Uint32(args[4:8])
		wantProto := binary.BigEndian.Uint32(args[8:12])

		var port uint32 // stays 0 when no mapping matches
		for i := range ps.entries {
			e := &ps.entries[i]
			if e.Program == wantProg && e.Version == wantVers && e.Protocol == wantProto {
				port = e.Port
				break
			}
		}
		result := make([]byte, 4)
		binary.BigEndian.PutUint32(result, port)
		return encodeAcceptedReply(xid, rpcAcceptSuccess, result)

	case pmapProcDump:
		// Each list element is value_follows=TRUE (4 bytes) followed by a
		// 16-byte mapping; the list ends with value_follows=FALSE, which
		// is the zeroed final word of the freshly allocated buffer.
		const elemLen = 20
		list := make([]byte, elemLen*len(ps.entries)+4)
		for i, e := range ps.entries {
			elem := list[i*elemLen : (i+1)*elemLen]
			binary.BigEndian.PutUint32(elem[0:4], 1)
			binary.BigEndian.PutUint32(elem[4:8], e.Program)
			binary.BigEndian.PutUint32(elem[8:12], e.Version)
			binary.BigEndian.PutUint32(elem[12:16], e.Protocol)
			binary.BigEndian.PutUint32(elem[16:20], e.Port)
		}
		return encodeAcceptedReply(xid, rpcAcceptSuccess, list)

	case pmapProcSet, pmapProcUnset:
		// Third-party registrations are not accepted: answer bool FALSE.
		return encodeAcceptedReply(xid, rpcAcceptSuccess, make([]byte, 4))

	default:
		return encodeAcceptedReply(xid, rpcAcceptProcUnavail, nil)
	}
}
// parseRPCCall decodes the fixed part of an RPC CALL header (xid, program,
// version, procedure), steps over the two opaque_auth fields (cred, then
// verf), and returns whatever follows them as args. args aliases buf.
//
// An error is returned when buf is shorter than 40 bytes, msg_type is not
// CALL, rpcvers is not 2, or either opaque_auth is truncated or declares a
// length above portmapMaxRecord. On error, header fields decoded before the
// failure are still returned and args is nil.
func parseRPCCall(buf []byte) (xid, prog, vers, proc uint32, args []byte, err error) {
	// Six fixed header words plus two empty opaque_auth headers
	// (flavor + length each): 6*4 + 2*8 = 40 bytes.
	const (
		fixedWordsLen = 24
		minHeader     = 40
	)
	if len(buf) < minHeader {
		return 0, 0, 0, 0, nil, fmt.Errorf("rpc call too short: %d bytes", len(buf))
	}

	be := binary.BigEndian
	xid = be.Uint32(buf[0:4])

	msgType := be.Uint32(buf[4:8])
	if msgType != rpcMsgCall {
		return xid, 0, 0, 0, nil, fmt.Errorf("not an rpc call: msg_type=%d", msgType)
	}
	rpcvers := be.Uint32(buf[8:12])
	if rpcvers != 2 {
		return xid, 0, 0, 0, nil, fmt.Errorf("unsupported rpc version %d", rpcvers)
	}

	prog, vers, proc = be.Uint32(buf[12:16]), be.Uint32(buf[16:20]), be.Uint32(buf[20:24])

	off := fixedWordsLen
	for remaining := 2; remaining > 0; remaining-- { // cred, then verf
		if len(buf) < off+8 {
			return xid, prog, vers, proc, nil, fmt.Errorf("truncated opaque_auth at offset %d", off)
		}
		bodyLen := be.Uint32(buf[off+4 : off+8])
		// The limit is checked before rounding up to the XDR 4-byte
		// boundary, so a length near the uint32 maximum cannot wrap
		// around to a small padded value.
		if bodyLen > uint32(portmapMaxRecord) {
			return xid, prog, vers, proc, nil, errors.New("opaque_auth length exceeds limit")
		}
		next := uint64(off) + 8 + uint64((bodyLen+3)&^3)
		if next > uint64(len(buf)) {
			return xid, prog, vers, proc, nil, fmt.Errorf("truncated opaque_auth body at offset %d (len=%d)", off, bodyLen)
		}
		off = int(next)
	}
	return xid, prog, vers, proc, buf[off:], nil
}
// encodeAcceptedReply returns an RPC REPLY with reply_stat MSG_ACCEPTED, an
// AUTH_NONE verifier of length zero, the given accept_stat, and body copied
// verbatim after it. body must already be XDR-encoded: the procedure result
// for SUCCESS, the { uint32 low; uint32 high; } mismatch_info range for
// PROG_MISMATCH (RFC 5531 §9), and nil for the other error statuses
// (PROG_UNAVAIL, PROC_UNAVAIL, GARBAGE_ARGS).
func encodeAcceptedReply(xid, acceptStat uint32, body []byte) []byte {
	header := [...]uint32{
		xid,
		rpcMsgReply,
		rpcMsgAccepted,
		rpcAuthNone, // verf flavor
		0,           // verf length
		acceptStat,
	}
	const wordLen = 4

	out := make([]byte, wordLen*len(header)+len(body))
	for i, word := range header {
		binary.BigEndian.PutUint32(out[wordLen*i:], word)
	}
	copy(out[wordLen*len(header):], body)
	return out
}
-418
View File
@@ -1,418 +0,0 @@
package nfs
import (
"bytes"
"encoding/binary"
"io"
"net"
"strconv"
"testing"
"time"
)
func buildRPCCall(t *testing.T, xid, prog, vers, proc uint32, credBody, verfBody, args []byte) []byte {
t.Helper()
pad := func(b []byte) []byte {
r := len(b) % 4
if r == 0 {
return b
}
out := make([]byte, len(b)+4-r)
copy(out, b)
return out
}
buf := new(bytes.Buffer)
write := func(v uint32) {
var b [4]byte
binary.BigEndian.PutUint32(b[:], v)
buf.Write(b[:])
}
write(xid)
write(rpcMsgCall)
write(2) // rpcvers
write(prog)
write(vers)
write(proc)
// cred
write(rpcAuthNone)
write(uint32(len(credBody)))
buf.Write(pad(credBody))
// verf
write(rpcAuthNone)
write(uint32(len(verfBody)))
buf.Write(pad(verfBody))
buf.Write(args)
return buf.Bytes()
}
// parseAcceptedReply unpacks a MSG_ACCEPTED reply for tests and returns its
// xid, accept_stat and the bytes after accept_stat. It fails the test when
// the reply is shorter than 24 bytes, is not a REPLY, was not accepted, or
// carries a verifier with a non-zero length.
func parseAcceptedReply(t *testing.T, reply []byte) (xid, acceptStat uint32, body []byte) {
	t.Helper()

	const headerLen = 24
	if len(reply) < headerLen {
		t.Fatalf("reply too short: %d bytes", len(reply))
	}
	// word returns the i-th big-endian 32-bit word of the reply header.
	word := func(i int) uint32 {
		return binary.BigEndian.Uint32(reply[4*i : 4*i+4])
	}

	if mt := word(1); mt != rpcMsgReply {
		t.Fatalf("msg_type=%d, want REPLY", mt)
	}
	if rs := word(2); rs != rpcMsgAccepted {
		t.Fatalf("reply_stat=%d, want ACCEPTED", rs)
	}
	// Word 3 is the verifier flavor (not checked); word 4 is its length.
	if verfLen := word(4); verfLen != 0 {
		t.Fatalf("unexpected verf length %d", verfLen)
	}
	return word(0), word(5), reply[headerLen:]
}
// newTestPortmap returns an unstarted responder bound to 127.0.0.1 on the
// default portmap port, advertising its services on port 2049. It is meant
// for tests that call handleCall directly.
func newTestPortmap() *portmapServer {
	const nfsPort = 2049
	return newPortmapServer("127.0.0.1", portmapPort, nfsPort)
}
// TestParseRPCCall_SkipsAuth checks that parseRPCCall steps over a padded
// credential and an empty verifier and hands back exactly the procedure
// arguments.
func TestParseRPCCall_SkipsAuth(t *testing.T) {
	const wantXID = 42
	wantArgs := []byte{0x01, 0x02, 0x03, 0x04}

	// A 5-byte credential is padded to 8 bytes on the wire.
	call := buildRPCCall(t, wantXID, portmapProgram, portmapVersion, pmapProcNull,
		[]byte("hello"), []byte{}, wantArgs)

	xid, prog, vers, proc, gotArgs, err := parseRPCCall(call)
	if err != nil {
		t.Fatalf("parseRPCCall: %v", err)
	}
	headerOK := xid == wantXID && prog == portmapProgram && vers == portmapVersion && proc == pmapProcNull
	if !headerOK {
		t.Fatalf("header mismatch: xid=%d prog=%d vers=%d proc=%d", xid, prog, vers, proc)
	}
	if !bytes.Equal(gotArgs, wantArgs) {
		t.Fatalf("args mismatch: got %x want %x", gotArgs, wantArgs)
	}
}
// TestParseRPCCall_RejectsReply checks that a message whose msg_type is
// REPLY is refused.
func TestParseRPCCall_RejectsReply(t *testing.T) {
	msg := make([]byte, 40)
	binary.BigEndian.PutUint32(msg[4:8], rpcMsgReply)

	_, _, _, _, _, err := parseRPCCall(msg)
	if err == nil {
		t.Fatal("expected error on reply-typed message")
	}
}
// TestParseRPCCall_TruncatedAuth checks that a credential declaring more
// bytes than the message contains is rejected.
func TestParseRPCCall_TruncatedAuth(t *testing.T) {
	be := binary.BigEndian

	// A minimum-size CALL whose cred length field claims 1000 bytes that
	// are not present.
	msg := make([]byte, 40)
	be.PutUint32(msg[4:8], rpcMsgCall)
	be.PutUint32(msg[8:12], 2)
	be.PutUint32(msg[28:32], 1000)

	_, _, _, _, _, err := parseRPCCall(msg)
	if err == nil {
		t.Fatal("expected error on truncated auth")
	}
}
func TestHandleCall_Null(t *testing.T) {
ps := newTestPortmap()
msg := buildRPCCall(t, 7, portmapProgram, portmapVersion, pmapProcNull, nil, nil, nil)
reply := ps.handleCall(msg)
xid, acc, body := parseAcceptedReply(t, reply)
if xid != 7 || acc != rpcAcceptSuccess || len(body) != 0 {
t.Fatalf("null reply xid=%d acc=%d body=%x", xid, acc, body)
}
}
func TestHandleCall_GetPort_HitAndMiss(t *testing.T) {
ps := newTestPortmap()
buildQuery := func(prog, vers, prot uint32) []byte {
args := make([]byte, 16)
binary.BigEndian.PutUint32(args[0:4], prog)
binary.BigEndian.PutUint32(args[4:8], vers)
binary.BigEndian.PutUint32(args[8:12], prot)
// port field is ignored by the server; leave zero
return args
}
cases := []struct {
name string
prog, vers, prot uint32
wantPort uint32
}{
{"nfs-v3-tcp-hit", nfsProgram, 3, ipProtoTCP, 2049},
{"mount-v3-tcp-hit", mountProgram, 3, ipProtoTCP, 2049},
{"mount-v3-udp-hit", mountProgram, 3, ipProtoUDP, 2049},
{"mount-v1-tcp-miss", mountProgram, 1, ipProtoTCP, 0},
{"nfs-v3-udp-miss", nfsProgram, 3, ipProtoUDP, 0},
{"nlm-miss", 100021, 4, ipProtoTCP, 0},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
msg := buildRPCCall(t, 11, portmapProgram, portmapVersion, pmapProcGetPort, nil, nil, buildQuery(tc.prog, tc.vers, tc.prot))
reply := ps.handleCall(msg)
xid, acc, body := parseAcceptedReply(t, reply)
if xid != 11 {
t.Fatalf("xid=%d want 11", xid)
}
if acc != rpcAcceptSuccess {
t.Fatalf("acc=%d want SUCCESS", acc)
}
if len(body) != 4 {
t.Fatalf("getport body len=%d want 4", len(body))
}
got := binary.BigEndian.Uint32(body)
if got != tc.wantPort {
t.Fatalf("port=%d want %d", got, tc.wantPort)
}
})
}
}
func TestHandleCall_Dump(t *testing.T) {
ps := newTestPortmap()
msg := buildRPCCall(t, 13, portmapProgram, portmapVersion, pmapProcDump, nil, nil, nil)
reply := ps.handleCall(msg)
_, acc, body := parseAcceptedReply(t, reply)
if acc != rpcAcceptSuccess {
t.Fatalf("acc=%d", acc)
}
var entries []portmapEntry
p := 0
for p+4 <= len(body) {
vf := binary.BigEndian.Uint32(body[p : p+4])
p += 4
if vf == 0 {
break
}
if p+16 > len(body) {
t.Fatalf("truncated entry at %d", p)
}
entries = append(entries, portmapEntry{
Program: binary.BigEndian.Uint32(body[p : p+4]),
Version: binary.BigEndian.Uint32(body[p+4 : p+8]),
Protocol: binary.BigEndian.Uint32(body[p+8 : p+12]),
Port: binary.BigEndian.Uint32(body[p+12 : p+16]),
})
p += 16
}
if len(entries) != 3 {
t.Fatalf("got %d dump entries, want 3: %+v", len(entries), entries)
}
wantSet := map[portmapEntry]bool{
{Program: nfsProgram, Version: 3, Protocol: ipProtoTCP, Port: 2049}: false,
{Program: mountProgram, Version: 3, Protocol: ipProtoTCP, Port: 2049}: false,
{Program: mountProgram, Version: 3, Protocol: ipProtoUDP, Port: 2049}: false,
}
for _, e := range entries {
if _, ok := wantSet[e]; !ok {
t.Fatalf("unexpected dump entry %+v", e)
}
wantSet[e] = true
}
for e, seen := range wantSet {
if !seen {
t.Fatalf("missing dump entry %+v", e)
}
}
}
func TestHandleCall_UnknownProg(t *testing.T) {
ps := newTestPortmap()
msg := buildRPCCall(t, 1, 999999, 1, 0, nil, nil, nil)
reply := ps.handleCall(msg)
_, acc, _ := parseAcceptedReply(t, reply)
if acc != rpcAcceptProgUnavail {
t.Fatalf("acc=%d want PROG_UNAVAIL", acc)
}
}
func TestHandleCall_VersionMismatch(t *testing.T) {
ps := newTestPortmap()
msg := buildRPCCall(t, 1, portmapProgram, 42, pmapProcNull, nil, nil, nil)
reply := ps.handleCall(msg)
_, acc, body := parseAcceptedReply(t, reply)
if acc != rpcAcceptProgMismatch {
t.Fatalf("acc=%d want PROG_MISMATCH", acc)
}
if len(body) != 8 {
t.Fatalf("mismatch body len=%d want 8", len(body))
}
lo := binary.BigEndian.Uint32(body[0:4])
hi := binary.BigEndian.Uint32(body[4:8])
if lo != portmapVersion || hi != portmapVersion {
t.Fatalf("mismatch range lo=%d hi=%d", lo, hi)
}
}
func TestHandleCall_UnknownProc(t *testing.T) {
ps := newTestPortmap()
msg := buildRPCCall(t, 1, portmapProgram, portmapVersion, 42, nil, nil, nil)
reply := ps.handleCall(msg)
_, acc, _ := parseAcceptedReply(t, reply)
if acc != rpcAcceptProcUnavail {
t.Fatalf("acc=%d want PROC_UNAVAIL", acc)
}
}
func TestHandleCall_SetRefused(t *testing.T) {
ps := newTestPortmap()
args := make([]byte, 16) // mapping struct
msg := buildRPCCall(t, 1, portmapProgram, portmapVersion, pmapProcSet, nil, nil, args)
reply := ps.handleCall(msg)
_, acc, body := parseAcceptedReply(t, reply)
if acc != rpcAcceptSuccess {
t.Fatalf("acc=%d", acc)
}
if len(body) != 4 || binary.BigEndian.Uint32(body) != 0 {
t.Fatalf("PMAP_SET must return FALSE, got %x", body)
}
}
// pickFreePort returns a TCP port on 127.0.0.1 that the OS reported as free,
// found by listening on port 0 and closing the listener again. It lets the
// end-to-end tests run in parallel without touching the privileged default
// port 111.
func pickFreePort(t *testing.T) int {
	t.Helper()

	probe, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("listen: %v", err)
	}
	defer probe.Close()

	tcpAddr := probe.Addr().(*net.TCPAddr)
	return tcpAddr.Port
}
// TestPortmapServer_UDPGetPort starts a real responder on a free port, sends
// one GETPORT for NFS v3 over TCP as a UDP datagram, and expects port 2049
// back.
func TestPortmapServer_UDPGetPort(t *testing.T) {
	port := pickFreePort(t)
	ps := newPortmapServer("127.0.0.1", port, 2049)
	if err := ps.Start(); err != nil {
		t.Fatalf("start: %v", err)
	}
	t.Cleanup(func() { _ = ps.Close() })

	var mapping [16]byte
	binary.BigEndian.PutUint32(mapping[0:4], nfsProgram)
	binary.BigEndian.PutUint32(mapping[4:8], 3)
	binary.BigEndian.PutUint32(mapping[8:12], ipProtoTCP)
	call := buildRPCCall(t, 99, portmapProgram, portmapVersion, pmapProcGetPort, nil, nil, mapping[:])

	target := net.JoinHostPort("127.0.0.1", strconv.Itoa(port))
	conn, err := net.Dial("udp", target)
	if err != nil {
		t.Fatalf("dial udp: %v", err)
	}
	defer conn.Close()
	_ = conn.SetDeadline(time.Now().Add(2 * time.Second))

	if _, err := conn.Write(call); err != nil {
		t.Fatalf("write: %v", err)
	}
	reply := make([]byte, 4096)
	n, err := conn.Read(reply)
	if err != nil {
		t.Fatalf("read: %v", err)
	}

	xid, acc, body := parseAcceptedReply(t, reply[:n])
	replyOK := xid == 99 && acc == rpcAcceptSuccess && len(body) == 4
	if !replyOK {
		t.Fatalf("bad reply xid=%d acc=%d body=%x", xid, acc, body)
	}
	if got := binary.BigEndian.Uint32(body); got != 2049 {
		t.Fatalf("udp getport port=%d want 2049", got)
	}
}
// TestPortmapServer_CloseEvictsIdleTCPConn checks that Close() actively
// closes an idle, already-registered TCP connection instead of waiting for
// the idle deadline or for the client to hang up.
func TestPortmapServer_CloseEvictsIdleTCPConn(t *testing.T) {
	port := pickFreePort(t)
	ps := newPortmapServer("127.0.0.1", port, 2049)
	if err := ps.Start(); err != nil {
		t.Fatalf("start: %v", err)
	}
	// Close() returns nil on repeat calls, so registering it here releases
	// the listeners and serving goroutines on every early t.Fatalf below
	// without interfering with the explicit Close() under test.
	t.Cleanup(func() { _ = ps.Close() })

	conn, err := net.Dial("tcp", net.JoinHostPort("127.0.0.1", strconv.Itoa(port)))
	if err != nil {
		t.Fatalf("dial: %v", err)
	}
	defer conn.Close()

	// Issue one call and read its reply so the server-side connection is
	// definitely registered before we trigger shutdown.
	msg := buildRPCCall(t, 1, portmapProgram, portmapVersion, pmapProcNull, nil, nil, nil)
	var mark [4]byte
	binary.BigEndian.PutUint32(mark[:], uint32(len(msg))|(1<<31))
	_ = conn.SetDeadline(time.Now().Add(2 * time.Second))
	if _, err := conn.Write(mark[:]); err != nil {
		t.Fatalf("write mark: %v", err)
	}
	if _, err := conn.Write(msg); err != nil {
		t.Fatalf("write msg: %v", err)
	}
	if _, err := io.ReadFull(conn, mark[:]); err != nil {
		t.Fatalf("read mark: %v", err)
	}
	rlen := binary.BigEndian.Uint32(mark[:]) &^ (1 << 31)
	if _, err := io.ReadFull(conn, make([]byte, rlen)); err != nil {
		t.Fatalf("read body: %v", err)
	}

	// Close must return long before the TCP idle deadline (30s) — in
	// other words, the server must actively close the idle conn rather
	// than wait for the deadline or for the client to disconnect.
	done := make(chan error, 1)
	go func() { done <- ps.Close() }()
	select {
	case err := <-done:
		if err != nil {
			t.Fatalf("Close: %v", err)
		}
	case <-time.After(2 * time.Second):
		t.Fatal("Close did not return within 2s; in-flight conn not evicted")
	}

	// The client must now observe the server-side close as a read error.
	_ = conn.SetReadDeadline(time.Now().Add(1 * time.Second))
	if _, err := conn.Read(make([]byte, 4)); err == nil {
		t.Fatal("expected read error on client conn after server Close")
	}
}
// TestPortmapServer_TCPGetPort starts a real responder on a free port, sends
// one record-marked GETPORT for MOUNT v3 over TCP, and expects port 2049
// back in a record-marked reply.
func TestPortmapServer_TCPGetPort(t *testing.T) {
	const lastFragBit = uint32(1) << 31

	port := pickFreePort(t)
	ps := newPortmapServer("127.0.0.1", port, 2049)
	if err := ps.Start(); err != nil {
		t.Fatalf("start: %v", err)
	}
	t.Cleanup(func() { _ = ps.Close() })

	var mapping [16]byte
	binary.BigEndian.PutUint32(mapping[0:4], mountProgram)
	binary.BigEndian.PutUint32(mapping[4:8], 3)
	binary.BigEndian.PutUint32(mapping[8:12], ipProtoTCP)
	call := buildRPCCall(t, 123, portmapProgram, portmapVersion, pmapProcGetPort, nil, nil, mapping[:])

	target := net.JoinHostPort("127.0.0.1", strconv.Itoa(port))
	conn, err := net.Dial("tcp", target)
	if err != nil {
		t.Fatalf("dial tcp: %v", err)
	}
	defer conn.Close()
	_ = conn.SetDeadline(time.Now().Add(2 * time.Second))

	// Request: record mark (last-fragment bit + length), then the call.
	var sendMark [4]byte
	binary.BigEndian.PutUint32(sendMark[:], uint32(len(call))|lastFragBit)
	if _, err := conn.Write(sendMark[:]); err != nil {
		t.Fatalf("write mark: %v", err)
	}
	if _, err := conn.Write(call); err != nil {
		t.Fatalf("write msg: %v", err)
	}

	// Reply: record mark first, then that many bytes of reply.
	var recvMark [4]byte
	if _, err := io.ReadFull(conn, recvMark[:]); err != nil {
		t.Fatalf("read mark: %v", err)
	}
	reply := make([]byte, binary.BigEndian.Uint32(recvMark[:])&^lastFragBit)
	if _, err := io.ReadFull(conn, reply); err != nil {
		t.Fatalf("read body: %v", err)
	}

	xid, acc, body := parseAcceptedReply(t, reply)
	replyOK := xid == 123 && acc == rpcAcceptSuccess && len(body) == 4
	if !replyOK {
		t.Fatalf("bad reply xid=%d acc=%d body=%x", xid, acc, body)
	}
	if got := binary.BigEndian.Uint32(body); got != 2049 {
		t.Fatalf("tcp getport port=%d want 2049", got)
	}
}
-377
View File
@@ -1,377 +0,0 @@
package nfs
import (
"bufio"
"encoding/binary"
"errors"
"io"
"net"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
)
// The upstream willscott/go-nfs library dispatches RPC calls by (program,
// procedure) only — it does not validate the RPC program version. That means
// a Linux client speaking NFSv4 (program 100003 vers 4) lands on the same
// handler map as NFSv3: proc=1 routes to NFSv3 SETATTR, which parses the
// NFSv4 COMPOUND args as if they were SETATTR3args and writes a malformed
// reply. The client cannot decode that reply, the kernel returns
// EPROTONOSUPPORT, and mount.nfs prints "requested NFS version or transport
// protocol is not supported" without ever falling back to v3.
//
// The default Linux mount.nfs path is to try NFSv4 first, so this affects
// every plain `mount -t nfs <host>:<export> /mnt` against a `weed nfs`
// deployment. To make the v4→v3 fallback work, we wrap the listener so the
// first RPC frame on each new TCP connection is inspected: if the program is
// NFS or MOUNT and the version is not 3, we synthesize a PROG_MISMATCH reply
// (with the supported version range 3..3) directly to the socket and close
// the connection. The client then retries with v3 and proceeds normally.
//
// Clients keep the same program/version for the lifetime of a TCP connection
// in practice, so we only need to check the first frame; subsequent frames
// flow through to go-nfs unchanged. This avoids vendoring go-nfs while still
// producing protocol-correct rejections.
// RPC numeric constants used here (rpcMsgCall, rpcMsgReply, rpcMsgAccepted,
// rpcAcceptProgMismatch, rpcAuthNone, nfsProgram, mountProgram) are defined
// alongside the portmap responder in portmap.go to keep one source of truth
// per package.
// Tunables for the first-frame RPC version filter.
const (
// rpcVersionFilterPeekTimeout bounds how long we wait for the first frame
// header on a new connection before giving up and letting go-nfs handle
// the (possibly half-open) socket.
rpcVersionFilterPeekTimeout = 10 * time.Second
// rpcVersionFilterPeekLen is the number of bytes peeked from each new
// connection: the 4-byte fragment marker plus the 24 bytes of fixed RPC
// header (xid + msg_type + rpcvers + prog + vers + proc).
rpcVersionFilterPeekLen = 28
// rpcVersionFilterAcceptBackoff is how long the accept loop sleeps
// after a transient Accept() error (EMFILE, EAGAIN, ECONNABORTED,
// etc.) before retrying. Mirrors portmapRetryBackoff in portmap.go so
// both NFS-listening goroutines back off identically under host
// resource pressure.
rpcVersionFilterAcceptBackoff = 50 * time.Millisecond
// supportedNFSVer is the only version accepted for the NFS and MOUNT
// programs. filterFirstRPCFrame answers any other version of those
// programs with PROG_MISMATCH advertising the range
// supportedNFSVer..supportedNFSVer.
supportedNFSVer = 3
)
// versionFilterListener moves the per-connection RPC peek off the
// Listener.Accept() critical path. Peeking inline would let one slow or idle
// client (or a TCP three-way handshake without any RPC payload) hold
// rpcVersionFilterPeekTimeout — i.e. up to 10 seconds — of head-of-line
// blocking against every other connect, since gonfs.Serve only calls Accept
// serially. Instead, a background goroutine runs the inner Accept() loop and
// hands each raw conn to its own short-lived goroutine that does the peek;
// validated conns are sent on acceptCh and the wrapper's Accept() reads from
// that channel. Rejected conns never reach the channel — PROG_MISMATCH is
// already on the wire by the time the per-conn goroutine returns.
type versionFilterListener struct {
// inner is the wrapped listener. acceptLoop calls its Accept(); Close()
// and Addr() are forwarded to it.
inner net.Listener
// acceptCh is the unbuffered hand-off from handleConn goroutines to
// Accept(). A conn is sent only after filterFirstRPCFrame let it through.
acceptCh chan net.Conn
// closed is signalled either by Close() or by the accept loop after the
// inner listener returns a terminal error. After it fires Accept() will
// stop blocking and return acceptErr (or net.ErrClosed if none).
closed chan struct{}
// closeOnce makes sure closed is closed only once, whichever of Close()
// and acceptLoop signals first.
closeOnce sync.Once
// mu guards acceptErr and inFlight.
mu sync.Mutex
// acceptErr is the terminal error recorded by acceptLoop when the inner
// listener reports net.ErrClosed; nil until then.
acceptErr error
// inFlight tracks raw (pre-peek) conns that are currently in
// handleConn so Close() can break their Peek() deadline by closing
// them, instead of waiting up to rpcVersionFilterPeekTimeout per
// idle client for the timeout to fire on its own.
inFlight map[net.Conn]struct{}
// startOnce ensures acceptLoop is launched at most once, on the first
// Accept() call.
startOnce sync.Once
// wg counts acceptLoop and every handleConn goroutine; Close() waits
// on it.
wg sync.WaitGroup
}
// newVersionFilterListener wraps inner with the first-frame RPC version
// filter. No goroutine is started here; the accept loop begins on the first
// Accept() call.
func newVersionFilterListener(inner net.Listener) net.Listener {
	l := new(versionFilterListener)
	l.inner = inner
	l.acceptCh = make(chan net.Conn) // unbuffered hand-off to Accept()
	l.closed = make(chan struct{})
	return l
}
// start launches the background accept loop the first time it is called and
// does nothing afterwards. Accept() calls it, so merely constructing the
// wrapper spawns no goroutines — the same as the embedded-listener form this
// type replaced.
func (l *versionFilterListener) start() {
	launch := func() {
		l.wg.Add(1)
		go l.acceptLoop()
	}
	l.startOnce.Do(launch)
}
// Accept blocks until a conn has passed the version filter or the listener
// has shut down. After shutdown it returns the recorded terminal error, or
// net.ErrClosed when none was recorded.
func (l *versionFilterListener) Accept() (net.Conn, error) {
	l.start()

	select {
	case <-l.closed:
		return nil, l.terminalErr()
	case conn := <-l.acceptCh:
		return conn, nil
	}
}
// Close shuts the wrapper down and waits for its goroutines. In order, it
// signals l.closed (so Accept() and pending hand-offs stop blocking), closes
// the inner listener, evicts conns that are still being peeked, and waits on
// l.wg. It returns the error from closing the inner listener.
func (l *versionFilterListener) Close() error {
l.signalClose()
err := l.inner.Close()
// Eagerly close any raw conns currently blocked in filterFirstRPCFrame's
// Peek so handleConn returns promptly. Without this, an idle client
// (TCP handshake without any RPC payload) holds Close() up to
// rpcVersionFilterPeekTimeout — 10s of stop-the-world per such conn.
l.evictInFlight()
l.wg.Wait()
return err
}
// Addr reports the inner listener's address.
func (l *versionFilterListener) Addr() net.Addr {
return l.inner.Addr()
}
// signalClose closes l.closed. It may be called any number of times, from
// Close() and from acceptLoop; only the first call has an effect.
func (l *versionFilterListener) signalClose() {
	l.closeOnce.Do(func() { close(l.closed) })
}
// terminalErr returns the error Accept() should report after shutdown: the
// error recorded by acceptLoop if there is one, otherwise net.ErrClosed.
func (l *versionFilterListener) terminalErr() error {
	l.mu.Lock()
	recorded := l.acceptErr
	l.mu.Unlock()

	if recorded == nil {
		return net.ErrClosed
	}
	return recorded
}
// trackInFlight registers a raw conn that is about to be peeked, so that
// Close() can interrupt the Peek() by closing it. A false result means
// shutdown has already started and c was not registered; the caller must
// close c and stop.
func (l *versionFilterListener) trackInFlight(c net.Conn) bool {
	l.mu.Lock()
	defer l.mu.Unlock()

	select {
	case <-l.closed:
		return false
	default:
		// The set is created lazily on the first registration.
		if l.inFlight == nil {
			l.inFlight = make(map[net.Conn]struct{})
		}
		l.inFlight[c] = struct{}{}
		return true
	}
}
// untrackInFlight removes c from the in-flight set. It is safe after
// evictInFlight has set the map to nil, because deleting from a nil map is a
// no-op.
func (l *versionFilterListener) untrackInFlight(c net.Conn) {
	l.mu.Lock()
	delete(l.inFlight, c)
	l.mu.Unlock()
}
// evictInFlight detaches the in-flight set under l.mu and then closes every
// conn in it, so that a Peek() still blocked in handleConn returns at once.
// handleConn's deferred untrackInFlight stays safe afterwards because
// deleting from the now-nil map is a no-op.
func (l *versionFilterListener) evictInFlight() {
	var pending map[net.Conn]struct{}

	l.mu.Lock()
	pending, l.inFlight = l.inFlight, nil
	l.mu.Unlock()

	for conn := range pending {
		_ = conn.Close()
	}
}
// acceptLoop runs the inner listener's Accept() in the background and starts
// one handleConn goroutine per accepted conn. It ends when the inner
// listener reports net.ErrClosed (recording that error for Accept()) or when
// l.closed fires during a retry pause; on exit it signals l.closed itself.
func (l *versionFilterListener) acceptLoop() {
	defer l.wg.Done()
	defer l.signalClose()

	for {
		conn, err := l.inner.Accept()
		if err == nil {
			l.wg.Add(1)
			go l.handleConn(conn)
			continue
		}

		// Permanent: the inner listener is closed. Keep the first such
		// error so Accept() can surface it, then stop.
		if errors.Is(err, net.ErrClosed) {
			l.mu.Lock()
			if l.acceptErr == nil {
				l.acceptErr = err
			}
			l.mu.Unlock()
			return
		}

		// Everything else (EMFILE, EAGAIN, ECONNABORTED on accept,
		// timeouts if a deadline is ever set) is treated as transient:
		// giving up here would take the whole NFS server down on a
		// resource blip. Pause briefly and retry, as serveTCP in
		// portmap.go does.
		glog.V(1).Infof("nfs version filter: transient accept error: %v", err)
		select {
		case <-l.closed:
			return
		case <-time.After(rpcVersionFilterAcceptBackoff):
		}
	}
}
// handleConn performs the version peek for one accepted conn in its own
// goroutine, so a slow client delays only itself. A conn that passes the
// filter is handed to Accept() through acceptCh; if shutdown wins that race,
// the conn is closed instead so no socket outlives Close().
func (l *versionFilterListener) handleConn(conn net.Conn) {
	defer l.wg.Done()

	if tracked := l.trackInFlight(conn); !tracked {
		// Shutdown already started: skip the Peek and drop the raw conn.
		_ = conn.Close()
		return
	}
	defer l.untrackInFlight(conn)

	// When the filter rejects, it has already replied PROG_MISMATCH and
	// closed conn, so there is nothing left to do.
	if vetted, ok := filterFirstRPCFrame(conn); ok {
		select {
		case <-l.closed:
			_ = vetted.Close()
		case l.acceptCh <- vetted:
		}
	}
}
// peekedConn is a net.Conn whose reads are served by reader instead of the
// embedded Conn. filterFirstRPCFrame sets reader to the bufio.Reader it
// peeked with, so the already-buffered header bytes are delivered first and
// later reads continue from the underlying connection. Every other method,
// including Write, is the embedded Conn's own.
type peekedConn struct {
	net.Conn
	reader io.Reader
}

// Read reads from c.reader rather than from the embedded Conn.
func (c *peekedConn) Read(p []byte) (int, error) {
	n, err := c.reader.Read(p)
	return n, err
}
// filterFirstRPCFrame inspects the first RPC frame on conn and decides whether
// to pass it through to go-nfs. Returns (wrappedConn, true) if the frame is
// for a supported (program, version) — including programs we don't recognize,
// since go-nfs handles its own PROG_UNAVAIL response. Returns (nil, false) if
// we already replied with PROG_MISMATCH and closed conn.
//
// On peek failure (early close, deadline) we pass the connection through:
// returning an error here would silently drop legitimate clients on a flaky
// link, and go-nfs has its own per-frame error handling.
func filterFirstRPCFrame(conn net.Conn) (net.Conn, bool) {
r := bufio.NewReader(conn)
deadlineErr := conn.SetReadDeadline(time.Now().Add(rpcVersionFilterPeekTimeout))
hdr, peekErr := r.Peek(rpcVersionFilterPeekLen)
// Always clear the deadline before returning to go-nfs; failing to do so
// would make every subsequent Read() time out at the same instant.
if deadlineErr == nil {
_ = conn.SetReadDeadline(time.Time{})
}
if peekErr != nil {
return &peekedConn{Conn: conn, reader: r}, true
}
fragMark := binary.BigEndian.Uint32(hdr[0:4])
if fragMark&(1<<31) == 0 {
// Multi-fragment record: portmap-style filtering of the first frame
// would need reassembly. Fall through to go-nfs which handles this.
return &peekedConn{Conn: conn, reader: r}, true
}
// Peek(28) can read across record boundaries — the first fragment may
// be shorter than the fixed RPC CALL header (24 bytes after the marker)
// with the remaining bytes belonging to the *next* RPC. Indexing into
// hdr[16:24] without first checking the fragment length would parse
// fields from a different RPC and either spuriously reject or pass it.
// Pass through if the first fragment can't possibly hold a full header
// and let go-nfs surface the framing error.
if fragLen := fragMark &^ uint32(1<<31); fragLen < 24 {
return &peekedConn{Conn: conn, reader: r}, true
}
xid := binary.BigEndian.Uint32(hdr[4:8])
if msgType := binary.BigEndian.Uint32(hdr[8:12]); msgType != rpcMsgCall {
// Not a CALL — odd, but pass through.
return &peekedConn{Conn: conn, reader: r}, true
}
if rpcVers := binary.BigEndian.Uint32(hdr[12:16]); rpcVers != 2 {
// ONC RPC v2 is the only version we and go-nfs speak; if the
// rpcvers field is anything else the rest of the header is
// untrusted (could be a non-RPC protocol that happens to share
// the port, or simply garbled traffic). Don't synthesize a
// PROG_MISMATCH that lies about supporting NFS — pass it
// through and let go-nfs / RFC 5531 §9 RPC_MISMATCH handling
// in the upstream library do the right thing.
return &peekedConn{Conn: conn, reader: r}, true
}
prog := binary.BigEndian.Uint32(hdr[16:20])
vers := binary.BigEndian.Uint32(hdr[20:24])
switch prog {
case nfsProgram, mountProgram:
default:
// Unknown program: let go-nfs reply PROG_UNAVAIL itself.
return &peekedConn{Conn: conn, reader: r}, true
}
if vers == supportedNFSVer {
return &peekedConn{Conn: conn, reader: r}, true
}
glog.V(1).Infof("nfs: rejecting client %s with PROG_MISMATCH: prog=%d vers=%d (supported=%d)",
conn.RemoteAddr(), prog, vers, supportedNFSVer)
if err := writeProgMismatchTCP(conn, xid, supportedNFSVer, supportedNFSVer); err != nil {
glog.V(1).Infof("nfs: write PROG_MISMATCH to %s: %v", conn.RemoteAddr(), err)
}
_ = conn.Close()
return nil, false
}
// progMismatchBodyLen is the size in bytes of the PROG_MISMATCH reply without
// its fragment header: eight 32-bit words.
const progMismatchBodyLen = 32

// writeProgMismatchTCP sends, in a single Write, one TCP-framed RPC reply
// carrying MSG_ACCEPTED + PROG_MISMATCH and the supported version range, per
// RFC 5531 section 9. It returns the error from that Write. The frame is:
//
//	uint32 fragment_header (last-fragment | length)
//	uint32 xid
//	uint32 msg_type=REPLY(1)
//	uint32 reply_stat=MSG_ACCEPTED(0)
//	uint32 verf_flavor=AUTH_NONE(0)
//	uint32 verf_len=0
//	uint32 accept_stat=PROG_MISMATCH(2)
//	uint32 low
//	uint32 high
func writeProgMismatchTCP(w io.Writer, xid, low, high uint32) error {
	words := [...]uint32{
		uint32(progMismatchBodyLen) | 1<<31,
		xid,
		rpcMsgReply,
		rpcMsgAccepted,
		rpcAuthNone,
		0, // verf opaque length (always zero for AUTH_NONE)
		rpcAcceptProgMismatch,
		low,
		high,
	}

	frame := make([]byte, 4+progMismatchBodyLen)
	for i, word := range words {
		binary.BigEndian.PutUint32(frame[4*i:], word)
	}
	_, err := w.Write(frame)
	return err
}
-561
View File
@@ -1,561 +0,0 @@
package nfs
import (
"encoding/binary"
"errors"
"io"
"net"
"sync"
"testing"
"time"
)
// buildRPCCallFrame returns one TCP-framed ONC RPC v2 CALL header with no
// procedure arguments, which is all the version filter needs to classify a
// connection. Layout (RFC 5531): a 4-byte fragment marker with the
// last-fragment bit set and a body length of 40, then xid, msg_type=CALL(0),
// rpcvers=2, prog, vers and proc. The trailing 16 bytes are left zero and
// stand for the cred and verf opaque_auth structs, both AUTH_NONE with
// length 0.
func buildRPCCallFrame(xid, prog, vers, proc uint32) []byte {
	const (
		markerLen    = 4
		bodyLen      = 40
		lastFragment = 1 << 31
		msgTypeCall  = 0
		rpcVersion   = 2
	)
	header := [...]uint32{lastFragment | bodyLen, xid, msgTypeCall, rpcVersion, prog, vers, proc}
	frame := make([]byte, markerLen+bodyLen)
	for i, word := range header {
		binary.BigEndian.PutUint32(frame[4*i:], word)
	}
	return frame
}
// readPROGMismatchReply reads exactly one TCP-framed PROG_MISMATCH reply (as
// written by writeProgMismatchTCP) from conn under a 2-second read deadline.
// It fails the test immediately if the read fails, the last-fragment bit is
// missing, the body length is not progMismatchBodyLen, or msg_type /
// reply_stat / accept_stat are not REPLY / MSG_ACCEPTED / PROG_MISMATCH.
// On success it returns the reply's xid and the advertised (low, high)
// version range.
func readPROGMismatchReply(t *testing.T, conn net.Conn) (xid, low, high uint32) {
	t.Helper()
	_ = conn.SetReadDeadline(time.Now().Add(2 * time.Second))

	reply := make([]byte, 4+progMismatchBodyLen)
	if n, err := io.ReadFull(conn, reply); err != nil {
		t.Fatalf("read reply: %v (got %d bytes)", err, n)
	}

	// word decodes the i-th big-endian uint32 of the reply; word 0 is the
	// fragment marker.
	word := func(i int) uint32 {
		return binary.BigEndian.Uint32(reply[4*i : 4*i+4])
	}

	marker := word(0)
	if marker&(1<<31) == 0 {
		t.Fatalf("reply frame missing last-fragment bit: %x", marker)
	}
	if bodyLen := marker &^ (1 << 31); bodyLen != progMismatchBodyLen {
		t.Fatalf("reply body length=%d want %d", bodyLen, progMismatchBodyLen)
	}
	if msgType := word(2); msgType != 1 {
		t.Fatalf("reply msg_type=%d want REPLY(1)", msgType)
	}
	if replyStat := word(3); replyStat != 0 {
		t.Fatalf("reply reply_stat=%d want MSG_ACCEPTED(0)", replyStat)
	}
	if acceptStat := word(6); acceptStat != 2 {
		t.Fatalf("reply accept_stat=%d want PROG_MISMATCH(2)", acceptStat)
	}
	return word(1), word(7), word(8)
}
// TestVersionFilterRejectsNFSv4WithProgMismatch sends an NFS-program CALL
// with version 4 through the version filter and checks three things: the
// client receives a PROG_MISMATCH reply that echoes its xid and advertises
// supportedNFSVer as both low and high, the socket is closed afterwards
// (the next read yields io.EOF), and the rejected connection is never
// returned from the filter's Accept.
func TestVersionFilterRejectsNFSv4WithProgMismatch(t *testing.T) {
innerListener, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
t.Fatal(err)
}
defer innerListener.Close()
listener := newVersionFilterListener(innerListener)
// In a real server, accepted conns are passed to go-nfs. We just need
// to drive Accept() so the filter runs; the test never sees a wrapped
// conn because the v4 frame is rejected.
accepted := make(chan net.Conn, 1)
go func() {
for {
c, aerr := listener.Accept()
if aerr != nil {
return
}
accepted <- c
}
}()
conn, err := net.Dial("tcp", innerListener.Addr().String())
if err != nil {
t.Fatalf("dial: %v", err)
}
defer conn.Close()
// NFSv4 NULL: the first probe Linux mount.nfs sends when trying v4.
if _, err := conn.Write(buildRPCCallFrame(0xdeadbeef, nfsProgram, 4, 0)); err != nil {
t.Fatalf("write: %v", err)
}
xid, low, high := readPROGMismatchReply(t, conn)
const wantXID uint32 = 0xdeadbeef
if xid != wantXID {
t.Errorf("xid=%x want %x", xid, wantXID)
}
if low != supportedNFSVer || high != supportedNFSVer {
t.Errorf("supported range=(%d,%d) want (%d,%d)", low, high, supportedNFSVer, supportedNFSVer)
}
// Filter must close the connection after replying so the client knows
// not to send another RPC on this socket. Insist on io.EOF specifically:
// "any error" would let a stuck (but still-open) connection pass this
// check via a deadline timeout, which is exactly the regression we want
// to catch.
_ = conn.SetReadDeadline(time.Now().Add(time.Second))
one := make([]byte, 1)
n, err := conn.Read(one)
switch {
case err == nil:
t.Errorf("expected EOF after PROG_MISMATCH but read returned %d bytes", n)
case !errors.Is(err, io.EOF):
t.Errorf("expected io.EOF after PROG_MISMATCH, got %v (likely a regression where the filter replies but does not close)", err)
}
// Give the accept goroutine 100ms to (wrongly) hand the conn over;
// nothing arriving on the channel in that window is the passing outcome.
select {
case c := <-accepted:
c.Close()
t.Error("rejected connection should not be returned to caller")
case <-time.After(100 * time.Millisecond):
}
}
// TestVersionFilterRejectsMOUNTv4WithProgMismatch sends a MOUNT-program CALL
// with version 4 through the version filter and checks that the client gets
// a PROG_MISMATCH reply echoing xid 42 and advertising supportedNFSVer as
// both the low and high supported version.
func TestVersionFilterRejectsMOUNTv4WithProgMismatch(t *testing.T) {
	innerListener, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer innerListener.Close()
	listener := newVersionFilterListener(innerListener)
	// Keep draining Accept so the filter runs; any conn that does get
	// through is closed straight away because this test only inspects the
	// client side.
	go func() {
		for {
			c, aerr := listener.Accept()
			if aerr != nil {
				return
			}
			c.Close()
		}
	}()
	conn, err := net.Dial("tcp", innerListener.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer conn.Close()
	if _, err := conn.Write(buildRPCCallFrame(42, mountProgram, 4, 0)); err != nil {
		t.Fatal(err)
	}
	xid, low, high := readPROGMismatchReply(t, conn)
	if xid != 42 {
		t.Errorf("xid=%d want 42", xid)
	}
	if low != supportedNFSVer || high != supportedNFSVer {
		// Report the expected range from the constant actually compared
		// against, so the message stays accurate if supportedNFSVer changes.
		t.Errorf("supported range=(%d,%d) want (%d,%d)", low, high, supportedNFSVer, supportedNFSVer)
	}
}
// TestVersionFilterPassesThroughNFSv3 checks that an NFS-program CALL with
// version 3 is handed to the Accept caller, and that the first 44 bytes the
// caller reads equal the frame the client wrote, i.e. the filter neither
// consumed nor altered it.
func TestVersionFilterPassesThroughNFSv3(t *testing.T) {
	base, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer base.Close()
	filtered := newVersionFilterListener(base)

	// Server side: accept one conn and report the first 44 bytes read.
	seen := make(chan []byte, 1)
	go func() {
		srv, acceptErr := filtered.Accept()
		if acceptErr != nil {
			return
		}
		defer srv.Close()
		data := make([]byte, 44)
		if _, readErr := io.ReadFull(srv, data); readErr == nil {
			seen <- data
		}
	}()

	client, err := net.Dial("tcp", base.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer client.Close()

	sent := buildRPCCallFrame(7, nfsProgram, 3, 0)
	if _, err = client.Write(sent); err != nil {
		t.Fatal(err)
	}

	timeout := time.After(2 * time.Second)
	select {
	case echoed := <-seen:
		if string(echoed) != string(sent) {
			t.Error("v3 frame was modified or partially consumed by filter")
		}
	case <-timeout:
		t.Fatal("v3 frame not delivered to inner accept handler")
	}
}
// TestVersionFilterPassesThroughUnknownProgram checks that a CALL for a
// program other than NFS / MOUNT reaches the Accept caller untouched, even
// when its version field is 4.
func TestVersionFilterPassesThroughUnknownProgram(t *testing.T) {
	// The filter only polices NFS / MOUNT versions. Every other program is
	// left to go-nfs, which already answers PROG_UNAVAIL on its own; if the
	// filter had to know about each new program (e.g. NLM) it would stop
	// being a thin shim.
	base, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer base.Close()
	filtered := newVersionFilterListener(base)

	reached := make(chan struct{}, 1)
	go func() {
		srv, acceptErr := filtered.Accept()
		if acceptErr != nil {
			return
		}
		defer srv.Close()
		data := make([]byte, 44)
		_, readErr := io.ReadFull(srv, data)
		if readErr != nil {
			return
		}
		reached <- struct{}{}
	}()

	client, err := net.Dial("tcp", base.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer client.Close()

	// Program 100021 is NLM, which weed nfs doesn't run; go-nfs is expected
	// to produce the unsupported-program reply.
	nlmCall := buildRPCCallFrame(99, 100021, 4, 0)
	if _, err = client.Write(nlmCall); err != nil {
		t.Fatal(err)
	}

	timeout := time.After(2 * time.Second)
	select {
	case <-reached:
	case <-timeout:
		t.Fatal("unknown-program frame should pass through filter")
	}
}
// transientErrListener is a test double around a real net.Listener. Its
// first `remaining` Accept calls fail with fakeAcceptError; every call after
// that is forwarded to inner. It lets the version filter's retry-on-transient
// behaviour be tested without provoking real EMFILE conditions on the host.
type transientErrListener struct {
	inner     net.Listener
	mu        sync.Mutex // guards remaining
	remaining int        // number of Accept calls still to fail
}

// fakeAcceptError is the error value injected by transientErrListener.
type fakeAcceptError struct{}

// Error implements the error interface.
func (fakeAcceptError) Error() string { return "fake transient accept error" }

// Accept consumes one injected failure if any are left; otherwise it
// delegates to the wrapped listener.
func (l *transientErrListener) Accept() (net.Conn, error) {
	l.mu.Lock()
	inject := l.remaining > 0
	if inject {
		l.remaining--
	}
	l.mu.Unlock()

	if inject {
		return nil, fakeAcceptError{}
	}
	return l.inner.Accept()
}

// Close closes the wrapped listener.
func (l *transientErrListener) Close() error { return l.inner.Close() }

// Addr reports the wrapped listener's address.
func (l *transientErrListener) Addr() net.Addr { return l.inner.Addr() }
// TestVersionFilterRetriesTransientAcceptErrors wraps the inner listener so
// its first three Accept calls fail, then checks that a valid NFSv3
// connection is still delivered by the filter within two seconds.
func TestVersionFilterRetriesTransientAcceptErrors(t *testing.T) {
	// Regression test: the accept loop used to exit on any error from the
	// inner listener, so one transient EMFILE / EAGAIN under host resource
	// pressure would tear the entire NFS server down. Inject a few fake
	// transient errors and assert the next real connection still arrives.
	base, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer base.Close()

	flaky := &transientErrListener{inner: base, remaining: 3}
	filtered := newVersionFilterListener(flaky)

	reached := make(chan struct{}, 1)
	go func() {
		srv, acceptErr := filtered.Accept()
		if acceptErr != nil {
			return
		}
		defer srv.Close()
		data := make([]byte, 44)
		_, readErr := io.ReadFull(srv, data)
		if readErr != nil {
			return
		}
		reached <- struct{}{}
	}()

	client, err := net.Dial("tcp", base.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer client.Close()

	v3Call := buildRPCCallFrame(1, nfsProgram, 3, 0)
	if _, err = client.Write(v3Call); err != nil {
		t.Fatal(err)
	}

	// 3 transient errors × ~50ms backoff plus normal accept latency. The
	// bound is generous so slow CI doesn't flake, yet tight enough to catch
	// a regression to "any error is terminal".
	timeout := time.After(2 * time.Second)
	select {
	case <-reached:
	case <-timeout:
		t.Fatal("filter did not retry transient Accept() errors and recover")
	}
}
// TestVersionFilterCloseReturnsPromptlyWithIdlePeekConns opens idleConns TCP
// connections that never send a byte, waits 100ms, then asserts that the
// filter listener's Close returns without error in at most two seconds.
func TestVersionFilterCloseReturnsPromptlyWithIdlePeekConns(t *testing.T) {
// Regression test: Close() used to wait on every handleConn goroutine
// via wg.Wait, but those goroutines could be stuck in
// filterFirstRPCFrame's Peek() until rpcVersionFilterPeekTimeout (10s)
// fired. An idle client that completed a TCP handshake but never sent
// a byte would stretch shutdown by up to that timeout per conn.
// Close() now eagerly closes any tracked in-flight raw conns, which
// forces Peek() to return immediately and lets handleConn finish.
//
// Black-box test: only observes Close() latency. With the regression
// in place Close() would block ~10s; with the fix it returns in well
// under a second.
innerListener, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
t.Fatal(err)
}
// NOTE(review): innerListener gets no deferred Close of its own here;
// presumably listener.Close() below closes it — confirm.
listener := newVersionFilterListener(innerListener)
// Drive Accept once so the background accept loop is running.
go func() { _, _ = listener.Accept() }()
const idleConns = 4
dialed := make([]net.Conn, 0, idleConns)
// Client-side cleanup: close every dialed conn when the test returns,
// including when a later t.Fatal aborts it.
defer func() {
for _, c := range dialed {
_ = c.Close()
}
}()
for i := 0; i < idleConns; i++ {
c, err := net.Dial("tcp", innerListener.Addr().String())
if err != nil {
t.Fatal(err)
}
dialed = append(dialed, c)
}
// Give handleConn time to invoke Peek for each idle conn — without
// this the test could race ahead and Close() while no goroutine has
// actually started peeking yet, masking the regression.
time.Sleep(100 * time.Millisecond)
// Close() must finish in well under rpcVersionFilterPeekTimeout (10s).
// 2s is a generous bound that still clearly distinguishes "broke the
// peek by closing the conn" from "waited for the peek deadline".
start := time.Now()
if err := listener.Close(); err != nil {
t.Errorf("Close: %v", err)
}
elapsed := time.Since(start)
if elapsed > 2*time.Second {
t.Errorf("Close took %v with %d idle pre-peek conns; should be sub-second once they're forcibly closed", elapsed, idleConns)
}
}
// TestVersionFilterPassesThroughNonV2RPC sends a CALL frame whose rpcvers
// field is 99 but whose prog/vers fields read NFS + 4. It asserts that no
// PROG_MISMATCH fragment marker comes back within 500ms and that the
// connection is delivered to the Accept caller.
func TestVersionFilterPassesThroughNonV2RPC(t *testing.T) {
	// Anything that isn't ONC RPC v2 isn't ours to classify. Even if the
	// bytes at hdr[16:24] happen to look like nfsProgram + vers=4, the
	// filter must not synthesize a PROG_MISMATCH advertising NFSv3 for what
	// could be a completely different protocol sharing the port.
	base, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer base.Close()
	filtered := newVersionFilterListener(base)

	reached := make(chan struct{}, 1)
	go func() {
		srv, acceptErr := filtered.Accept()
		if acceptErr != nil {
			return
		}
		defer srv.Close()
		data := make([]byte, 44)
		_, readErr := io.ReadFull(srv, data)
		if readErr != nil {
			return
		}
		reached <- struct{}{}
	}()

	client, err := net.Dial("tcp", base.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer client.Close()

	// Start from a normal CALL frame and overwrite rpcvers with 99. Without
	// the rpcvers guard the filter would still parse prog=NFS, vers=4 from
	// the same buffer and reject with PROG_MISMATCH.
	bogus := buildRPCCallFrame(0xfeedbeef, nfsProgram, 4, 0)
	binary.BigEndian.PutUint32(bogus[12:16], 99) // bogus rpcvers
	if _, err = client.Write(bogus); err != nil {
		t.Fatal(err)
	}

	// Look for a PROG_MISMATCH reply under a short deadline. None should
	// arrive, because the filter shouldn't pretend to know this protocol.
	_ = client.SetReadDeadline(time.Now().Add(500 * time.Millisecond))
	var marker [4]byte
	n, readErr := io.ReadFull(client, marker[:])
	if readErr == nil && n == 4 && binary.BigEndian.Uint32(marker[:]) == uint32(progMismatchBodyLen)|(1<<31) {
		t.Fatal("filter sent PROG_MISMATCH for a non-v2 RPC frame")
	}

	// And the connection should reach the inner accept handler.
	timeout := time.After(2 * time.Second)
	select {
	case <-reached:
	case <-timeout:
		t.Fatal("non-v2 RPC frame should pass through filter to inner accept")
	}
}
// TestVersionFilterIgnoresShortFirstFragment sends a 28-byte payload whose
// fragment marker declares a 12-byte body, with NFS + version 4 written at
// payload offsets 16..24 (past the declared fragment). It asserts that the
// filter does not answer with a PROG_MISMATCH fragment marker within 500ms.
func TestVersionFilterIgnoresShortFirstFragment(t *testing.T) {
	// Peek(28) can read past the first fragment's body when that body is
	// shorter than the 24-byte fixed RPC CALL header. Without a length
	// check, prog/vers would be sourced from bytes belonging to the *next*
	// RPC (or a syntactic accident) and the filter could spuriously reject
	// the connection.
	base, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer base.Close()
	filtered := newVersionFilterListener(base)

	// Drain Accept so the filter keeps running; delivered conns are closed.
	go func() {
		for {
			srv, acceptErr := filtered.Accept()
			if acceptErr != nil {
				return
			}
			srv.Close()
		}
	}()

	client, err := net.Dial("tcp", base.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	defer client.Close()

	const shortBody = 12
	payload := make([]byte, 4+24)
	// Marker: last-fragment, body=12.
	binary.BigEndian.PutUint32(payload[0:4], shortBody|(1<<31))
	// Bytes 4..16 are the real fragment body: 12 bytes, too short for a
	// CALL header, so the filter must not interpret them as one.
	// Bytes 16..28 lie past the fragment but inside the peek window. A
	// filter that (incorrectly) read prog/vers from hdr[16:24] would see
	// NFS + v4 here.
	binary.BigEndian.PutUint32(payload[16:20], nfsProgram)
	binary.BigEndian.PutUint32(payload[20:24], 4)
	if _, err = client.Write(payload); err != nil {
		t.Fatal(err)
	}

	// An erroneous rejection would produce a 36-byte TCP RPC reply (4-byte
	// frag marker + 32-byte PROG_MISMATCH body) within milliseconds. Wait
	// briefly and assert nothing PROG_MISMATCH-shaped came back. Anything
	// else (timeout, EOF, unrelated bytes) is fine: only misclassification
	// of the short fragment matters here.
	_ = client.SetReadDeadline(time.Now().Add(500 * time.Millisecond))
	var marker [4]byte
	n, readErr := io.ReadFull(client, marker[:])
	if readErr == nil && n == 4 && binary.BigEndian.Uint32(marker[:]) == uint32(progMismatchBodyLen)|(1<<31) {
		t.Fatal("filter sent PROG_MISMATCH on a short fragment whose trailing peek bytes only superficially resembled a v4 call")
	}
}
// TestVersionFilterDoesNotHeadOfLineBlockOnSlowConn dials one connection
// that never writes, then a second that immediately sends a valid NFSv3
// CALL, and asserts that a connection carrying 44 readable bytes is
// delivered to the single Accept caller within two seconds.
func TestVersionFilterDoesNotHeadOfLineBlockOnSlowConn(t *testing.T) {
// Regression test: the previous implementation peeked the first RPC
// frame inline in Accept(), so an idle TCP-only connect would block
// every later Accept() call for up to rpcVersionFilterPeekTimeout.
// The peek now runs in a per-conn goroutine; a fast follow-up connect
// must reach the inner accept handler well before the slow conn's
// peek deadline.
innerListener, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
t.Fatal(err)
}
defer innerListener.Close()
listener := newVersionFilterListener(innerListener)
// Server side: take exactly one conn from the filter and signal once a
// full 44-byte frame has been read from it.
delivered := make(chan struct{}, 1)
go func() {
c, aerr := listener.Accept()
if aerr != nil {
return
}
defer c.Close()
buf := make([]byte, 44)
if _, rerr := io.ReadFull(c, buf); rerr == nil {
delivered <- struct{}{}
}
}()
// Slow client: connect, never write. Holds a goroutine inside the
// filter peeking until the deadline, but must not block the next conn.
slowConn, err := net.Dial("tcp", innerListener.Addr().String())
if err != nil {
t.Fatal(err)
}
defer slowConn.Close()
// Fast client: send a valid v3 frame straight away; this conn must be
// delivered to the inner accept handler without waiting for slowConn.
fastConn, err := net.Dial("tcp", innerListener.Addr().String())
if err != nil {
t.Fatal(err)
}
defer fastConn.Close()
if _, err := fastConn.Write(buildRPCCallFrame(11, nfsProgram, 3, 0)); err != nil {
t.Fatal(err)
}
// Bound the wait well below rpcVersionFilterPeekTimeout (10s) so a
// regression to inline peeking would clearly time out here.
select {
case <-delivered:
case <-time.After(2 * time.Second):
t.Fatal("fast conn should not be head-of-line blocked by slow conn's peek")
}
}
-250
View File
@@ -1,250 +0,0 @@
package nfs
import (
"context"
"errors"
"fmt"
"net"
"sync"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/wdclient"
gonfs "github.com/willscott/go-nfs"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// Option is the configuration handed to NewServer. NewServer rejects a nil
// Option or a non-positive Port and writes defaults for unset fields back
// into the same struct.
type Option struct {
// Filer is the filer address used to build the filer client and, in
// "filerProxy" mode, the chunk proxy URLs returned by LookupFn.
Filer pb.ServerAddress
// BindIp and Port form the TCP listen address; Start also passes the
// same pair to the MOUNT v3 UDP responder. Port must be > 0.
BindIp string
Port int
// FilerRootPath is the filer path to export; empty defaults to "/".
FilerRootPath string
// ReadOnly is only logged in the code visible here — presumably it is
// enforced by the filesystem layer; confirm there.
ReadOnly bool
// AllowedClients is passed to newClientAuthorizer; when the resulting
// authorizer is enabled, serve wraps the listener in an allowlistListener.
AllowedClients []string
// VolumeServerAccess defaults to "direct". "filerProxy" skips creating a
// FilerClient and makes LookupFn return filer proxy URLs; "publicUrl"
// creates the FilerClient with wdclient.PreferPublicUrl.
VolumeServerAccess string
// GrpcDialOption defaults to insecure transport credentials when nil.
GrpcDialOption grpc.DialOption
// PortmapBind, when non-empty, enables a built-in portmap v2 responder
// on <PortmapBind>:111 advertising the NFS v3 and MOUNT v3 services at
// Port. Empty (the default) disables portmap; clients must then bypass
// portmap with mount -o port=,mountport=,proto=tcp,mountproto=tcp.
PortmapBind string
}
// Server is the NFS gateway instance built by NewServer and run by Start.
type Server struct {
// option is the caller's configuration, with defaults filled in.
option *Option
// exportRoot is FilerRootPath after normalizeExportRoot; exportID is
// derived from it via exportIDForRoot.
exportRoot util.FullPath
exportID uint32
// signature is a random int32 chosen in NewServer and handed to both
// filer client executors.
signature int32
// handleLimit is set to 1<<20 by NewServer; its consumer is not visible
// in this file — TODO confirm what it bounds.
handleLimit int
// clientAuthorizer is built from Option.AllowedClients.
clientAuthorizer *clientAuthorizer
// sharedReaderCache is passed to newSeaweedFileSystem; when nil,
// rootFilesystem fills it from the root filesystem's readerCache.
sharedReaderCache *filer.ReaderCache
// chunkInvalidator defaults to sharedReaderCache in rootFilesystem when
// not already set.
chunkInvalidator chunkInvalidator
// filerClient is nil in "filerProxy" mode; when non-nil it is closed
// when serve returns.
filerClient *wdclient.FilerClient
// newUploader is the chunk uploader factory; NewServer sets it to
// newChunkUploader.
newUploader func() (chunkUploader, error)
// withFilerClient backs WithFilerClient; withInternalClient is built the
// same way by newInternalClientExecutor.
withFilerClient filerClientExecutor
withInternalClient internalClientExecutor
// rootFSOnce guards the one-time construction of rootFS.
rootFSOnce sync.Once
rootFS *seaweedFileSystem
}
// NewServer validates option, fills in its defaults in place, and returns a
// Server ready for Start.
//
// It returns an error when option is nil, when option.Port is not positive,
// or when newClientAuthorizer rejects option.AllowedClients. Defaults
// applied: FilerRootPath "/", VolumeServerAccess "direct", and insecure
// transport credentials for GrpcDialOption.
func NewServer(option *Option) (*Server, error) {
	switch {
	case option == nil:
		return nil, errors.New("nfs option is required")
	case option.Port <= 0:
		return nil, fmt.Errorf("nfs port must be positive: %d", option.Port)
	}

	// Defaults are written back into the caller's Option.
	if option.FilerRootPath == "" {
		option.FilerRootPath = "/"
	}
	if option.VolumeServerAccess == "" {
		option.VolumeServerAccess = "direct"
	}
	if option.GrpcDialOption == nil {
		option.GrpcDialOption = grpc.WithTransportCredentials(insecure.NewCredentials())
	}

	authorizer, err := newClientAuthorizer(option.AllowedClients)
	if err != nil {
		return nil, err
	}

	srv := &Server{
		option:           option,
		handleLimit:      1 << 20,
		clientAuthorizer: authorizer,
		newUploader:      newChunkUploader,
	}

	// "filerProxy" mode gets no FilerClient; LookupFn hands out filer proxy
	// URLs instead.
	if mode := option.VolumeServerAccess; mode != "filerProxy" {
		var clientOpts *wdclient.FilerClientOption
		if mode == "publicUrl" {
			clientOpts = &wdclient.FilerClientOption{UrlPreference: wdclient.PreferPublicUrl}
		}
		srv.filerClient = wdclient.NewFilerClient([]pb.ServerAddress{option.Filer}, option.GrpcDialOption, "", clientOpts)
	}

	srv.exportRoot = normalizeExportRoot(util.FullPath(option.FilerRootPath))
	srv.signature = util.RandomInt32()
	srv.exportID = exportIDForRoot(srv.exportRoot)
	srv.withFilerClient = newFilerClientExecutor(option, srv.signature)
	srv.withInternalClient = newInternalClientExecutor(option, srv.signature)
	return srv, nil
}
// Start binds the TCP NFS listener, starts the MOUNT v3 UDP responder on the
// same bind address and port, optionally starts the built-in portmap
// responder, logs a mount hint, and then hands the listener to serve. The
// UDP and portmap responders are closed when Start returns. If either
// responder fails to start, the TCP listener is closed and the error is
// returned wrapped.
func (s *Server) Start() error {
	bindIP, port := s.option.BindIp, s.option.Port

	listener, err := net.Listen("tcp", fmt.Sprintf("%s:%d", bindIP, port))
	if err != nil {
		return fmt.Errorf("listen nfs on %s:%d: %w", bindIP, port, err)
	}

	// MOUNT v3 over UDP shares the port with the TCP NFS listener. Many
	// kernels default mountproto to UDP, so without this responder a plain
	// `mount -t nfs <host>:<export> /mnt` fails with EPROTONOSUPPORT in the
	// MOUNT phase even though the TCP NFS path works.
	mountUDP := newMountUDPServer(bindIP, port, s)
	if startErr := mountUDP.Start(); startErr != nil {
		_ = listener.Close()
		return fmt.Errorf("start mount udp: %w", startErr)
	}
	defer func() { _ = mountUDP.Close() }()
	glog.V(0).Infof("MOUNT v3 UDP responder listening on %s:%d", bindIP, port)

	if bind := s.option.PortmapBind; bind != "" {
		var portmap *portmapServer = newPortmapServer(bind, portmapPort, uint32(port))
		if pmErr := portmap.Start(); pmErr != nil {
			_ = listener.Close()
			return fmt.Errorf("start portmap: %w", pmErr)
		}
		glog.V(0).Infof("NFS portmap responder listening on %s:%d (NFS v3 tcp=%d, MOUNT v3 tcp=%d, MOUNT v3 udp=%d)",
			bind, portmapPort, port, port, port)
		defer func() {
			if portmap != nil {
				_ = portmap.Close()
			}
		}()
	}

	s.logMountHint()
	return s.serve(listener)
}
// logMountHint logs a copy-pasteable Linux mount command at startup so
// operators can see how to mount the export from a client.
//
// Without -portmap.bind the client has to bypass portmap by spelling out
// port=/mountport=/proto=/mountproto=, so that form is printed together with
// a tip about enabling portmap. With -portmap.bind set, MOUNT is answered
// over both TCP and UDP and a plain `mount -t nfs host:/export /mnt` works,
// so the short form is printed instead.
func (s *Server) logMountHint() {
	export := string(s.exportRoot)

	if s.option.PortmapBind == "" {
		port := s.option.Port
		glog.V(0).Infof("mount example (bypasses portmap): mount -t nfs -o nfsvers=3,nolock,noacl,port=%d,mountport=%d,proto=tcp,mountproto=tcp <host>:%s <mountpoint>",
			port, port, export)
		glog.V(0).Infof("tip: pass -portmap.bind to enable the built-in portmap responder on port 111 so plain `mount -t nfs host:%s /mnt` works.", export)
		return
	}

	glog.V(0).Infof("mount example: mount -t nfs -o nfsvers=3,nolock <host>:%s <mountpoint>", export)
	glog.V(0).Infof("(MOUNT v3 is served over both TCP and UDP, so no mountproto override is needed.)")
}
// serve wraps listener with the optional client allowlist and the RPC
// version filter, starts the metadata invalidation loop in the background,
// and blocks in gonfs.Serve. When it returns, the invalidation loop has been
// cancelled and waited for, and the filer client (if any) is closed.
func (s *Server) serve(listener net.Listener) error {
if s.filerClient != nil {
defer s.filerClient.Close()
}
// Wrap order: the allowlist wraps the raw listener and the version filter
// wraps that, so the filter's Accept draws from the allowlist listener.
if s.clientAuthorizer != nil && s.clientAuthorizer.enabled {
listener = &allowlistListener{
Listener: listener,
authorizer: s.clientAuthorizer,
}
}
listener = newVersionFilterListener(listener)
handler, err := s.newHandler()
if err != nil {
// Nothing will ever Accept on it, so release the port before bailing.
_ = listener.Close()
return err
}
followCtx, followCancel := context.WithCancel(context.Background())
defer followCancel()
// followDone is closed when the invalidation loop goroutine exits.
followDone := make(chan struct{})
go func() {
defer close(followDone)
s.runMetadataInvalidationLoop(followCtx)
}()
// Deferred last, so it runs first on return: cancel the loop and wait for
// it to finish before the filer client deferred above is closed.
defer func() {
followCancel()
<-followDone
}()
glog.V(0).Infof("Start Seaweed NFS Server filer=%s bind=%s export=%s exportId=%d readOnly=%t allowedClients=%d volumeServerAccess=%s",
s.option.Filer,
listener.Addr(),
s.exportRoot,
s.exportID,
s.option.ReadOnly,
len(s.option.AllowedClients),
s.option.VolumeServerAccess,
)
return gonfs.Serve(listener, handler)
}
// newHandler builds the Handler passed to go-nfs, bound to this server and
// its shared root filesystem. It returns an error when called on a nil
// *Server.
func (s *Server) newHandler() (*Handler, error) {
	if s != nil {
		h := &Handler{server: s, rootFS: s.rootFilesystem()}
		return h, nil
	}
	return nil, errors.New("nfs server is not configured")
}
// rootFilesystem returns a single seaweedFileSystem rooted at the
// configured export, building it on first call. Both the TCP handler
// (via newHandler) and the UDP MOUNT path use the same instance so
// they share the chunk reader cache and don't reconstruct a wrapper
// per request.
//
// On first call it also back-fills sharedReaderCache from the new
// filesystem's readerCache and, when no chunkInvalidator was injected,
// installs that cache as the invalidator.
func (s *Server) rootFilesystem() *seaweedFileSystem {
	s.rootFSOnce.Do(func() {
		s.rootFS = newSeaweedFileSystem(s, s.exportRoot, s.sharedReaderCache)
		if s.sharedReaderCache == nil {
			s.sharedReaderCache = s.rootFS.readerCache
		}
		// Install the cache as invalidator only when it is a real pointer.
		// Storing a nil *filer.ReaderCache in the chunkInvalidator field
		// (presumably an interface) would yield a value that compares
		// non-nil, so later `s.chunkInvalidator != nil` guards would pass
		// and then call through a nil receiver.
		if s.chunkInvalidator == nil && s.sharedReaderCache != nil {
			s.chunkInvalidator = s.sharedReaderCache
		}
	})
	return s.rootFS
}
// WithFilerClient runs fn against a filer gRPC client obtained through the
// server's configured executor, forwarding streamingMode unchanged. It
// returns an error without calling fn when the receiver is nil or no
// executor is configured.
func (s *Server) WithFilerClient(streamingMode bool, fn func(filer_pb.SeaweedFilerClient) error) error {
	if s != nil && s.withFilerClient != nil {
		return s.withFilerClient(streamingMode, fn)
	}
	return errors.New("nfs filer client is not configured")
}
// LookupFn returns the function used to resolve a file ID to URLs.
//
// In "filerProxy" mode the returned function always yields one URL on the
// filer's HTTP address with the file ID as the proxyChunkId query parameter.
// Otherwise the filer client's lookup function is returned when a filer
// client exists. The result is nil for a nil receiver or when neither case
// applies.
func (s *Server) LookupFn() wdclient.LookupFileIdFunctionType {
	switch {
	case s == nil:
		return nil
	case s.option != nil && s.option.VolumeServerAccess == "filerProxy":
		return func(ctx context.Context, fileID string) ([]string, error) {
			proxyURL := fmt.Sprintf("http://%s/?proxyChunkId=%s", s.option.Filer.ToHttpAddress(), fileID)
			return []string{proxyURL}, nil
		}
	case s.filerClient != nil:
		return s.filerClient.GetLookupFileIdFunction()
	default:
		return nil
	}
}
File diff suppressed because it is too large Load Diff
-40
View File
@@ -1,40 +0,0 @@
package nfs
import (
"io"
"github.com/seaweedfs/seaweedfs/weed/operation"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
)
// chunkUploader is the upload seam used by the NFS server: Server.newUploader
// returns one. Its single method has the same parameter and result list as
// the operation.Uploader method that operationChunkUploader forwards to.
type chunkUploader interface {
// UploadWithRetry uploads the content of reader and reports the file id,
// the upload result, an error, and a data byte slice. Note the
// non-idiomatic result order: err is the third result, not the last.
UploadWithRetry(
filerClient filer_pb.FilerClient,
assignRequest *filer_pb.AssignVolumeRequest,
uploadOption *operation.UploadOption,
genFileUrlFn func(host, fileId string) string,
reader io.Reader,
) (fileId string, uploadResult *operation.UploadResult, err error, data []byte)
}
// operationChunkUploader adapts *operation.Uploader to the chunkUploader
// interface.
type operationChunkUploader struct {
	uploader *operation.Uploader
}

// UploadWithRetry forwards every argument to the wrapped uploader and
// returns its four results unchanged (error in third position).
func (u operationChunkUploader) UploadWithRetry(
	filerClient filer_pb.FilerClient,
	assignRequest *filer_pb.AssignVolumeRequest,
	uploadOption *operation.UploadOption,
	genFileUrlFn func(host, fileId string) string,
	reader io.Reader,
) (string, *operation.UploadResult, error, []byte) {
	fileID, result, err, data := u.uploader.UploadWithRetry(
		filerClient,
		assignRequest,
		uploadOption,
		genFileUrlFn,
		reader,
	)
	return fileID, result, err, data
}
// newChunkUploader is the default chunkUploader factory: it creates an
// operation.Uploader and wraps it in operationChunkUploader. A failure from
// operation.NewUploader is returned as-is with a nil uploader.
func newChunkUploader() (chunkUploader, error) {
	up, err := operation.NewUploader()
	if err == nil {
		return operationChunkUploader{uploader: up}, nil
	}
	return nil, err
}