mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-06-13 23:36:45 +03:00
rust(volume): strip grpc-port suffix from master URL before HTTP lookup (#9276)
* rust(volume): strip grpc-port suffix from master URL before HTTP lookup The volume server stores `master_url` in SeaweedFS's canonical `host:httpPort.grpcPort` form (e.g. `node-a:5300.5310`). When `lookup_volume` builds the master `/dir/lookup` URL, the appended gRPC port turns the URL into `http://node-a:5300.5310/...`, which reqwest rejects with "builder error". Every replicated write, batch-delete lookup, and proxy/redirect read then fails. Mirror Go's `pb.ServerAddress.ToHttpAddress()` with a new `to_http_address` helper and apply it inside `lookup_volume`, the single funnel for all three HTTP master lookups in the Rust volume server. Other consumers of `master_url` already go through gRPC and use `to_grpc_address` / `parse_grpc_address`. Includes a regression test that mocks a master HTTP server and calls `lookup_volume` with a `host:port.grpcPort` master URL — without the fix it reproduces the exact "lookup request failed: builder error" from issue #9274. Fixes #9274 * rust(volume): only strip dotted suffix from address when both ports are numeric Previously `to_http_address` rewrote any `host:foo.bar` to `host:foo`, which would silently drop the suffix on malformed config (e.g. a hostname like `host:abc.def` or `host:99999.19333`). Validate that both halves of the dotted suffix parse as `u16` before stripping — mirrors the validation that `to_grpc_address` already does in the inverse direction. Also slice the input directly instead of going through `format!`, since the result is just a prefix of `addr`. Adds a test that asserts non-numeric / out-of-range dotted suffixes are preserved unchanged. * rust(volume): strip grpc-port suffix from peer URLs in replicate / proxy / redirect In normal operation the master returns `Location.url` as plain `host:port` and the gRPC port arrives in a separate field. But the volume server already has defensive logic (`grpc_address_for_location`) for the `host:httpPort.grpcPort` form on those same URLs, which implies a code path where peer URLs do carry the suffix. Apply `to_http_address` to `loc.url` / `target.url` before building HTTP URLs in `do_replicated_request`, `proxy_request`, and `redirect_request` to keep replicate-write, proxy-read, and redirect paths from hitting the same `lookup request failed: builder error` mode that #9274 documented for master lookups. Adds a unit test exercising `redirect_request` with a `.grpcPort` suffix on `target.url`. * rust(volume): return Cow<str> from to_http_address to skip allocation on the no-suffix path Most addresses pass through `to_http_address` unchanged (master and peer URLs are normally plain `host:port`), so the previous String return type allocated on every call for nothing. Switch to `Cow<str>`: the common pass-through borrows from the input, and only the rewrite branch allocates. Call sites use the result via `format!`/`Display`, which both work transparently with `Cow<str>`. Adds a test asserting the variant is Borrowed in the no-rewrite cases and Owned only when the suffix is stripped. * rust(volume): cover bracketed IPv6 literals in to_http_address tests The current implementation already handles IPv6 correctly because `rfind(':')` lands on the colon after the closing bracket, leaving the dotted suffix logic unchanged. Add explicit test coverage so the behavior is locked down for dual-stack deployments — both the strip case (`[::1]:9333.19333` -> `[::1]:9333`) and the various passthrough cases (no suffix, non-numeric, out-of-range, trailing colon). * rust(volume): parse both ports in one tuple pattern match in to_http_address Combine the two `is_ok()` checks into a single `if let (Ok(_), Ok(_))` tuple match — equivalent semantics, slightly tighter expression of intent. No behavior change.
This commit is contained in:
@@ -16,7 +16,7 @@ use axum::response::{IntoResponse, Response};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::grpc_client::{build_grpc_endpoint, GRPC_MAX_MESSAGE_SIZE};
|
||||
use super::volume_server::{normalize_outgoing_http_url, VolumeServerState};
|
||||
use super::volume_server::{normalize_outgoing_http_url, to_http_address, VolumeServerState};
|
||||
use crate::config::ReadMode;
|
||||
use crate::metrics;
|
||||
use crate::pb::volume_server_pb;
|
||||
@@ -357,9 +357,12 @@ async fn lookup_volume(
|
||||
master_url: &str,
|
||||
volume_id: u32,
|
||||
) -> Result<Vec<VolumeLocation>, String> {
|
||||
// master_url may be in SeaweedFS's "host:httpPort.grpcPort" form (mirrors
|
||||
// Go's pb.ServerAddress); strip the gRPC port before building the HTTP URL.
|
||||
let master_http = to_http_address(master_url);
|
||||
let url = normalize_outgoing_http_url(
|
||||
scheme,
|
||||
&format!("{}/dir/lookup?volumeId={}", master_url, volume_id),
|
||||
&format!("{}/dir/lookup?volumeId={}", master_http, volume_id),
|
||||
)?;
|
||||
let resp = client
|
||||
.get(&url)
|
||||
@@ -528,9 +531,12 @@ async fn do_replicated_request(
|
||||
|
||||
let mut futures = Vec::new();
|
||||
for loc in remote_locations {
|
||||
// Defensively strip a `.grpcPort` suffix in case loc.url carries it
|
||||
// (e.g. from an older master); see lookup_volume for the same fix.
|
||||
let peer_http = to_http_address(&loc.url);
|
||||
let url = normalize_outgoing_http_url(
|
||||
&state.outgoing_http_scheme,
|
||||
&format!("{}{}?{}", loc.url, path, new_query),
|
||||
&format!("{}{}?{}", peer_http, path, new_query),
|
||||
)?;
|
||||
let client = state.http_client.clone();
|
||||
|
||||
@@ -698,13 +704,15 @@ async fn proxy_request(
|
||||
) -> Response {
|
||||
// Build target URL, adding proxied=true query param
|
||||
let path = info.path.trim_start_matches('/');
|
||||
// Defensively strip a `.grpcPort` suffix in case target.url carries it.
|
||||
let target_http = to_http_address(&target.url);
|
||||
|
||||
let raw_target = if info.original_query.is_empty() {
|
||||
format!("{}/{}?proxied=true", target.url, path)
|
||||
format!("{}/{}?proxied=true", target_http, path)
|
||||
} else {
|
||||
format!(
|
||||
"{}/{}?{}&proxied=true",
|
||||
target.url, path, info.original_query
|
||||
target_http, path, info.original_query
|
||||
)
|
||||
};
|
||||
let target_url = match normalize_outgoing_http_url(&state.outgoing_http_scheme, &raw_target) {
|
||||
@@ -771,9 +779,11 @@ fn redirect_request(info: &ProxyRequestInfo, target: &VolumeLocation, scheme: &s
|
||||
query_params.push("proxied=true".to_string());
|
||||
let query = query_params.join("&");
|
||||
|
||||
// Defensively strip a `.grpcPort` suffix in case target.url carries it.
|
||||
let target_http = to_http_address(&target.url);
|
||||
let raw_target = format!(
|
||||
"{}/{},{}?{}",
|
||||
target.url, &info.vid_str, &info.fid_str, query
|
||||
target_http, &info.vid_str, &info.fid_str, query
|
||||
);
|
||||
let location = match normalize_outgoing_http_url(scheme, &raw_target) {
|
||||
Ok(url) => url,
|
||||
@@ -3920,4 +3930,81 @@ mod tests {
|
||||
"https://volume.internal:8080/3,01637037d6?collection=photos&proxied=true"
|
||||
);
|
||||
}
|
||||
|
||||
/// Defensive coverage for peer-to-peer redirects: if a master ever returns
|
||||
/// `Location.url` already encoded in `host:httpPort.grpcPort` form,
|
||||
/// `redirect_request` must still emit a valid HTTP Location header.
|
||||
#[test]
|
||||
fn test_redirect_request_strips_grpc_port_from_target_url() {
|
||||
let info = ProxyRequestInfo {
|
||||
original_headers: HeaderMap::new(),
|
||||
original_query: String::new(),
|
||||
path: "/3,01637037d6".to_string(),
|
||||
vid_str: "3".to_string(),
|
||||
fid_str: "01637037d6".to_string(),
|
||||
};
|
||||
let target = VolumeLocation {
|
||||
url: "volume.internal:8080.18080".to_string(),
|
||||
public_url: "volume.public:8080.18080".to_string(),
|
||||
grpc_port: 18080,
|
||||
};
|
||||
|
||||
let response = redirect_request(&info, &target, "http");
|
||||
assert_eq!(response.status(), StatusCode::MOVED_PERMANENTLY);
|
||||
assert_eq!(
|
||||
response.headers().get(header::LOCATION).unwrap(),
|
||||
"http://volume.internal:8080/3,01637037d6?proxied=true"
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression test for issue #9274.
|
||||
///
|
||||
/// SeaweedFS encodes a server's gRPC port by appending `.grpcPort` to
|
||||
/// the HTTP port: e.g. `master.local:9333.19333` (mirrors Go's
|
||||
/// `pb.ServerAddress`). When the volume server's `--mserver` flag is
|
||||
/// configured this way, replication writes look up the peer locations
|
||||
/// from the master over HTTP. Prior to the fix, the lookup URL became
|
||||
/// `http://host:9333.19333/dir/lookup?volumeId=...` — an invalid port
|
||||
/// that reqwest rejects with a "builder error", causing every
|
||||
/// replicated write to fail.
|
||||
#[tokio::test]
|
||||
async fn test_lookup_volume_strips_grpc_port_from_master_url() {
|
||||
use axum::{routing::get, Router};
|
||||
|
||||
let app = Router::new().route(
|
||||
"/dir/lookup",
|
||||
get(|axum::extract::Query(params): axum::extract::Query<std::collections::HashMap<String, String>>| async move {
|
||||
assert_eq!(params.get("volumeId").map(String::as_str), Some("31"));
|
||||
axum::Json(serde_json::json!({
|
||||
"volumeOrFileId": "31",
|
||||
"locations": [
|
||||
{"url": "10.0.0.2:5301", "publicUrl": "10.0.0.2:5301", "grpcPort": 5311}
|
||||
]
|
||||
}))
|
||||
}),
|
||||
);
|
||||
|
||||
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let addr = listener.local_addr().unwrap();
|
||||
let server = tokio::spawn(async move {
|
||||
axum::serve(listener, app).await.unwrap();
|
||||
});
|
||||
|
||||
// Mirrors how main.rs receives masters: a `host:httpPort.grpcPort` token.
|
||||
// The grpc-port part is fictitious here (the master is HTTP-only in
|
||||
// this test) — what matters is that the volume server must not pass
|
||||
// it through to reqwest.
|
||||
let master_url = format!("127.0.0.1:{}.{}", addr.port(), 19999);
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let locations = super::lookup_volume(&client, "http", &master_url, 31)
|
||||
.await
|
||||
.expect("lookup_volume should succeed even when master_url carries a .grpcPort suffix");
|
||||
|
||||
assert_eq!(locations.len(), 1);
|
||||
assert_eq!(locations[0].url, "10.0.0.2:5301");
|
||||
assert_eq!(locations[0].grpc_port, 5311);
|
||||
|
||||
server.abort();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,6 +134,38 @@ pub fn normalize_outgoing_http_url(scheme: &str, raw_target: &str) -> Result<Str
|
||||
Ok(format!("{}://{}", scheme, raw_target))
|
||||
}
|
||||
|
||||
/// Convert a SeaweedFS server address to its HTTP `host:port` form.
|
||||
///
|
||||
/// Mirrors Go's `pb.ServerAddress.ToHttpAddress()`: SeaweedFS encodes a
|
||||
/// server's gRPC port by appending `.grpcPort` to the HTTP port (e.g.
|
||||
/// `host:9333.19333`). For HTTP requests we want only the HTTP `host:port`.
|
||||
/// - `host:port.grpcPort` -> `host:port`
|
||||
/// - `host:port` -> `host:port` (unchanged)
|
||||
/// - Anything that does not look like `host:port[.grpcPort]` is returned unchanged.
|
||||
///
|
||||
/// Returns a `Cow<str>` so the common (no-suffix) case borrows from `addr`
|
||||
/// without allocating; only the rewrite branch produces a new `String`.
|
||||
pub fn to_http_address(addr: &str) -> std::borrow::Cow<'_, str> {
|
||||
let Some(ports_sep_index) = addr.rfind(':') else {
|
||||
return std::borrow::Cow::Borrowed(addr);
|
||||
};
|
||||
let ports = &addr[ports_sep_index + 1..];
|
||||
if let Some(dot_idx) = ports.rfind('.') {
|
||||
let http_port = &ports[..dot_idx];
|
||||
let grpc_port = &ports[dot_idx + 1..];
|
||||
// Only strip the suffix when both parts parse as real ports — leave
|
||||
// anything else (e.g. "host:abc.def") untouched so bad config surfaces
|
||||
// rather than being silently rewritten. Mirrors the validation already
|
||||
// done in `to_grpc_address` for the inverse direction.
|
||||
if let (Ok(_), Ok(_)) = (http_port.parse::<u16>(), grpc_port.parse::<u16>()) {
|
||||
return std::borrow::Cow::Owned(
|
||||
addr[..ports_sep_index + 1 + dot_idx].to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
std::borrow::Cow::Borrowed(addr)
|
||||
}
|
||||
|
||||
fn request_remote_addr(request: &Request) -> Option<SocketAddr> {
|
||||
request
|
||||
.extensions()
|
||||
@@ -392,3 +424,78 @@ pub fn build_public_router(state: Arc<VolumeServerState>) -> Router {
|
||||
.layer(middleware::from_fn(common_headers_middleware))
|
||||
.with_state(state)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_to_http_address_strips_grpc_port_suffix() {
|
||||
assert_eq!(to_http_address("10.0.0.1:9333.19333"), "10.0.0.1:9333");
|
||||
assert_eq!(
|
||||
to_http_address("master.local:9333.19333"),
|
||||
"master.local:9333"
|
||||
);
|
||||
assert_eq!(to_http_address("10.85.183.6:5300.6300"), "10.85.183.6:5300");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_http_address_passthrough_without_grpc_suffix() {
|
||||
assert_eq!(to_http_address("10.0.0.1:9333"), "10.0.0.1:9333");
|
||||
assert_eq!(to_http_address("master.local:9333"), "master.local:9333");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_http_address_returns_input_when_unparseable() {
|
||||
assert_eq!(to_http_address(""), "");
|
||||
assert_eq!(to_http_address("no-port"), "no-port");
|
||||
// Trailing colon: nothing after the separator, treat as unparseable.
|
||||
assert_eq!(to_http_address("host:"), "host:");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_http_address_borrows_when_unchanged_and_owns_when_stripped() {
|
||||
// The common case (no suffix) must not allocate.
|
||||
let result = to_http_address("10.0.0.1:9333");
|
||||
assert!(matches!(result, std::borrow::Cow::Borrowed(_)));
|
||||
// Stripping requires a new string.
|
||||
let result = to_http_address("10.0.0.1:9333.19333");
|
||||
assert!(matches!(result, std::borrow::Cow::Owned(_)));
|
||||
assert_eq!(result, "10.0.0.1:9333");
|
||||
// Unparseable / passthrough also borrows.
|
||||
let result = to_http_address("host:abc.def");
|
||||
assert!(matches!(result, std::borrow::Cow::Borrowed(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_http_address_keeps_non_numeric_dotted_suffix() {
|
||||
// The dotted form is only valid when both sides are real port numbers.
|
||||
// Otherwise the address is malformed config (e.g. a hostname like
|
||||
// "host:abc.def"), and silently rewriting it would just hide the bug.
|
||||
assert_eq!(to_http_address("host:abc.def"), "host:abc.def");
|
||||
assert_eq!(to_http_address("host:9333.notaport"), "host:9333.notaport");
|
||||
assert_eq!(to_http_address("host:notaport.19333"), "host:notaport.19333");
|
||||
// Out-of-range ports must not be silently truncated either.
|
||||
assert_eq!(to_http_address("host:99999.19333"), "host:99999.19333");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_http_address_handles_bracketed_ipv6_literals() {
|
||||
// The function uses `rfind(':')`, so for bracketed IPv6 the port
|
||||
// separator is correctly identified as the colon AFTER the closing
|
||||
// bracket — making IPv4 and IPv6 behave the same.
|
||||
assert_eq!(to_http_address("[::1]:9333.19333"), "[::1]:9333");
|
||||
assert_eq!(
|
||||
to_http_address("[2001:db8::10]:5300.6300"),
|
||||
"[2001:db8::10]:5300"
|
||||
);
|
||||
// Plain bracketed IPv6 without a dotted suffix is borrowed unchanged.
|
||||
let result = to_http_address("[2001:db8::1]:9333");
|
||||
assert!(matches!(result, std::borrow::Cow::Borrowed(_)));
|
||||
assert_eq!(result, "[2001:db8::1]:9333");
|
||||
// Non-numeric / out-of-range / missing suffix all preserve the input.
|
||||
assert_eq!(to_http_address("[::1]:"), "[::1]:");
|
||||
assert_eq!(to_http_address("[::1]:abc.def"), "[::1]:abc.def");
|
||||
assert_eq!(to_http_address("[::1]:99999.19333"), "[::1]:99999.19333");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user