From eafb681c8a4fd1d0f540dfc86adf7d110278be75 Mon Sep 17 00:00:00 2001 From: hayzam Date: Thu, 26 Mar 2026 21:04:44 +0530 Subject: [PATCH] cluster: disallow mismatching versions from joining --- internal/handlers/cluster/cluster.go | 101 ++++++++++++++++-- .../handlers/cluster/cluster_join_test.go | 23 ++++ internal/handlers/health.go | 8 +- web/src/lib/api/system/system.ts | 3 +- .../lib/components/custom/Cluster/Join.svelte | 15 ++- 5 files changed, 138 insertions(+), 12 deletions(-) diff --git a/internal/handlers/cluster/cluster.go b/internal/handlers/cluster/cluster.go index 5ac16202..395b799a 100644 --- a/internal/handlers/cluster/cluster.go +++ b/internal/handlers/cluster/cluster.go @@ -9,11 +9,13 @@ package clusterHandlers import ( + "encoding/json" "fmt" "net/http" "strings" "github.com/alchemillahq/sylve/internal" + "github.com/alchemillahq/sylve/internal/cmd" clusterServiceInterfaces "github.com/alchemillahq/sylve/internal/interfaces/services/cluster" "github.com/alchemillahq/sylve/internal/services/auth" "github.com/alchemillahq/sylve/internal/services/cluster" @@ -35,9 +37,10 @@ type JoinClusterRequest struct { } type AcceptJoinRequest struct { - NodeID string `json:"nodeId" binding:"required"` - NodeIP string `json:"nodeIp" binding:"required,ip"` - ClusterKey string `json:"clusterKey" binding:"required"` + NodeID string `json:"nodeId" binding:"required"` + NodeIP string `json:"nodeIp" binding:"required,ip"` + ClusterKey string `json:"clusterKey" binding:"required"` + NodeVersion string `json:"nodeVersion" binding:"required"` } type RemovePeerRequest struct { @@ -48,6 +51,24 @@ func joinLeaderAPIHost(leaderIP string) string { return cluster.ClusterAPIHost(leaderIP) } +type basicHealthData struct { + SylveVersion string `json:"sylveVersion"` +} + +func fetchNodeVersionFromHealth(healthURL string, payload any, headers map[string]string) (string, error) { + body, _, err := utils.HTTPPostJSONRead(healthURL, payload, headers) + if err != nil { + return "", err + } + + var healthResp internal.APIResponse[basicHealthData] + if err := json.Unmarshal(body, &healthResp); err != nil { + return "", fmt.Errorf("decode_health_response_failed: %w", err) + } + + return strings.TrimSpace(healthResp.Data.SylveVersion), nil +} + // @Summary Get Cluster // @Description Get cluster details with information about RAFT nodes too // @Tags Cluster @@ -196,7 +217,8 @@ func JoinCluster(aS *auth.Service, cS *cluster.Service, zS *zelta.Service, fsm r leaderAPIHost, ) - if err := utils.HTTPPostJSON(healthURL, req, headers); err != nil { + leaderVersion, err := fetchNodeVersionFromHealth(healthURL, req, headers) + if err != nil { c.JSON(http.StatusInternalServerError, internal.APIResponse[any]{ Status: "error", Message: "error_pinging_cluster_bad_leader_response", @@ -206,6 +228,27 @@ func JoinCluster(aS *auth.Service, cS *cluster.Service, zS *zelta.Service, fsm r return } + localVersion := strings.TrimSpace(cmd.Version) + if leaderVersion == "" { + c.JSON(http.StatusConflict, internal.APIResponse[any]{ + Status: "error", + Message: "cluster_version_mismatch", + Error: "leader_version_unavailable", + Data: nil, + }) + return + } + + if localVersion == "" || leaderVersion != localVersion { + c.JSON(http.StatusConflict, internal.APIResponse[any]{ + Status: "error", + Message: "cluster_version_mismatch", + Error: fmt.Sprintf("leader=%s,node=%s", leaderVersion, localVersion), + Data: nil, + }) + return + } + err = cS.StartAsJoiner(fsm, req.NodeIP, req.ClusterKey) if err != nil { c.JSON(http.StatusInternalServerError, internal.APIResponse[any]{ @@ -219,9 +262,10 @@ func JoinCluster(aS *auth.Service, cS *cluster.Service, zS *zelta.Service, fsm r acceptURL := fmt.Sprintf("https://%s/api/cluster/accept-join", leaderAPIHost) payload := map[string]any{ - "nodeId": req.NodeID, - "nodeIp": req.NodeIP, - "clusterKey": req.ClusterKey, + "nodeId": req.NodeID, + "nodeIp": req.NodeIP, + "clusterKey": req.ClusterKey, + "nodeVersion": localVersion, } if err := utils.HTTPPostJSON(acceptURL, payload, headers); err != nil { @@ -277,6 +321,49 @@ func AcceptJoin(cS *cluster.Service) gin.HandlerFunc { return } + localVersion := strings.TrimSpace(cmd.Version) + nodeVersion := strings.TrimSpace(req.NodeVersion) + if localVersion == "" || nodeVersion == "" || nodeVersion != localVersion { + c.JSON(http.StatusConflict, internal.APIResponse[any]{ + Status: "error", + Message: "cluster_version_mismatch", + Error: fmt.Sprintf("leader=%s,node=%s", localVersion, nodeVersion), + Data: nil, + }) + return + } + + joinerHealthURL := fmt.Sprintf("https://%s/api/health/basic", cluster.ClusterAPIHost(req.NodeIP)) + joinerVersion, err := fetchNodeVersionFromHealth( + joinerHealthURL, + map[string]any{"clusterKey": req.ClusterKey}, + map[string]string{}, + ) + if err != nil || joinerVersion == "" { + reason := "joiner_version_unavailable" + if err != nil { + reason = fmt.Sprintf("joiner_version_unavailable: %v", err) + } + + c.JSON(http.StatusConflict, internal.APIResponse[any]{ + Status: "error", + Message: "cluster_version_mismatch", + Error: reason, + Data: nil, + }) + return + } + + if joinerVersion != localVersion || joinerVersion != nodeVersion { + c.JSON(http.StatusConflict, internal.APIResponse[any]{ + Status: "error", + Message: "cluster_version_mismatch", + Error: fmt.Sprintf("leader=%s,node=%s", localVersion, joinerVersion), + Data: nil, + }) + return + } + if err := cS.AcceptJoin(req.NodeID, req.NodeIP, req.ClusterKey); err != nil { if strings.HasPrefix(err.Error(), "not_leader;") { c.JSON(http.StatusConflict, internal.APIResponse[any]{ diff --git a/internal/handlers/cluster/cluster_join_test.go b/internal/handlers/cluster/cluster_join_test.go index dfa08daf..9837d8ff 100644 --- a/internal/handlers/cluster/cluster_join_test.go +++ b/internal/handlers/cluster/cluster_join_test.go @@ -80,6 +80,29 @@ func TestAcceptJoinRejectsPayloadWithoutNodeIP(t *testing.T) { } } +func TestAcceptJoinRejectsVersionMismatch(t *testing.T) { + r := newClusterLifecycleValidationRouter() + + rr := performJSONRequest( + t, + r, + http.MethodPost, + "/cluster/accept-join", + []byte(`{"nodeId":"node-1","nodeIp":"10.0.0.2","clusterKey":"secret","nodeVersion":"0.0.0"}`), + ) + if rr.Code != http.StatusConflict { + t.Fatalf("expected status 409, got %d with body %s", rr.Code, rr.Body.String()) + } + + var resp handlerAPIResponse[any] + if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil { + t.Fatalf("invalid json response: %v", err) + } + if resp.Message != "cluster_version_mismatch" { + t.Fatalf("expected cluster_version_mismatch, got %q", resp.Message) + } +} + func TestJoinLeaderAPIHostUsesClusterHTTPSPort(t *testing.T) { tests := []struct { name string diff --git a/internal/handlers/health.go b/internal/handlers/health.go index 97c44ea8..5b6116e9 100644 --- a/internal/handlers/health.go +++ b/internal/handlers/health.go @@ -12,6 +12,7 @@ import ( "net/http" "github.com/alchemillahq/sylve/internal" + "github.com/alchemillahq/sylve/internal/cmd" "github.com/alchemillahq/sylve/internal/services/system" "github.com/alchemillahq/sylve/pkg/utils" @@ -46,9 +47,10 @@ func BasicHealthCheckHandler(systemService *system.Service) gin.HandlerFunc { Status: "success", Message: "Basic health is OK", Data: gin.H{ - "hostname": h, - "initialized": b.Initialized, - "restarted": b.Restarted, + "hostname": h, + "initialized": b.Initialized, + "restarted": b.Restarted, + "sylveVersion": cmd.Version, }, }) } diff --git a/web/src/lib/api/system/system.ts b/web/src/lib/api/system/system.ts index 51284781..b24fe77d 100644 --- a/web/src/lib/api/system/system.ts +++ b/web/src/lib/api/system/system.ts @@ -5,7 +5,8 @@ import { z } from 'zod/v4'; export const BasicHealthSchema = z.object({ hostname: z.string().optional(), initialized: z.boolean().optional(), - restarted: z.boolean().optional() + restarted: z.boolean().optional(), + sylveVersion: z.string().optional() }); export type BasicHealth = z.infer; diff --git a/web/src/lib/components/custom/Cluster/Join.svelte b/web/src/lib/components/custom/Cluster/Join.svelte index 900e28b0..6cec1f24 100644 --- a/web/src/lib/components/custom/Cluster/Join.svelte +++ b/web/src/lib/components/custom/Cluster/Join.svelte @@ -27,6 +27,19 @@ let properties = $state(options); let loading = $state(false); + function getJoinErrorMessage(response: { message?: string; error?: string | string[] }): string { + const backendReportedMismatch = + response.message === 'cluster_version_mismatch' || + (typeof response.error === 'string' && response.error.includes('leader=')) || + (Array.isArray(response.error) && response.error.some((item) => item.includes('leader='))); + + if (backendReportedMismatch) { + return 'Version mismatch: this node and the leader must run the same Sylve version'; + } + + return 'Unable to join cluster'; + } + async function join() { let error = ''; @@ -64,7 +77,7 @@ if (response.error) { handleAPIError(response); - toast.error('Unable to join cluster', { + toast.error(getJoinErrorMessage(response), { position: 'bottom-center' }); return;