cluster: disallow mismatching versions from joining

This commit is contained in:
hayzam
2026-03-26 21:04:44 +05:30
parent e4ef66f73c
commit eafb681c8a
5 changed files with 138 additions and 12 deletions
+94 -7
View File
@@ -9,11 +9,13 @@
package clusterHandlers
import (
"encoding/json"
"fmt"
"net/http"
"strings"
"github.com/alchemillahq/sylve/internal"
"github.com/alchemillahq/sylve/internal/cmd"
clusterServiceInterfaces "github.com/alchemillahq/sylve/internal/interfaces/services/cluster"
"github.com/alchemillahq/sylve/internal/services/auth"
"github.com/alchemillahq/sylve/internal/services/cluster"
@@ -35,9 +37,10 @@ type JoinClusterRequest struct {
}
type AcceptJoinRequest struct {
NodeID string `json:"nodeId" binding:"required"`
NodeIP string `json:"nodeIp" binding:"required,ip"`
ClusterKey string `json:"clusterKey" binding:"required"`
NodeID string `json:"nodeId" binding:"required"`
NodeIP string `json:"nodeIp" binding:"required,ip"`
ClusterKey string `json:"clusterKey" binding:"required"`
NodeVersion string `json:"nodeVersion" binding:"required"`
}
type RemovePeerRequest struct {
@@ -48,6 +51,24 @@ func joinLeaderAPIHost(leaderIP string) string {
return cluster.ClusterAPIHost(leaderIP)
}
type basicHealthData struct {
SylveVersion string `json:"sylveVersion"`
}
func fetchNodeVersionFromHealth(healthURL string, payload any, headers map[string]string) (string, error) {
body, _, err := utils.HTTPPostJSONRead(healthURL, payload, headers)
if err != nil {
return "", err
}
var healthResp internal.APIResponse[basicHealthData]
if err := json.Unmarshal(body, &healthResp); err != nil {
return "", fmt.Errorf("decode_health_response_failed: %w", err)
}
return strings.TrimSpace(healthResp.Data.SylveVersion), nil
}
// @Summary Get Cluster
// @Description Get cluster details with information about RAFT nodes too
// @Tags Cluster
@@ -196,7 +217,8 @@ func JoinCluster(aS *auth.Service, cS *cluster.Service, zS *zelta.Service, fsm r
leaderAPIHost,
)
if err := utils.HTTPPostJSON(healthURL, req, headers); err != nil {
leaderVersion, err := fetchNodeVersionFromHealth(healthURL, req, headers)
if err != nil {
c.JSON(http.StatusInternalServerError, internal.APIResponse[any]{
Status: "error",
Message: "error_pinging_cluster_bad_leader_response",
@@ -206,6 +228,27 @@ func JoinCluster(aS *auth.Service, cS *cluster.Service, zS *zelta.Service, fsm r
return
}
localVersion := strings.TrimSpace(cmd.Version)
if leaderVersion == "" {
c.JSON(http.StatusConflict, internal.APIResponse[any]{
Status: "error",
Message: "cluster_version_mismatch",
Error: "leader_version_unavailable",
Data: nil,
})
return
}
if localVersion == "" || leaderVersion != localVersion {
c.JSON(http.StatusConflict, internal.APIResponse[any]{
Status: "error",
Message: "cluster_version_mismatch",
Error: fmt.Sprintf("leader=%s,node=%s", leaderVersion, localVersion),
Data: nil,
})
return
}
err = cS.StartAsJoiner(fsm, req.NodeIP, req.ClusterKey)
if err != nil {
c.JSON(http.StatusInternalServerError, internal.APIResponse[any]{
@@ -219,9 +262,10 @@ func JoinCluster(aS *auth.Service, cS *cluster.Service, zS *zelta.Service, fsm r
acceptURL := fmt.Sprintf("https://%s/api/cluster/accept-join", leaderAPIHost)
payload := map[string]any{
"nodeId": req.NodeID,
"nodeIp": req.NodeIP,
"clusterKey": req.ClusterKey,
"nodeId": req.NodeID,
"nodeIp": req.NodeIP,
"clusterKey": req.ClusterKey,
"nodeVersion": localVersion,
}
if err := utils.HTTPPostJSON(acceptURL, payload, headers); err != nil {
@@ -277,6 +321,49 @@ func AcceptJoin(cS *cluster.Service) gin.HandlerFunc {
return
}
localVersion := strings.TrimSpace(cmd.Version)
nodeVersion := strings.TrimSpace(req.NodeVersion)
if localVersion == "" || nodeVersion == "" || nodeVersion != localVersion {
c.JSON(http.StatusConflict, internal.APIResponse[any]{
Status: "error",
Message: "cluster_version_mismatch",
Error: fmt.Sprintf("leader=%s,node=%s", localVersion, nodeVersion),
Data: nil,
})
return
}
joinerHealthURL := fmt.Sprintf("https://%s/api/health/basic", cluster.ClusterAPIHost(req.NodeIP))
joinerVersion, err := fetchNodeVersionFromHealth(
joinerHealthURL,
map[string]any{"clusterKey": req.ClusterKey},
map[string]string{},
)
if err != nil || joinerVersion == "" {
reason := "joiner_version_unavailable"
if err != nil {
reason = fmt.Sprintf("joiner_version_unavailable: %v", err)
}
c.JSON(http.StatusConflict, internal.APIResponse[any]{
Status: "error",
Message: "cluster_version_mismatch",
Error: reason,
Data: nil,
})
return
}
if joinerVersion != localVersion || joinerVersion != nodeVersion {
c.JSON(http.StatusConflict, internal.APIResponse[any]{
Status: "error",
Message: "cluster_version_mismatch",
Error: fmt.Sprintf("leader=%s,node=%s", localVersion, joinerVersion),
Data: nil,
})
return
}
if err := cS.AcceptJoin(req.NodeID, req.NodeIP, req.ClusterKey); err != nil {
if strings.HasPrefix(err.Error(), "not_leader;") {
c.JSON(http.StatusConflict, internal.APIResponse[any]{
@@ -80,6 +80,29 @@ func TestAcceptJoinRejectsPayloadWithoutNodeIP(t *testing.T) {
}
}
func TestAcceptJoinRejectsVersionMismatch(t *testing.T) {
r := newClusterLifecycleValidationRouter()
rr := performJSONRequest(
t,
r,
http.MethodPost,
"/cluster/accept-join",
[]byte(`{"nodeId":"node-1","nodeIp":"10.0.0.2","clusterKey":"secret","nodeVersion":"0.0.0"}`),
)
if rr.Code != http.StatusConflict {
t.Fatalf("expected status 409, got %d with body %s", rr.Code, rr.Body.String())
}
var resp handlerAPIResponse[any]
if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
t.Fatalf("invalid json response: %v", err)
}
if resp.Message != "cluster_version_mismatch" {
t.Fatalf("expected cluster_version_mismatch, got %q", resp.Message)
}
}
func TestJoinLeaderAPIHostUsesClusterHTTPSPort(t *testing.T) {
tests := []struct {
name string
+5 -3
View File
@@ -12,6 +12,7 @@ import (
"net/http"
"github.com/alchemillahq/sylve/internal"
"github.com/alchemillahq/sylve/internal/cmd"
"github.com/alchemillahq/sylve/internal/services/system"
"github.com/alchemillahq/sylve/pkg/utils"
@@ -46,9 +47,10 @@ func BasicHealthCheckHandler(systemService *system.Service) gin.HandlerFunc {
Status: "success",
Message: "Basic health is OK",
Data: gin.H{
"hostname": h,
"initialized": b.Initialized,
"restarted": b.Restarted,
"hostname": h,
"initialized": b.Initialized,
"restarted": b.Restarted,
"sylveVersion": cmd.Version,
},
})
}
+2 -1
View File
@@ -5,7 +5,8 @@ import { z } from 'zod/v4';
export const BasicHealthSchema = z.object({
hostname: z.string().optional(),
initialized: z.boolean().optional(),
restarted: z.boolean().optional()
restarted: z.boolean().optional(),
sylveVersion: z.string().optional()
});
export type BasicHealth = z.infer<typeof BasicHealthSchema>;
@@ -27,6 +27,19 @@
let properties = $state(options);
let loading = $state(false);
function getJoinErrorMessage(response: { message?: string; error?: string | string[] }): string {
const backendReportedMismatch =
response.message === 'cluster_version_mismatch' ||
(typeof response.error === 'string' && response.error.includes('leader=')) ||
(Array.isArray(response.error) && response.error.some((item) => item.includes('leader=')));
if (backendReportedMismatch) {
return 'Version mismatch: this node and the leader must run the same Sylve version';
}
return 'Unable to join cluster';
}
async function join() {
let error = '';
@@ -64,7 +77,7 @@
if (response.error) {
handleAPIError(response);
toast.error('Unable to join cluster', {
toast.error(getJoinErrorMessage(response), {
position: 'bottom-center'
});
return;