mirror of
https://github.com/AlchemillaHQ/Sylve.git
synced 2026-06-14 00:46:34 +03:00
cluster: disallow mismatching versions from joining
This commit is contained in:
@@ -9,11 +9,13 @@
|
||||
package clusterHandlers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/alchemillahq/sylve/internal"
|
||||
"github.com/alchemillahq/sylve/internal/cmd"
|
||||
clusterServiceInterfaces "github.com/alchemillahq/sylve/internal/interfaces/services/cluster"
|
||||
"github.com/alchemillahq/sylve/internal/services/auth"
|
||||
"github.com/alchemillahq/sylve/internal/services/cluster"
|
||||
@@ -35,9 +37,10 @@ type JoinClusterRequest struct {
|
||||
}
|
||||
|
||||
type AcceptJoinRequest struct {
|
||||
NodeID string `json:"nodeId" binding:"required"`
|
||||
NodeIP string `json:"nodeIp" binding:"required,ip"`
|
||||
ClusterKey string `json:"clusterKey" binding:"required"`
|
||||
NodeID string `json:"nodeId" binding:"required"`
|
||||
NodeIP string `json:"nodeIp" binding:"required,ip"`
|
||||
ClusterKey string `json:"clusterKey" binding:"required"`
|
||||
NodeVersion string `json:"nodeVersion" binding:"required"`
|
||||
}
|
||||
|
||||
type RemovePeerRequest struct {
|
||||
@@ -48,6 +51,24 @@ func joinLeaderAPIHost(leaderIP string) string {
|
||||
return cluster.ClusterAPIHost(leaderIP)
|
||||
}
|
||||
|
||||
type basicHealthData struct {
|
||||
SylveVersion string `json:"sylveVersion"`
|
||||
}
|
||||
|
||||
func fetchNodeVersionFromHealth(healthURL string, payload any, headers map[string]string) (string, error) {
|
||||
body, _, err := utils.HTTPPostJSONRead(healthURL, payload, headers)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var healthResp internal.APIResponse[basicHealthData]
|
||||
if err := json.Unmarshal(body, &healthResp); err != nil {
|
||||
return "", fmt.Errorf("decode_health_response_failed: %w", err)
|
||||
}
|
||||
|
||||
return strings.TrimSpace(healthResp.Data.SylveVersion), nil
|
||||
}
|
||||
|
||||
// @Summary Get Cluster
|
||||
// @Description Get cluster details with information about RAFT nodes too
|
||||
// @Tags Cluster
|
||||
@@ -196,7 +217,8 @@ func JoinCluster(aS *auth.Service, cS *cluster.Service, zS *zelta.Service, fsm r
|
||||
leaderAPIHost,
|
||||
)
|
||||
|
||||
if err := utils.HTTPPostJSON(healthURL, req, headers); err != nil {
|
||||
leaderVersion, err := fetchNodeVersionFromHealth(healthURL, req, headers)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, internal.APIResponse[any]{
|
||||
Status: "error",
|
||||
Message: "error_pinging_cluster_bad_leader_response",
|
||||
@@ -206,6 +228,27 @@ func JoinCluster(aS *auth.Service, cS *cluster.Service, zS *zelta.Service, fsm r
|
||||
return
|
||||
}
|
||||
|
||||
localVersion := strings.TrimSpace(cmd.Version)
|
||||
if leaderVersion == "" {
|
||||
c.JSON(http.StatusConflict, internal.APIResponse[any]{
|
||||
Status: "error",
|
||||
Message: "cluster_version_mismatch",
|
||||
Error: "leader_version_unavailable",
|
||||
Data: nil,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
if localVersion == "" || leaderVersion != localVersion {
|
||||
c.JSON(http.StatusConflict, internal.APIResponse[any]{
|
||||
Status: "error",
|
||||
Message: "cluster_version_mismatch",
|
||||
Error: fmt.Sprintf("leader=%s,node=%s", leaderVersion, localVersion),
|
||||
Data: nil,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
err = cS.StartAsJoiner(fsm, req.NodeIP, req.ClusterKey)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, internal.APIResponse[any]{
|
||||
@@ -219,9 +262,10 @@ func JoinCluster(aS *auth.Service, cS *cluster.Service, zS *zelta.Service, fsm r
|
||||
|
||||
acceptURL := fmt.Sprintf("https://%s/api/cluster/accept-join", leaderAPIHost)
|
||||
payload := map[string]any{
|
||||
"nodeId": req.NodeID,
|
||||
"nodeIp": req.NodeIP,
|
||||
"clusterKey": req.ClusterKey,
|
||||
"nodeId": req.NodeID,
|
||||
"nodeIp": req.NodeIP,
|
||||
"clusterKey": req.ClusterKey,
|
||||
"nodeVersion": localVersion,
|
||||
}
|
||||
|
||||
if err := utils.HTTPPostJSON(acceptURL, payload, headers); err != nil {
|
||||
@@ -277,6 +321,49 @@ func AcceptJoin(cS *cluster.Service) gin.HandlerFunc {
|
||||
return
|
||||
}
|
||||
|
||||
localVersion := strings.TrimSpace(cmd.Version)
|
||||
nodeVersion := strings.TrimSpace(req.NodeVersion)
|
||||
if localVersion == "" || nodeVersion == "" || nodeVersion != localVersion {
|
||||
c.JSON(http.StatusConflict, internal.APIResponse[any]{
|
||||
Status: "error",
|
||||
Message: "cluster_version_mismatch",
|
||||
Error: fmt.Sprintf("leader=%s,node=%s", localVersion, nodeVersion),
|
||||
Data: nil,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
joinerHealthURL := fmt.Sprintf("https://%s/api/health/basic", cluster.ClusterAPIHost(req.NodeIP))
|
||||
joinerVersion, err := fetchNodeVersionFromHealth(
|
||||
joinerHealthURL,
|
||||
map[string]any{"clusterKey": req.ClusterKey},
|
||||
map[string]string{},
|
||||
)
|
||||
if err != nil || joinerVersion == "" {
|
||||
reason := "joiner_version_unavailable"
|
||||
if err != nil {
|
||||
reason = fmt.Sprintf("joiner_version_unavailable: %v", err)
|
||||
}
|
||||
|
||||
c.JSON(http.StatusConflict, internal.APIResponse[any]{
|
||||
Status: "error",
|
||||
Message: "cluster_version_mismatch",
|
||||
Error: reason,
|
||||
Data: nil,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
if joinerVersion != localVersion || joinerVersion != nodeVersion {
|
||||
c.JSON(http.StatusConflict, internal.APIResponse[any]{
|
||||
Status: "error",
|
||||
Message: "cluster_version_mismatch",
|
||||
Error: fmt.Sprintf("leader=%s,node=%s", localVersion, joinerVersion),
|
||||
Data: nil,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
if err := cS.AcceptJoin(req.NodeID, req.NodeIP, req.ClusterKey); err != nil {
|
||||
if strings.HasPrefix(err.Error(), "not_leader;") {
|
||||
c.JSON(http.StatusConflict, internal.APIResponse[any]{
|
||||
|
||||
@@ -80,6 +80,29 @@ func TestAcceptJoinRejectsPayloadWithoutNodeIP(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestAcceptJoinRejectsVersionMismatch(t *testing.T) {
|
||||
r := newClusterLifecycleValidationRouter()
|
||||
|
||||
rr := performJSONRequest(
|
||||
t,
|
||||
r,
|
||||
http.MethodPost,
|
||||
"/cluster/accept-join",
|
||||
[]byte(`{"nodeId":"node-1","nodeIp":"10.0.0.2","clusterKey":"secret","nodeVersion":"0.0.0"}`),
|
||||
)
|
||||
if rr.Code != http.StatusConflict {
|
||||
t.Fatalf("expected status 409, got %d with body %s", rr.Code, rr.Body.String())
|
||||
}
|
||||
|
||||
var resp handlerAPIResponse[any]
|
||||
if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("invalid json response: %v", err)
|
||||
}
|
||||
if resp.Message != "cluster_version_mismatch" {
|
||||
t.Fatalf("expected cluster_version_mismatch, got %q", resp.Message)
|
||||
}
|
||||
}
|
||||
|
||||
func TestJoinLeaderAPIHostUsesClusterHTTPSPort(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/alchemillahq/sylve/internal"
|
||||
"github.com/alchemillahq/sylve/internal/cmd"
|
||||
"github.com/alchemillahq/sylve/internal/services/system"
|
||||
"github.com/alchemillahq/sylve/pkg/utils"
|
||||
|
||||
@@ -46,9 +47,10 @@ func BasicHealthCheckHandler(systemService *system.Service) gin.HandlerFunc {
|
||||
Status: "success",
|
||||
Message: "Basic health is OK",
|
||||
Data: gin.H{
|
||||
"hostname": h,
|
||||
"initialized": b.Initialized,
|
||||
"restarted": b.Restarted,
|
||||
"hostname": h,
|
||||
"initialized": b.Initialized,
|
||||
"restarted": b.Restarted,
|
||||
"sylveVersion": cmd.Version,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
@@ -5,7 +5,8 @@ import { z } from 'zod/v4';
|
||||
export const BasicHealthSchema = z.object({
|
||||
hostname: z.string().optional(),
|
||||
initialized: z.boolean().optional(),
|
||||
restarted: z.boolean().optional()
|
||||
restarted: z.boolean().optional(),
|
||||
sylveVersion: z.string().optional()
|
||||
});
|
||||
|
||||
export type BasicHealth = z.infer<typeof BasicHealthSchema>;
|
||||
|
||||
@@ -27,6 +27,19 @@
|
||||
let properties = $state(options);
|
||||
let loading = $state(false);
|
||||
|
||||
function getJoinErrorMessage(response: { message?: string; error?: string | string[] }): string {
|
||||
const backendReportedMismatch =
|
||||
response.message === 'cluster_version_mismatch' ||
|
||||
(typeof response.error === 'string' && response.error.includes('leader=')) ||
|
||||
(Array.isArray(response.error) && response.error.some((item) => item.includes('leader=')));
|
||||
|
||||
if (backendReportedMismatch) {
|
||||
return 'Version mismatch: this node and the leader must run the same Sylve version';
|
||||
}
|
||||
|
||||
return 'Unable to join cluster';
|
||||
}
|
||||
|
||||
async function join() {
|
||||
let error = '';
|
||||
|
||||
@@ -64,7 +77,7 @@
|
||||
|
||||
if (response.error) {
|
||||
handleAPIError(response);
|
||||
toast.error('Unable to join cluster', {
|
||||
toast.error(getJoinErrorMessage(response), {
|
||||
position: 'bottom-center'
|
||||
});
|
||||
return;
|
||||
|
||||
Reference in New Issue
Block a user