Export REST file_{read,write}_failures metrics on volume servers (#9215)

* Export gRPC `file_{read,write}_failures` metrics on volume servers.

Allows tracking overall R/W errors in real time through Prometheus.
Will follow up with a PR for Seaweed's REST API.

* Export REST `file_{read,write}_failures` metrics on volume servers.
This commit is contained in:
Lisandro Pin
2026-04-24 20:45:21 +02:00
committed by GitHub
parent 352ffdffe1
commit 93247d6de4
2 changed files with 9 additions and 0 deletions
@@ -35,11 +35,13 @@ import (
// reqIsProxied holds the literal string "proxied".
// NOTE(review): presumably a marker identifying requests that were already
// proxied by another server — confirm against its uses elsewhere in the file.
const reqIsProxied = "proxied"
// NotFound records a failed read in the volume server metrics and answers
// the request with an HTTP 404 status.
//
// It increments stats.VolumeServerFileReadFailures, increments the
// stats.VolumeServerHandlerCounter series labelled stats.ErrorGetNotFound,
// and then writes the http.StatusNotFound header to w. This function itself
// writes no response body.
func NotFound(w http.ResponseWriter) {
	stats.VolumeServerFileReadFailures.Inc()

	// Resolve the labelled series first, then bump it.
	notFoundHits := stats.VolumeServerHandlerCounter.WithLabelValues(stats.ErrorGetNotFound)
	notFoundHits.Inc()

	w.WriteHeader(http.StatusNotFound)
}
// InternalError records a failed read in the volume server metrics and
// answers the request with an HTTP 500 status.
//
// It increments stats.VolumeServerFileReadFailures, increments the
// stats.VolumeServerHandlerCounter series labelled stats.ErrorGetInternal,
// and then writes the http.StatusInternalServerError header to w. This
// function itself writes no response body.
func InternalError(w http.ResponseWriter) {
	stats.VolumeServerFileReadFailures.Inc()

	// Resolve the labelled series first, then bump it.
	internalErrHits := stats.VolumeServerHandlerCounter.WithLabelValues(stats.ErrorGetInternal)
	internalErrHits.Inc()

	w.WriteHeader(http.StatusInternalServerError)
}
@@ -11,6 +11,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/operation"
"github.com/seaweedfs/seaweedfs/weed/stats"
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
"github.com/seaweedfs/seaweedfs/weed/topology"
"github.com/seaweedfs/seaweedfs/weed/util/buffer_pool"
@@ -28,6 +29,7 @@ func (vs *VolumeServer) PostHandler(w http.ResponseWriter, r *http.Request) {
volumeId, ve := needle.NewVolumeId(vid)
if ve != nil {
glog.V(0).InfolnCtx(ctx, "NewVolumeId error:", ve)
stats.VolumeServerFileWriteFailures.Inc()
writeJsonError(w, r, http.StatusBadRequest, ve)
return
}
@@ -42,6 +44,7 @@ func (vs *VolumeServer) PostHandler(w http.ResponseWriter, r *http.Request) {
reqNeedle, originalSize, contentMd5, ne := needle.CreateNeedleFromRequest(r, vs.FixJpgOrientation, vs.fileSizeLimitBytes, bytesBuffer)
if ne != nil {
stats.VolumeServerFileWriteFailures.Inc()
writeJsonError(w, r, http.StatusBadRequest, ne)
return
}
@@ -50,6 +53,7 @@ func (vs *VolumeServer) PostHandler(w http.ResponseWriter, r *http.Request) {
// use context.WithoutCancel to avoid context cancellation when the client connection is closed
isUnchanged, writeError := topology.ReplicatedWrite(context.WithoutCancel(ctx), vs.GetMaster, vs.grpcDialOption, vs.store, volumeId, reqNeedle, r, contentMd5)
if writeError != nil {
stats.VolumeServerFileWriteFailures.Inc()
writeJsonError(w, r, http.StatusInternalServerError, writeError)
return
}
@@ -117,11 +121,13 @@ func (vs *VolumeServer) DeleteHandler(w http.ResponseWriter, r *http.Request) {
if n.IsChunkedManifest() {
chunkManifest, e := operation.LoadChunkManifest(n.Data, n.IsCompressed())
if e != nil {
stats.VolumeServerFileWriteFailures.Inc()
writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Load chunks manifest error: %v", e))
return
}
// make sure all chunks have been deleted before deleting the manifest
if e := chunkManifest.DeleteChunks(vs.GetMaster, false, vs.grpcDialOption); e != nil {
stats.VolumeServerFileWriteFailures.Inc()
writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Delete chunks error: %v", e))
return
}
@@ -148,6 +154,7 @@ func writeDeleteResult(err error, count int64, w http.ResponseWriter, r *http.Re
m["size"] = count
writeJsonQuiet(w, r, http.StatusAccepted, m)
} else {
stats.VolumeServerFileWriteFailures.Inc()
writeJsonError(w, r, http.StatusInternalServerError, fmt.Errorf("Deletion Failed: %w", err))
}
}