Files
seaweedfs/weed/filer/postgres/postgres_collation.go
T
Chris Lu a24f4844d3 filer: keep S3 list order byte-lexicographic regardless of SQL name column collation (#9824)
* mysql: keep S3 list order byte-lexicographic regardless of name column collation

ORDER BY name and the name > ? pagination predicate follow the column
collation, so a case-insensitive filemeta.name (e.g. utf8mb3_general_ci)
returns S3 keys out of byte order and breaks clients that merge two sorted
listings.

Detect the live name collation at startup; only when it isn't binary, wrap
the list comparison, prefix, and ORDER BY in BINARY name so order and
pagination stay consistent. Correctly configured utf8mb4_bin tables keep
their indexed range scan unchanged, and the operator gets a warning to
convert the column.

* postgres: keep S3 list order byte-lexicographic regardless of name column collation

ORDER BY name and the name > $n pagination predicate follow the column or
database collation, so a locale-aware filemeta.name (e.g. the en_US.UTF-8
database default) returns S3 keys out of byte order and breaks clients that
merge two sorted listings.

Detect the live name collation at startup; only when it isn't byte-ordered,
wrap the list comparison, prefix, and ORDER BY in COLLATE "C" so order and
pagination stay consistent. A byte-ordered (C/POSIX/C.UTF-8) column keeps its
indexed range scan unchanged, and the operator gets a warning to declare the
column COLLATE "C".
2026-06-04 14:33:41 -07:00

54 lines
2.1 KiB
Go

package postgres
import (
"database/sql"
"strings"
"github.com/seaweedfs/seaweedfs/weed/filer/abstract_sql"
"github.com/seaweedfs/seaweedfs/weed/glog"
)
// ConfigureListOrdering switches the list queries to COLLATE "C" when the live
// filemeta.name collation is locale-aware, so S3 ListObjectsV2 stays
// lexicographic. The fallback costs a sort, so warn the operator to fix it.
func ConfigureListOrdering(db *sql.DB, gen *SqlGenPostgres) {
collation, isBinary, err := nameColumnCollation(db)
if err != nil {
glog.V(1).Infof("postgres: skip filemeta.name collation check: %v", err)
return
}
if isBinary {
return
}
gen.ForceBinaryCollation = true
glog.Warningf(`postgres: filemeta.name collation %q is not byte-ordered, so S3 list order is not byte-lexicographic and clients that merge sorted listings may report spurious diffs. Falling back to a slower COLLATE "C" sort; declare the name column COLLATE "C" (or use a C/C.UTF-8 database collation) for correct, indexed ordering.`, collation)
}
func nameColumnCollation(db *sql.DB) (collation string, isBinary bool, err error) {
// information_schema reports NULL for a column on the database default, so
// fall back to datcollate for the effective ordering.
row := db.QueryRow(`SELECT
(SELECT collation_name FROM information_schema.columns
WHERE table_schema = current_schema() AND table_name = $1 AND column_name = 'name'),
(SELECT datcollate FROM pg_database WHERE datname = current_database())`, abstract_sql.DEFAULT_TABLE)
var columnCollation, dbCollate sql.NullString
if err = row.Scan(&columnCollation, &dbCollate); err != nil {
return "", false, err
}
effective := columnCollation.String
if !columnCollation.Valid || effective == "" {
effective = dbCollate.String
}
return effective, isByteOrderedCollation(effective), nil
}
// isByteOrderedCollation reports whether a Postgres locale sorts by byte value
// (C, POSIX, C.UTF-8) rather than locale order (en_US.UTF-8, ICU, ...).
func isByteOrderedCollation(name string) bool {
switch strings.ToLower(strings.TrimSpace(name)) {
case "c", "posix", "c.utf-8", "c.utf8":
return true
}
return false
}