mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-06-13 23:36:45 +03:00
filer: keep S3 list order byte-lexicographic regardless of SQL name column collation (#9824)
* mysql: keep S3 list order byte-lexicographic regardless of name column collation

  ORDER BY name and the name > ? pagination predicate follow the column collation, so a case-insensitive filemeta.name (e.g. utf8mb3_general_ci) returns S3 keys out of byte order and breaks clients that merge two sorted listings.

  Detect the live name collation at startup; only when it isn't binary, wrap the list comparison, prefix, and ORDER BY in BINARY name so order and pagination stay consistent. Correctly configured utf8mb4_bin tables keep their indexed range scan unchanged, and the operator gets a warning to convert the column.

* postgres: keep S3 list order byte-lexicographic regardless of name column collation

  ORDER BY name and the name > $n pagination predicate follow the column or database collation, so a locale-aware filemeta.name (e.g. the en_US.UTF-8 database default) returns S3 keys out of byte order and breaks clients that merge two sorted listings.

  Detect the live name collation at startup; only when it isn't byte-ordered, wrap the list comparison, prefix, and ORDER BY in COLLATE "C" so order and pagination stay consistent. A byte-ordered (C/POSIX/C.UTF-8) column keeps its indexed range scan unchanged, and the operator gets a warning to declare the column COLLATE "C".
This commit is contained in:
@@ -0,0 +1,41 @@
|
||||
package mysql
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"strings"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/filer/abstract_sql"
|
||||
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||
)
|
||||
|
||||
// ConfigureListOrdering switches the list queries to BINARY byte ordering when
|
||||
// the live filemeta.name collation is not binary, so S3 ListObjectsV2 stays
|
||||
// lexicographic. The fallback costs a filesort, so warn the operator to fix it.
|
||||
func ConfigureListOrdering(db *sql.DB, gen *SqlGenMysql) {
|
||||
collation, isBinary, err := nameColumnCollation(db)
|
||||
if err != nil {
|
||||
glog.V(1).Infof("mysql: skip filemeta.name collation check: %v", err)
|
||||
return
|
||||
}
|
||||
if isBinary {
|
||||
return
|
||||
}
|
||||
gen.ForceBinaryCollation = true
|
||||
glog.Warningf("mysql: filemeta.name collation %q is not binary, so S3 list order is not byte-lexicographic and clients that merge sorted listings may report spurious diffs. Falling back to a slower BINARY sort; convert the name column to a *_bin collation (e.g. utf8mb4_bin) for correct, indexed ordering.", collation)
|
||||
}
|
||||
|
||||
func nameColumnCollation(db *sql.DB) (collation string, isBinary bool, err error) {
|
||||
row := db.QueryRow("SELECT COLLATION_NAME FROM information_schema.COLUMNS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = ? AND COLUMN_NAME = 'name'", abstract_sql.DEFAULT_TABLE)
|
||||
var c sql.NullString
|
||||
if err = row.Scan(&c); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
return c.String, isBinaryCollation(c.String), nil
|
||||
}
|
||||
|
||||
// isBinaryCollation reports whether a MySQL collation name is treated as
// byte-ordered: the empty string (a NULL collation, i.e. a BINARY column),
// the literal "binary", or any name ending in "_bin". Matching ignores case
// and surrounding whitespace.
func isBinaryCollation(collation string) bool {
	switch normalized := strings.ToLower(strings.TrimSpace(collation)); normalized {
	case "", "binary":
		return true
	default:
		return strings.HasSuffix(normalized, "_bin")
	}
}
|
||||
@@ -11,6 +11,8 @@ type SqlGenMysql struct {
|
||||
CreateTableSqlTemplate string
|
||||
DropTableSqlTemplate string
|
||||
UpsertQueryTemplate string
|
||||
// Force byte ordering on a non-binary name column; see ConfigureListOrdering.
|
||||
ForceBinaryCollation bool
|
||||
}
|
||||
|
||||
// DefaultUpsertQuery keeps INSERTs idempotent so the inode-index KvPut
|
||||
@@ -48,12 +50,22 @@ func (gen *SqlGenMysql) GetSqlDeleteFolderChildren(tableName string) string {
|
||||
return fmt.Sprintf("DELETE FROM `%s` WHERE `dirhash` = ? AND `directory` = ?", tableName)
|
||||
}
|
||||
|
||||
// nameExpr forces byte ordering on a non-binary column at the cost of a filesort.
|
||||
func (gen *SqlGenMysql) nameExpr() string {
|
||||
if gen.ForceBinaryCollation {
|
||||
return "BINARY `name`"
|
||||
}
|
||||
return "`name`"
|
||||
}
|
||||
|
||||
func (gen *SqlGenMysql) GetSqlListExclusive(tableName string) string {
|
||||
return fmt.Sprintf("SELECT `name`, `meta` FROM `%s` WHERE `dirhash` = ? AND `name` > ? AND `directory` = ? AND `name` LIKE ? ORDER BY `name` ASC LIMIT ?", tableName)
|
||||
name := gen.nameExpr()
|
||||
return fmt.Sprintf("SELECT `name`, `meta` FROM `%s` WHERE `dirhash` = ? AND %s > ? AND `directory` = ? AND %s LIKE ? ORDER BY %s ASC LIMIT ?", tableName, name, name, name)
|
||||
}
|
||||
|
||||
func (gen *SqlGenMysql) GetSqlListInclusive(tableName string) string {
|
||||
return fmt.Sprintf("SELECT `name`, `meta` FROM `%s` WHERE `dirhash` = ? AND `name` >= ? AND `directory` = ? AND `name` LIKE ? ORDER BY `name` ASC LIMIT ?", tableName)
|
||||
name := gen.nameExpr()
|
||||
return fmt.Sprintf("SELECT `name`, `meta` FROM `%s` WHERE `dirhash` = ? AND %s >= ? AND `directory` = ? AND %s LIKE ? ORDER BY %s ASC LIMIT ?", tableName, name, name, name)
|
||||
}
|
||||
|
||||
func (gen *SqlGenMysql) GetSqlCreateTable(tableName string) string {
|
||||
|
||||
@@ -26,3 +26,45 @@ func TestEmptyUpsertTemplateFallsBackToPlainInsert(t *testing.T) {
|
||||
t.Fatalf("plain INSERT path should not contain ON DUPLICATE KEY UPDATE, got: %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListSqlDefaultOrderingFollowsColumn(t *testing.T) {
|
||||
gen := &SqlGenMysql{}
|
||||
for _, got := range []string{gen.GetSqlListExclusive("filemeta"), gen.GetSqlListInclusive("filemeta")} {
|
||||
if strings.Contains(got, "BINARY") {
|
||||
t.Fatalf("default list query should not force BINARY, got: %s", got)
|
||||
}
|
||||
if !strings.Contains(got, "ORDER BY `name` ASC") {
|
||||
t.Fatalf("expected plain name ordering, got: %s", got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestListSqlBinaryOrderingOnNonBinaryColumn(t *testing.T) {
|
||||
gen := &SqlGenMysql{ForceBinaryCollation: true}
|
||||
for _, got := range []string{gen.GetSqlListExclusive("filemeta"), gen.GetSqlListInclusive("filemeta")} {
|
||||
if !strings.Contains(got, "ORDER BY BINARY `name` ASC") {
|
||||
t.Fatalf("expected BINARY ordering, got: %s", got)
|
||||
}
|
||||
if !strings.Contains(got, "BINARY `name` LIKE ?") {
|
||||
t.Fatalf("expected BINARY prefix filter, got: %s", got)
|
||||
}
|
||||
if strings.Contains(got, "AND `name` > ?") || strings.Contains(got, "AND `name` >= ?") {
|
||||
t.Fatalf("pagination comparison must also be BINARY, got: %s", got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsBinaryCollation(t *testing.T) {
|
||||
binary := []string{"", "binary", "utf8mb4_bin", "utf8mb3_bin", "latin1_bin", "UTF8MB4_BIN"}
|
||||
for _, c := range binary {
|
||||
if !isBinaryCollation(c) {
|
||||
t.Fatalf("expected %q to be treated as binary", c)
|
||||
}
|
||||
}
|
||||
ci := []string{"utf8mb4_general_ci", "utf8mb3_general_ci", "utf8mb4_0900_ai_ci", "latin1_swedish_ci"}
|
||||
for _, c := range ci {
|
||||
if isBinaryCollation(c) {
|
||||
t.Fatalf("expected %q to be treated as non-binary", c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,11 +70,12 @@ func (store *MysqlStore) initialize(dsn string, upsertQuery string, enableUpsert
|
||||
} else if upsertQuery == "" {
|
||||
upsertQuery = DefaultUpsertQuery
|
||||
}
|
||||
store.SqlGenerator = &SqlGenMysql{
|
||||
gen := &SqlGenMysql{
|
||||
CreateTableSqlTemplate: "",
|
||||
DropTableSqlTemplate: "DROP TABLE `%s`",
|
||||
UpsertQueryTemplate: upsertQuery,
|
||||
}
|
||||
store.SqlGenerator = gen
|
||||
|
||||
store.RetryableErrorCallback = func(err error) bool {
|
||||
var mysqlError *mysql.MySQLError
|
||||
@@ -155,6 +156,8 @@ func (store *MysqlStore) initialize(dsn string, upsertQuery string, enableUpsert
|
||||
return fmt.Errorf("connect to %s error:%v", maskedDSN(cfg), err)
|
||||
}
|
||||
|
||||
ConfigureListOrdering(store.DB, gen)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -63,11 +63,12 @@ func (store *MysqlStore2) initialize(createTable, upsertQuery string, enableUpse
|
||||
} else if upsertQuery == "" {
|
||||
upsertQuery = mysql.DefaultUpsertQuery
|
||||
}
|
||||
store.SqlGenerator = &mysql.SqlGenMysql{
|
||||
gen := &mysql.SqlGenMysql{
|
||||
CreateTableSqlTemplate: createTable,
|
||||
DropTableSqlTemplate: "DROP TABLE `%s`",
|
||||
UpsertQueryTemplate: upsertQuery,
|
||||
}
|
||||
store.SqlGenerator = gen
|
||||
|
||||
sqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, user, password, hostname, port, database)
|
||||
adaptedSqlUrl := fmt.Sprintf(CONNECTION_URL_PATTERN, user, "<ADAPTED>", hostname, port, database)
|
||||
@@ -96,5 +97,7 @@ func (store *MysqlStore2) initialize(createTable, upsertQuery string, enableUpse
|
||||
return fmt.Errorf("init table %s: %v", abstract_sql.DEFAULT_TABLE, err)
|
||||
}
|
||||
|
||||
mysql.ConfigureListOrdering(store.DB, gen)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
package postgres
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"strings"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/filer/abstract_sql"
|
||||
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||
)
|
||||
|
||||
// ConfigureListOrdering switches the list queries to COLLATE "C" when the live
|
||||
// filemeta.name collation is locale-aware, so S3 ListObjectsV2 stays
|
||||
// lexicographic. The fallback costs a sort, so warn the operator to fix it.
|
||||
func ConfigureListOrdering(db *sql.DB, gen *SqlGenPostgres) {
|
||||
collation, isBinary, err := nameColumnCollation(db)
|
||||
if err != nil {
|
||||
glog.V(1).Infof("postgres: skip filemeta.name collation check: %v", err)
|
||||
return
|
||||
}
|
||||
if isBinary {
|
||||
return
|
||||
}
|
||||
gen.ForceBinaryCollation = true
|
||||
glog.Warningf(`postgres: filemeta.name collation %q is not byte-ordered, so S3 list order is not byte-lexicographic and clients that merge sorted listings may report spurious diffs. Falling back to a slower COLLATE "C" sort; declare the name column COLLATE "C" (or use a C/C.UTF-8 database collation) for correct, indexed ordering.`, collation)
|
||||
}
|
||||
|
||||
func nameColumnCollation(db *sql.DB) (collation string, isBinary bool, err error) {
|
||||
// information_schema reports NULL for a column on the database default, so
|
||||
// fall back to datcollate for the effective ordering.
|
||||
row := db.QueryRow(`SELECT
|
||||
(SELECT collation_name FROM information_schema.columns
|
||||
WHERE table_schema = current_schema() AND table_name = $1 AND column_name = 'name'),
|
||||
(SELECT datcollate FROM pg_database WHERE datname = current_database())`, abstract_sql.DEFAULT_TABLE)
|
||||
var columnCollation, dbCollate sql.NullString
|
||||
if err = row.Scan(&columnCollation, &dbCollate); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
effective := columnCollation.String
|
||||
if !columnCollation.Valid || effective == "" {
|
||||
effective = dbCollate.String
|
||||
}
|
||||
return effective, isByteOrderedCollation(effective), nil
|
||||
}
|
||||
|
||||
// isByteOrderedCollation reports whether a Postgres collation or locale name
// sorts by byte value rather than by locale rules. Matching ignores case and
// surrounding whitespace.
//
// Accepted names are C, POSIX, C.UTF-8 / C.utf8, plus ucs_basic and
// pg_c_utf8. The last two sort by Unicode code point, which in UTF-8 is the
// same as byte order, so columns declared with them need no COLLATE "C"
// fallback. Everything else, including the empty string and ICU or
// en_US-style locales, is treated as locale-aware.
func isByteOrderedCollation(name string) bool {
	switch strings.ToLower(strings.TrimSpace(name)) {
	case "c", "posix", "c.utf-8", "c.utf8", "ucs_basic", "pg_c_utf8":
		return true
	}
	return false
}
|
||||
@@ -11,6 +11,8 @@ type SqlGenPostgres struct {
|
||||
CreateTableSqlTemplate string
|
||||
DropTableSqlTemplate string
|
||||
UpsertQueryTemplate string
|
||||
// Force byte ordering on a locale-aware name column; see ConfigureListOrdering.
|
||||
ForceBinaryCollation bool
|
||||
}
|
||||
|
||||
// DefaultUpsertQuery keeps INSERTs idempotent so a duplicate-key failure
|
||||
@@ -46,12 +48,22 @@ func (gen *SqlGenPostgres) GetSqlDeleteFolderChildren(tableName string) string {
|
||||
return fmt.Sprintf(`DELETE FROM "%s" WHERE dirhash=$1 AND directory=$2`, tableName)
|
||||
}
|
||||
|
||||
// nameExpr forces byte ordering on a locale-aware column via COLLATE "C".
|
||||
func (gen *SqlGenPostgres) nameExpr() string {
|
||||
if gen.ForceBinaryCollation {
|
||||
return `name COLLATE "C"`
|
||||
}
|
||||
return "name"
|
||||
}
|
||||
|
||||
func (gen *SqlGenPostgres) GetSqlListExclusive(tableName string) string {
|
||||
return fmt.Sprintf(`SELECT NAME, meta FROM "%s" WHERE dirhash=$1 AND name>$2 AND directory=$3 AND name like $4 ORDER BY NAME ASC LIMIT $5`, tableName)
|
||||
name := gen.nameExpr()
|
||||
return fmt.Sprintf(`SELECT NAME, meta FROM "%s" WHERE dirhash=$1 AND %s>$2 AND directory=$3 AND %s like $4 ORDER BY %s ASC LIMIT $5`, tableName, name, name, name)
|
||||
}
|
||||
|
||||
func (gen *SqlGenPostgres) GetSqlListInclusive(tableName string) string {
|
||||
return fmt.Sprintf(`SELECT NAME, meta FROM "%s" WHERE dirhash=$1 AND name>=$2 AND directory=$3 AND name like $4 ORDER BY NAME ASC LIMIT $5`, tableName)
|
||||
name := gen.nameExpr()
|
||||
return fmt.Sprintf(`SELECT NAME, meta FROM "%s" WHERE dirhash=$1 AND %s>=$2 AND directory=$3 AND %s like $4 ORDER BY %s ASC LIMIT $5`, tableName, name, name, name)
|
||||
}
|
||||
|
||||
func (gen *SqlGenPostgres) GetSqlCreateTable(tableName string) string {
|
||||
|
||||
@@ -23,3 +23,45 @@ func TestEmptyUpsertTemplateFallsBackToPlainInsert(t *testing.T) {
|
||||
t.Fatalf("plain INSERT path should not contain ON CONFLICT, got: %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListSqlDefaultOrderingFollowsColumn(t *testing.T) {
|
||||
gen := &SqlGenPostgres{}
|
||||
for _, got := range []string{gen.GetSqlListExclusive("filemeta"), gen.GetSqlListInclusive("filemeta")} {
|
||||
if strings.Contains(got, "COLLATE") {
|
||||
t.Fatalf("default list query should not force a collation, got: %s", got)
|
||||
}
|
||||
if !strings.Contains(got, "ORDER BY name ASC") {
|
||||
t.Fatalf("expected plain name ordering, got: %s", got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestListSqlBinaryOrderingOnNonBinaryColumn(t *testing.T) {
|
||||
gen := &SqlGenPostgres{ForceBinaryCollation: true}
|
||||
for _, got := range []string{gen.GetSqlListExclusive("filemeta"), gen.GetSqlListInclusive("filemeta")} {
|
||||
if !strings.Contains(got, `ORDER BY name COLLATE "C" ASC`) {
|
||||
t.Fatalf(`expected COLLATE "C" ordering, got: %s`, got)
|
||||
}
|
||||
if !strings.Contains(got, `name COLLATE "C" like $4`) {
|
||||
t.Fatalf(`expected COLLATE "C" prefix filter, got: %s`, got)
|
||||
}
|
||||
if strings.Contains(got, "AND name>$2") || strings.Contains(got, "AND name>=$2") {
|
||||
t.Fatalf("pagination comparison must also be byte-ordered, got: %s", got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsByteOrderedCollation(t *testing.T) {
|
||||
byteOrdered := []string{"C", "POSIX", "c", "C.UTF-8", "C.utf8"}
|
||||
for _, c := range byteOrdered {
|
||||
if !isByteOrderedCollation(c) {
|
||||
t.Fatalf("expected %q to be treated as byte-ordered", c)
|
||||
}
|
||||
}
|
||||
locale := []string{"en_US.UTF-8", "en_US.utf8", "en-US-x-icu", "und-x-icu", ""}
|
||||
for _, c := range locale {
|
||||
if isByteOrderedCollation(c) {
|
||||
t.Fatalf("expected %q to be treated as locale-aware", c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,11 +62,12 @@ func (store *PostgresStore) initialize(upsertQuery string, enableUpsert bool, us
|
||||
} else if upsertQuery == "" {
|
||||
upsertQuery = DefaultUpsertQuery
|
||||
}
|
||||
store.SqlGenerator = &SqlGenPostgres{
|
||||
gen := &SqlGenPostgres{
|
||||
CreateTableSqlTemplate: "",
|
||||
DropTableSqlTemplate: `drop table "%s"`,
|
||||
UpsertQueryTemplate: upsertQuery,
|
||||
}
|
||||
store.SqlGenerator = gen
|
||||
|
||||
// pgx-optimized connection string with better timeouts and connection handling
|
||||
sqlUrl := "connect_timeout=30"
|
||||
@@ -116,5 +117,7 @@ func (store *PostgresStore) initialize(upsertQuery string, enableUpsert bool, us
|
||||
}
|
||||
store.DB = db
|
||||
|
||||
ConfigureListOrdering(store.DB, gen)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -68,11 +68,12 @@ func (store *PostgresStore2) initialize(createTable, upsertQuery string, enableU
|
||||
} else if upsertQuery == "" {
|
||||
upsertQuery = postgres.DefaultUpsertQuery
|
||||
}
|
||||
store.SqlGenerator = &postgres.SqlGenPostgres{
|
||||
gen := &postgres.SqlGenPostgres{
|
||||
CreateTableSqlTemplate: createTable,
|
||||
DropTableSqlTemplate: `drop table "%s"`,
|
||||
UpsertQueryTemplate: upsertQuery,
|
||||
}
|
||||
store.SqlGenerator = gen
|
||||
|
||||
// pgx-optimized connection string with better timeouts and connection handling
|
||||
sqlUrl := "connect_timeout=30"
|
||||
@@ -126,5 +127,7 @@ func (store *PostgresStore2) initialize(createTable, upsertQuery string, enableU
|
||||
return fmt.Errorf("init table %s: %v", abstract_sql.DEFAULT_TABLE, err)
|
||||
}
|
||||
|
||||
postgres.ConfigureListOrdering(store.DB, gen)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user