From caadd6ca792898f12f130d9b5fc418efa24c5392 Mon Sep 17 00:00:00 2001
From: Chris Lu
Date: Wed, 10 Jun 2026 13:26:30 -0700
Subject: [PATCH] ci(s3tables): stop Lakekeeper flaking on Docker Hub pull timeouts (#9920)

* ci(s3tables): drop docker pre-pull from Lakekeeper job

The lakekeeper repro is pure Go against the local weed binary; the job
kept failing on Docker Hub timeouts pulling python:3 and localstack
images the test never runs. Also drop the stale python-in-docker
comments left from the old harness.

* ci(s3tables): serve python:3 from GHA cache in the STS job

Retried pulls still die when both mirror.gcr.io and
registry-1.docker.io are unreachable from the runner. Cache the saved
image tarball under a weekly key: an exact hit skips the registry
entirely, a miss pulls fresh and refreshes the cache, and a stale
tarball from a previous week is the fallback when Docker Hub is down.

* ci(spark): pre-pull the spark tag the test actually runs

The workflow warmed apache/spark:3.5.8 with retries while the
testcontainers setup runs apache/spark:3.5.1, so the real image was
pulled at test time with no retry at all.
--- .github/workflows/s3-spark-tests.yml | 2 +- .github/workflows/s3-tables-tests.yml | 40 +++++++++++++-------- test/s3tables/lakekeeper/lakekeeper_test.go | 2 -- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/.github/workflows/s3-spark-tests.yml b/.github/workflows/s3-spark-tests.yml index 8154a323b..6214a68f8 100644 --- a/.github/workflows/s3-spark-tests.yml +++ b/.github/workflows/s3-spark-tests.yml @@ -45,7 +45,7 @@ jobs: - name: Pre-pull Spark image run: | pull() { for i in 1 2 3; do docker pull "$1" && return 0; sleep 15; done; return 1; } - pull apache/spark:3.5.8 + pull apache/spark:3.5.1 - name: Run S3 Spark integration tests working-directory: test/s3/spark diff --git a/.github/workflows/s3-tables-tests.yml b/.github/workflows/s3-tables-tests.yml index bbc28e412..2b4404eee 100644 --- a/.github/workflows/s3-tables-tests.yml +++ b/.github/workflows/s3-tables-tests.yml @@ -574,10 +574,34 @@ jobs: echo '{"registry-mirrors": ["https://mirror.gcr.io"]}' | sudo tee /etc/docker/daemon.json sudo systemctl restart docker - - name: Pre-pull Python image + - name: Week stamp for image cache key + id: week + run: echo "week=$(date -u +%G-%V)" >> "$GITHUB_OUTPUT" + + - name: Restore python:3 image cache + id: python-image + uses: actions/cache@v5 + with: + path: /tmp/python3-image.tar + key: python3-image-${{ steps.week.outputs.week }} + restore-keys: | + python3-image- + + - name: Load or pull python:3 run: | + if [ "${{ steps.python-image.outputs.cache-hit }}" = "true" ]; then + docker load -i /tmp/python3-image.tar + exit 0 + fi pull() { for i in 1 2 3; do docker pull "$1" && return 0; sleep 15; done; return 1; } - pull python:3 + if pull python:3; then + docker save -o /tmp/python3-image.tar python:3 + elif [ -f /tmp/python3-image.tar ]; then + # Docker Hub unreachable; fall back to last week's cached image + docker load -i /tmp/python3-image.tar + else + exit 1 + fi - name: Run go mod tidy run: go mod tidy @@ -639,18 +663,6 @@ jobs: 
go-version-file: 'go.mod' id: go - - name: Configure Docker Hub mirror - run: | - echo '{"registry-mirrors": ["https://mirror.gcr.io"]}' | sudo tee /etc/docker/daemon.json - sudo systemctl restart docker - - - name: Pre-pull images - run: | - pull() { for i in 1 2 3; do docker pull "$1" && return 0; sleep 15; done; return 1; } - pull python:3 - # localstack is optional; best-effort pull - for i in 1 2 3; do docker pull localstack/localstack:latest && break; sleep 15; done - - name: Run go mod tidy run: go mod tidy diff --git a/test/s3tables/lakekeeper/lakekeeper_test.go b/test/s3tables/lakekeeper/lakekeeper_test.go index 41059c4ce..7076cf187 100644 --- a/test/s3tables/lakekeeper/lakekeeper_test.go +++ b/test/s3tables/lakekeeper/lakekeeper_test.go @@ -68,7 +68,6 @@ func TestLakekeeperIntegration(t *testing.T) { env.StartSeaweedFS(t) fmt.Printf(">>> SeaweedFS started.\n") - // Run python script in docker to test STS and S3 operations runLakekeeperRepro(t, env) } @@ -80,7 +79,6 @@ func TestLakekeeperTableBucketIntegration(t *testing.T) { env.StartSeaweedFS(t) fmt.Printf(">>> SeaweedFS started.\n") - // Run python script in docker to test STS and S3 Tables operations runLakekeeperTableBucketRepro(t, env) }