From 0362dc81fe82d22fa911cd8a1d599fbb4b470d48 Mon Sep 17 00:00:00 2001
From: kennytm
Date: Fri, 12 Apr 2024 10:10:53 +0800
Subject: [PATCH] lightning: return 0 early on empty parquet files (#52519)

close pingcap/tidb#52518
---
 pkg/lightning/mydump/loader.go      | 2 +-
 pkg/lightning/mydump/loader_test.go | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/pkg/lightning/mydump/loader.go b/pkg/lightning/mydump/loader.go
index 5003d79637..30c1e316d9 100644
--- a/pkg/lightning/mydump/loader.go
+++ b/pkg/lightning/mydump/loader.go
@@ -826,7 +826,7 @@ func SampleFileCompressRatio(ctx context.Context, fileMeta SourceFileMeta, store
 // SampleParquetDataSize samples the data size of the parquet file.
 func SampleParquetDataSize(ctx context.Context, fileMeta SourceFileMeta, store storage.ExternalStorage) (int64, error) {
 	totalRowCount, err := ReadParquetFileRowCountByFile(ctx, store, fileMeta)
-	if err != nil {
+	if totalRowCount == 0 || err != nil {
 		return 0, err
 	}
 
diff --git a/pkg/lightning/mydump/loader_test.go b/pkg/lightning/mydump/loader_test.go
index a4c66161c7..0734371b85 100644
--- a/pkg/lightning/mydump/loader_test.go
+++ b/pkg/lightning/mydump/loader_test.go
@@ -1108,7 +1108,7 @@ func TestSampleFileCompressRatio(t *testing.T) {
 	require.InDelta(t, ratio, 5000.0/float64(bf.Len()), 1e-5)
 }
 
-func TestSampleParquetDataSize(t *testing.T) {
+func testSampleParquetDataSize(t *testing.T, count int) {
 	s := newTestMydumpLoaderSuite(t)
 	store, err := storage.NewLocalStorage(s.sourceDir)
 	require.NoError(t, err)
@@ -1133,7 +1133,7 @@ func TestSampleParquetDataSize(t *testing.T) {
 	t.Logf("seed: %d. To reproduce the random behaviour, manually set `rand.New(rand.NewSource(seed))`", seed)
 	rnd := rand.New(rand.NewSource(seed))
 	totalRowSize := 0
-	for i := 0; i < 1000; i++ {
+	for i := 0; i < count; i++ {
 		kl := rnd.Intn(20) + 1
 		key := make([]byte, kl)
 		kl, err = rnd.Read(key)
@@ -1167,6 +1167,11 @@ func TestSampleParquetDataSize(t *testing.T) {
 	require.InDelta(t, totalRowSize, size, float64(totalRowSize)/10)
 }
 
+func TestSampleParquetDataSize(t *testing.T) {
+	t.Run("count=1000", func(t *testing.T) { testSampleParquetDataSize(t, 1000) })
+	t.Run("count=0", func(t *testing.T) { testSampleParquetDataSize(t, 0) })
+}
+
 func TestSetupOptions(t *testing.T) {
 	// those functions are only used in other components, add this to avoid they
 	// be deleted mistakenly.