lightning: return 0 early on empty parquet files (#52519)

close pingcap/tidb#52518
This commit is contained in:
kennytm
2024-04-12 10:10:53 +08:00
committed by GitHub
parent 572e5c48d9
commit 0362dc81fe
2 changed files with 8 additions and 3 deletions

View File

@ -826,7 +826,7 @@ func SampleFileCompressRatio(ctx context.Context, fileMeta SourceFileMeta, store
// SampleParquetDataSize samples the data size of the parquet file.
func SampleParquetDataSize(ctx context.Context, fileMeta SourceFileMeta, store storage.ExternalStorage) (int64, error) {
totalRowCount, err := ReadParquetFileRowCountByFile(ctx, store, fileMeta)
if err != nil {
if totalRowCount == 0 || err != nil {
return 0, err
}

View File

@ -1108,7 +1108,7 @@ func TestSampleFileCompressRatio(t *testing.T) {
require.InDelta(t, ratio, 5000.0/float64(bf.Len()), 1e-5)
}
func TestSampleParquetDataSize(t *testing.T) {
func testSampleParquetDataSize(t *testing.T, count int) {
s := newTestMydumpLoaderSuite(t)
store, err := storage.NewLocalStorage(s.sourceDir)
require.NoError(t, err)
@ -1133,7 +1133,7 @@ func TestSampleParquetDataSize(t *testing.T) {
t.Logf("seed: %d. To reproduce the random behaviour, manually set `rand.New(rand.NewSource(seed))`", seed)
rnd := rand.New(rand.NewSource(seed))
totalRowSize := 0
for i := 0; i < 1000; i++ {
for i := 0; i < count; i++ {
kl := rnd.Intn(20) + 1
key := make([]byte, kl)
kl, err = rnd.Read(key)
@ -1167,6 +1167,11 @@ func TestSampleParquetDataSize(t *testing.T) {
require.InDelta(t, totalRowSize, size, float64(totalRowSize)/10)
}
// TestSampleParquetDataSize runs the shared testSampleParquetDataSize helper
// once per row count as a named subtest: first with 1000 rows, then with 0.
func TestSampleParquetDataSize(t *testing.T) {
	cases := []struct {
		name  string
		count int
	}{
		{name: "count=1000", count: 1000},
		{name: "count=0", count: 0},
	}
	for _, tc := range cases {
		tc := tc // per-iteration copy for toolchains older than Go 1.22
		t.Run(tc.name, func(t *testing.T) {
			testSampleParquetDataSize(t, tc.count)
		})
	}
}
func TestSetupOptions(t *testing.T) {
// These functions are only used in other components; add this test so that
// they are not deleted by mistake.