lightning: return 0 early on empty parquet files (#52519)
close pingcap/tidb#52518
This commit is contained in:
@ -826,7 +826,7 @@ func SampleFileCompressRatio(ctx context.Context, fileMeta SourceFileMeta, store
|
||||
// SampleParquetDataSize samples the data size of the parquet file.
|
||||
func SampleParquetDataSize(ctx context.Context, fileMeta SourceFileMeta, store storage.ExternalStorage) (int64, error) {
|
||||
totalRowCount, err := ReadParquetFileRowCountByFile(ctx, store, fileMeta)
|
||||
if err != nil {
|
||||
if totalRowCount == 0 || err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
|
||||
@ -1108,7 +1108,7 @@ func TestSampleFileCompressRatio(t *testing.T) {
|
||||
require.InDelta(t, ratio, 5000.0/float64(bf.Len()), 1e-5)
|
||||
}
|
||||
|
||||
func TestSampleParquetDataSize(t *testing.T) {
|
||||
func testSampleParquetDataSize(t *testing.T, count int) {
|
||||
s := newTestMydumpLoaderSuite(t)
|
||||
store, err := storage.NewLocalStorage(s.sourceDir)
|
||||
require.NoError(t, err)
|
||||
@ -1133,7 +1133,7 @@ func TestSampleParquetDataSize(t *testing.T) {
|
||||
t.Logf("seed: %d. To reproduce the random behaviour, manually set `rand.New(rand.NewSource(seed))`", seed)
|
||||
rnd := rand.New(rand.NewSource(seed))
|
||||
totalRowSize := 0
|
||||
for i := 0; i < 1000; i++ {
|
||||
for i := 0; i < count; i++ {
|
||||
kl := rnd.Intn(20) + 1
|
||||
key := make([]byte, kl)
|
||||
kl, err = rnd.Read(key)
|
||||
@ -1167,6 +1167,11 @@ func TestSampleParquetDataSize(t *testing.T) {
|
||||
require.InDelta(t, totalRowSize, size, float64(totalRowSize)/10)
|
||||
}
|
||||
|
||||
func TestSampleParquetDataSize(t *testing.T) {
|
||||
t.Run("count=1000", func(t *testing.T) { testSampleParquetDataSize(t, 1000) })
|
||||
t.Run("count=0", func(t *testing.T) { testSampleParquetDataSize(t, 0) })
|
||||
}
|
||||
|
||||
func TestSetupOptions(t *testing.T) {
|
||||
// These functions are only used in other components; add this to prevent them
|
||||
// from being deleted mistakenly.
|
||||
|
||||
Reference in New Issue
Block a user