[fix](spark-load) no need to filter row group when doing spark load (#13116)

1. Fix issue #13115 
2. Change the `get_next_block` method of `GenericReader` to return the number of rows read ("read_rows") explicitly (see the sketch after this list).
    Some columns in the block may not be filled by the reader; if the first column is left unfilled, `block->rows()` cannot report the real row count.
3. Add more checks for broker load test cases.
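For illustration, here is a minimal, self-contained sketch (hypothetical types, not the real Doris `vectorized::Block` or reader classes) of why relying on `block->rows()` is unsafe when the reader leaves the first column empty, and how an explicit `read_rows` out-parameter avoids the problem:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-in for vectorized::Block: rows() mirrors the behavior
// described above, reporting only the first column's size.
struct Block {
    std::vector<std::vector<int>> columns;
    size_t rows() const { return columns.empty() ? 0 : columns[0].size(); }
};

// Hypothetical reader that fills only the second column (e.g. the first
// column is materialized later) but reports the produced row count.
void get_next_block(Block* block, size_t* read_rows, bool* eof) {
    block->columns.assign(2, {});
    block->columns[1] = {1, 2, 3};  // first column intentionally left empty
    *read_rows = 3;
    *eof = true;
}

int main() {
    Block block;
    size_t read_rows = 0;
    bool eof = false;
    get_next_block(&block, &read_rows, &eof);
    std::cout << "block->rows() = " << block.rows()      // 0: misleading
              << ", read_rows = " << read_rows << "\n";  // 3: correct
}
```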
Author: Mingyu Chen
Date: 2022-10-05 23:00:56 +08:00
Commit: d286aa7bf7 (parent: 90512ebd59)
16 changed files with 48 additions and 20 deletions


@@ -98,7 +98,7 @@ int ArrowReaderWrap::get_column_index(std::string column_name) {
     }
 }
-Status ArrowReaderWrap::get_next_block(vectorized::Block* block, bool* eof) {
+Status ArrowReaderWrap::get_next_block(vectorized::Block* block, size_t* read_row, bool* eof) {
     size_t rows = 0;
     bool tmp_eof = false;
     do {
@@ -107,7 +107,7 @@ Status ArrowReaderWrap::get_next_block(vectorized::Block* block, bool* eof) {
             // We need to make sure the eof is set to true iff block is empty.
             if (tmp_eof) {
                 *eof = (rows == 0);
-                return Status::OK();
+                break;
             }
         }
@@ -129,6 +129,7 @@ Status ArrowReaderWrap::get_next_block(vectorized::Block* block, bool* eof) {
         rows += num_elements;
         _arrow_batch_cur_idx += num_elements;
     } while (!tmp_eof && rows < _state->batch_size());
+    *read_row = rows;
     return Status::OK();
 }
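The `break` matters because the function now has a single exit point that assigns `*read_row`; an early `return` would leave the out-parameter unset on the eof path. Below is a self-contained sketch of the resulting control flow (hypothetical `Reader` with made-up batch counts, not the Doris class):

```cpp
#include <cstddef>
#include <iostream>

// Hypothetical reader modeling the fixed control flow: on source exhaustion
// the loop breaks instead of returning, so *read_row is assigned on every
// exit path and *eof is true iff no rows were produced.
struct Reader {
    int batches_left = 3;         // pretend source holding three 3-row batches
    const size_t batch_size = 5;  // target rows per output block

    void get_next_block(size_t* read_row, bool* eof) {
        size_t rows = 0;
        bool tmp_eof = false;
        do {
            if (batches_left == 0) {
                tmp_eof = true;
                *eof = (rows == 0);  // eof only when the block is empty
                break;               // fall through to assign *read_row
            }
            --batches_left;
            rows += 3;               // accumulate one batch
        } while (!tmp_eof && rows < batch_size);
        *read_row = rows;            // single exit point: always assigned
    }
};

int main() {
    Reader reader;
    size_t read_row = 0;
    bool eof = false;
    while (!eof) {
        reader.get_next_block(&read_row, &eof);
        std::cout << "read_row=" << read_row << " eof=" << eof << "\n";
    }
    // Prints: read_row=6 eof=0, read_row=3 eof=0, read_row=0 eof=1
}
```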