[opt](hudi) using native reader to read the base file with no log file (#20988)

Two optimizations:
1. Insert string bytes directly to avoid the decoding and encoding step.
2. Use the native reader to read a Hudi base file when it has no log file. Use `explain` to show how many splits are read natively.
This commit is contained in:
Ashin Gau
2023-06-20 11:20:21 +08:00
committed by GitHub
parent 7e01f074e2
commit 923f7edad0
11 changed files with 88 additions and 12 deletions

View File

@ -50,6 +50,11 @@ public class MockJniScanner extends JniScanner {
this.j = j;
}
/**
 * Reports whether string values can be fetched as raw bytes.
 *
 * @return always {@code false} in this implementation
 */
// NOTE(review): presumably a false result makes the caller read the value via getString()
// instead of getBytes() — confirm against the caller.
@Override
public boolean canGetStringAsBytes() {
return false;
}
@Override
public boolean isNull() {
return false;

View File

@ -27,6 +27,9 @@ import java.util.List;
* Column value in vector column
*/
public interface ColumnValue {
// Whether a string value can be read directly as bytes, avoiding decoding and encoding
boolean canGetStringAsBytes();
boolean isNull();
boolean getBoolean();

View File

@ -118,6 +118,11 @@ public class ScanPredicate {
}
}
/**
 * Reports whether string values can be fetched as raw bytes.
 *
 * @return always {@code false} in this implementation
 */
// NOTE(review): presumably a false result makes the caller read the value via getString()
// instead of getBytes() — confirm against the caller.
@Override
public boolean canGetStringAsBytes() {
return false;
}
@Override
public String toString() {
return inspectObject().toString();

View File

@ -597,7 +597,11 @@ public class VectorColumn {
case CHAR:
case VARCHAR:
case STRING:
appendStringAndOffset(o.getString());
if (o.canGetStringAsBytes()) {
appendBytesAndOffset(o.getBytes());
} else {
appendStringAndOffset(o.getString());
}
break;
case BINARY:
appendBytesAndOffset(o.getBytes());