[opt](hudi) using native reader to read the base file with no log file (#20988)
Two optimizations: 1. Insert string bytes directly to skip the decoding and encoding step. 2. Use the native reader to read a Hudi base file when it has no log file. Use `explain` to show how many splits are read natively.
This commit is contained in:
@ -50,6 +50,11 @@ public class MockJniScanner extends JniScanner {
|
||||
this.j = j;
|
||||
}
|
||||
|
||||
/**
 * Always returns {@code false}, so a caller that checks this flag before
 * reading a string value takes the {@code getString()} path rather than
 * the {@code getBytes()} path.
 */
@Override
|
||||
public boolean canGetStringAsBytes() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isNull() {
|
||||
return false;
|
||||
|
||||
@ -27,6 +27,9 @@ import java.util.List;
|
||||
* Column value in vector column
|
||||
*/
|
||||
public interface ColumnValue {
|
||||
// Returns true if a string value can be read directly via getBytes(), avoiding a decode/encode round trip
|
||||
boolean canGetStringAsBytes();
|
||||
|
||||
boolean isNull();
|
||||
|
||||
boolean getBoolean();
|
||||
|
||||
@ -118,6 +118,11 @@ public class ScanPredicate {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Always returns {@code false}, so a caller that checks this flag before
 * reading a string value takes the {@code getString()} path rather than
 * the {@code getBytes()} path.
 */
@Override
|
||||
public boolean canGetStringAsBytes() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return inspectObject().toString();
|
||||
|
||||
@ -597,7 +597,11 @@ public class VectorColumn {
|
||||
case CHAR:
|
||||
case VARCHAR:
|
||||
case STRING:
|
||||
appendStringAndOffset(o.getString());
|
||||
if (o.canGetStringAsBytes()) {
|
||||
appendBytesAndOffset(o.getBytes());
|
||||
} else {
|
||||
appendStringAndOffset(o.getString());
|
||||
}
|
||||
break;
|
||||
case BINARY:
|
||||
appendBytesAndOffset(o.getBytes());
|
||||
|
||||
Reference in New Issue
Block a user