[feature](multi-catalog) support map&struct type in parquet&orc reader (#17087)
Support parsing map&struct type in parquet&orc reader. ## Remaining Problems 1. Doris uses the array type to build the key and value columns of a `map`, but doesn't fill the offsets in the value column, so the offsets in the value column are wasted. 2. Parquet supports reading only the key or value column in a `map`; this PR doesn't support that yet. 3. Parquet supports reading partial columns in a `struct`; this PR doesn't support that yet.
This commit is contained in:
@ -669,6 +669,11 @@ public class HiveMetaStoreClientHelper {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The nested column has inner columns, and each column is separated by a comma. The inner column may be a nested
|
||||
* column too, so we cannot simply split by the comma. We need to match the angle brackets,
|
||||
* and deal with the inner column recursively.
|
||||
*/
|
||||
private static int findNextNestedField(String commaSplitFields) {
|
||||
int numLess = 0;
|
||||
for (int i = 0; i < commaSplitFields.length(); i++) {
|
||||
@ -714,12 +719,14 @@ public class HiveMetaStoreClientHelper {
|
||||
default:
|
||||
break;
|
||||
}
|
||||
// resolve schema like array<int>
|
||||
if (lowerCaseType.startsWith("array")) {
|
||||
if (lowerCaseType.indexOf("<") == 5 && lowerCaseType.lastIndexOf(">") == lowerCaseType.length() - 1) {
|
||||
Type innerType = hiveTypeToDorisType(lowerCaseType.substring(6, lowerCaseType.length() - 1));
|
||||
return ArrayType.create(innerType, true);
|
||||
}
|
||||
}
|
||||
// resolve schema like map<text, int>
|
||||
if (lowerCaseType.startsWith("map")) {
|
||||
if (lowerCaseType.indexOf("<") == 3 && lowerCaseType.lastIndexOf(">") == lowerCaseType.length() - 1) {
|
||||
String keyValue = lowerCaseType.substring(4, lowerCaseType.length() - 1);
|
||||
@ -730,6 +737,7 @@ public class HiveMetaStoreClientHelper {
|
||||
}
|
||||
}
|
||||
}
|
||||
// resolve schema like struct<col1: text, col2: int>
|
||||
if (lowerCaseType.startsWith("struct")) {
|
||||
if (lowerCaseType.indexOf("<") == 6 && lowerCaseType.lastIndexOf(">") == lowerCaseType.length() - 1) {
|
||||
String listFields = lowerCaseType.substring(7, lowerCaseType.length() - 1);
|
||||
|
||||
Reference in New Issue
Block a user