[Feature](avro) Support Apache Avro file format (#19990)
Support reading Avro files via the hdfs() or s3() table-value functions.
```sql
select * from s3(
"uri" = "http://127.0.0.1:9312/test2/person.avro",
"ACCESS_KEY" = "ak",
"SECRET_KEY" = "sk",
"FORMAT" = "avro");
+--------+--------------+-------------+-----------------+
| name | boolean_type | double_type | long_type |
+--------+--------------+-------------+-----------------+
| Alyssa | 1 | 10.0012 | 100000000221133 |
| Ben | 0 | 5555.999 | 4009990000 |
| lisi | 0 | 5992225.999 | 9099933330 |
+--------+--------------+-------------+-----------------+
select * from hdfs(
"uri" = "hdfs://127.0.0.1:9000/input/person2.avro",
"fs.defaultFS" = "hdfs://127.0.0.1:9000",
"hadoop.username" = "doris",
"format" = "avro");
+--------+--------------+-------------+-----------+
| name | boolean_type | double_type | long_type |
+--------+--------------+-------------+-----------+
| Alyssa | 1 | 8888.99999 | 89898989 |
+--------+--------------+-------------+-----------+
```
The current Avro reader only supports common data types; complex data types will be supported later.
This commit is contained in:
@ -55,6 +55,10 @@ under the License.
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
||||
@ -21,6 +21,7 @@ package org.apache.doris.common.jni;
|
||||
import org.apache.doris.common.jni.vec.ColumnType;
|
||||
import org.apache.doris.common.jni.vec.ColumnValue;
|
||||
import org.apache.doris.common.jni.vec.ScanPredicate;
|
||||
import org.apache.doris.common.jni.vec.TableSchema;
|
||||
import org.apache.doris.common.jni.vec.VectorTable;
|
||||
|
||||
import java.io.IOException;
|
||||
@ -43,6 +44,9 @@ public abstract class JniScanner {
|
||||
// Scan data and save as vector table
|
||||
protected abstract int getNext() throws IOException;
|
||||
|
||||
// parse table schema
|
||||
protected abstract TableSchema parseTableSchema() throws UnsupportedOperationException;
|
||||
|
||||
protected void initTableInfo(ColumnType[] requiredTypes, String[] requiredFields, ScanPredicate[] predicates,
|
||||
int batchSize) {
|
||||
this.types = requiredTypes;
|
||||
@ -63,6 +67,11 @@ public abstract class JniScanner {
|
||||
return vectorTable;
|
||||
}
|
||||
|
||||
public String getTableSchema() throws IOException {
|
||||
TableSchema tableSchema = parseTableSchema();
|
||||
return tableSchema.getTableSchema();
|
||||
}
|
||||
|
||||
public long getNextBatchMeta() throws IOException {
|
||||
if (vectorTable == null) {
|
||||
vectorTable = new VectorTable(types, fields, predicates, batchSize);
|
||||
@ -95,7 +104,7 @@ public abstract class JniScanner {
|
||||
return vectorTable.getMetaAddress();
|
||||
}
|
||||
|
||||
protected void resetTable() {
|
||||
public void resetTable() {
|
||||
if (vectorTable != null) {
|
||||
vectorTable.reset();
|
||||
}
|
||||
@ -105,7 +114,7 @@ public abstract class JniScanner {
|
||||
vectorTable.releaseColumn(fieldId);
|
||||
}
|
||||
|
||||
protected void releaseTable() {
|
||||
public void releaseTable() {
|
||||
if (vectorTable != null) {
|
||||
vectorTable.close();
|
||||
}
|
||||
|
||||
@ -21,6 +21,7 @@ package org.apache.doris.common.jni;
|
||||
import org.apache.doris.common.jni.vec.ColumnType;
|
||||
import org.apache.doris.common.jni.vec.ColumnValue;
|
||||
import org.apache.doris.common.jni.vec.ScanPredicate;
|
||||
import org.apache.doris.common.jni.vec.TableSchema;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@ -143,7 +144,7 @@ public class MockJniScanner extends JniScanner {
|
||||
|
||||
private static final Logger LOG = Logger.getLogger(MockJniScanner.class);
|
||||
|
||||
private final int mockRows;
|
||||
private int mockRows;
|
||||
private int readRows = 0;
|
||||
private final MockColumnValue columnValue = new MockColumnValue();
|
||||
|
||||
@ -195,4 +196,9 @@ public class MockJniScanner extends JniScanner {
|
||||
readRows += rows;
|
||||
return rows;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TableSchema parseTableSchema() throws UnsupportedOperationException {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,83 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.doris.common.jni.vec;
|
||||
|
||||
import org.apache.doris.thrift.TPrimitiveType;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Used to parse the file structure of table-value-function type.
|
||||
* like avro file.
|
||||
*/
|
||||
public class TableSchema {
|
||||
private final List<SchemaColumn> schemaColumns;
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
public TableSchema(List<SchemaColumn> schemaColumns) {
|
||||
this.schemaColumns = schemaColumns;
|
||||
this.objectMapper = new ObjectMapper();
|
||||
}
|
||||
|
||||
public String getTableSchema() throws IOException {
|
||||
try {
|
||||
return objectMapper.writeValueAsString(schemaColumns);
|
||||
} catch (JsonProcessingException e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static class SchemaColumn {
|
||||
private String name;
|
||||
private int type;
|
||||
private SchemaColumn childColumn;
|
||||
|
||||
public SchemaColumn() {
|
||||
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public SchemaColumn getChildColumn() {
|
||||
return childColumn;
|
||||
}
|
||||
|
||||
public int getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public void setType(TPrimitiveType type) {
|
||||
this.type = type.getValue();
|
||||
}
|
||||
|
||||
public void addChildColumn(SchemaColumn childColumn) {
|
||||
this.childColumn = childColumn;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user