doris/gensrc/thrift/PaloExternalDataSourceService.thrift

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

namespace java org.apache.doris.thrift
namespace cpp doris

include "Exprs.thrift"
include "Opcodes.thrift"
include "Status.thrift"
include "Types.thrift"

// A result set column descriptor.
// this definition id different from column desc in palo, the column desc in palo only support scalar type, does not support map, array
// so that should convert palo column desc into ExtColumnDesc
struct TExtColumnDesc {
  // The column name as given in the Create .. statement. Always set.
  1: optional string name
  // The column type. Always set.
  2: optional Types.TTypeDesc type
}

// Metadata used to describe the schema (column names, types, comments)
// of result sets.
struct TExtTableSchema {
  // List of columns. Always set.
  1: optional list<TExtColumnDesc> cols
}

struct TExtLiteral {
  1: required Exprs.TExprNodeType node_type
  2: optional Exprs.TBoolLiteral bool_literal
  3: optional Exprs.TDateLiteral date_literal
  4: optional Exprs.TFloatLiteral float_literal
  5: optional Exprs.TIntLiteral int_literal
  6: optional Exprs.TStringLiteral string_literal
  7: optional Exprs.TDecimalLiteral decimal_literal
  8: optional Exprs.TLargeIntLiteral large_int_literal
}

// Binary predicates that can be pushed to the external data source and
// are of the form <col> <op> <val>. Sources can choose to accept or reject
// predicates via the return value of prepare(), see TPrepareResult.
// The column and the value are guaranteed to be type compatible in Impala,
// but they are not necessarily the same type, so the data source
// implementation may need to do an implicit cast.
// > < = != >= <=
struct TExtBinaryPredicate {
  // Column on which the predicate is applied. Always set.
  1: optional TExtColumnDesc col
  // Comparison operator. Always set.
  2: optional Opcodes.TExprOpcode op
  // Value on the right side of the binary predicate. Always set.
  3: optional TExtLiteral value
}

struct TExtInPredicate {
  1: optional bool is_not_in
  // Column on which the predicate is applied. Always set.
  2: optional TExtColumnDesc col
  // Value on the right side of the binary predicate. Always set.
  3: optional list<TExtLiteral> values
}

struct TExtLikePredicate {
  1: optional TExtColumnDesc col
  2: optional TExtLiteral value
}

struct TExtIsNullPredicate {
  1: optional bool is_not_null
  2: optional TExtColumnDesc col
}

struct TExtFunction {
  1: optional string func_name
  // input parameter column descs
  2: optional list<TExtColumnDesc> cols
  // input parameter column literals
  3: optional list<TExtLiteral> values
}

// a union of all predicates
struct TExtPredicate {
  1: required Exprs.TExprNodeType node_type
  2: optional TExtBinaryPredicate binary_predicate
  3: optional TExtInPredicate in_predicate
  4: optional TExtLikePredicate like_predicate
  5: optional TExtIsNullPredicate is_null_predicate
  6: optional TExtFunction ext_function
}

// A union over all possible return types for a column of data
// Currently only used by ExternalDataSource types
//
struct TExtColumnData {
  // One element in the list for every row in the column indicating if there is
  // a value in the vals list or a null.
  1: required list<bool> is_null;

  // Only one is set, only non-null values are set. this indicates one column data for a row batch
  2: optional list<bool> bool_vals;
  3: optional list<byte> byte_vals;
  4: optional list<i16> short_vals;
  5: optional list<i32> int_vals;
  6: optional list<i64> long_vals;
  7: optional list<double> double_vals;
  8: optional list<string> string_vals;
  9: optional list<binary> binary_vals;
}

// Serialized batch of rows returned by getNext().
// one row batch contains mult rows, and the result is arranged in column style
struct TExtRowBatch {
  // Each TColumnData contains the data for an entire column. Always set.
  1: optional list<TExtColumnData> cols

  // The number of rows returned. For count(*) queries, there may not be
  // any materialized columns so cols will be an empty list and this value
  // will indicate how many rows are returned. When there are materialized
  // columns, this number should be the same as the size of each
  // TColumnData.is_null list.
  2: optional i64 num_rows
}

// Parameters to prepare().
struct TExtPrepareParams {
  // The name of the table. Always set.
  1: optional string table_name

  // A string specified for the table that is passed to the external data source.
  // Always set, may be an empty string.
  2: optional string init_string

  // A list of conjunctive (AND) clauses, each of which contains a list of
  // disjunctive (OR) binary predicates. Always set, may be an empty list.
  3: optional list<list<TExtPredicate>> predicates
}

// Returned by prepare().
struct TExtPrepareResult {
  1: required Status.TStatus status

  // Estimate of the total number of rows returned when applying the predicates indicated
  // by accepted_conjuncts. Not set if the data source does not support providing
  // this statistic.
  2: optional i64 num_rows_estimate

  // Accepted conjuncts. References the 'predicates' parameter in the prepare()
  // call. It contains the 0-based indices of the top-level list elements (the
  // AND elements) that the library will be able to apply during the scan. Those
  // elements that aren’t referenced in accepted_conjuncts will be evaluated by
  // Impala itself.
  3: optional list<i32> accepted_conjuncts
}

// Parameters to open().
struct TExtOpenParams {
  // A unique identifier for the query. Always set.
  1: optional Types.TUniqueId query_id

  // The name of the table. Always set.
  2: optional string table_name

  // A string specified for the table that is passed to the external data source.
  // Always set, may be an empty string.
  3: optional map<string,string> properties

  // The authenticated user name. Always set.
  4: optional string authenticated_user_name

  // The schema of the rows that the scan needs to return. Always set.
  5: optional TExtTableSchema row_schema

  // The expected size of the row batches it returns in the subsequent getNext() calls.
  // Always set.
  6: optional i32 batch_size

  7: optional list<list<TExtPredicate>> predicates

  // The query limit, if specified.
  8: optional i64 limit
}

// Returned by open().
struct TExtOpenResult {
  1: required Status.TStatus status

  // An opaque handle used in subsequent getNext()/close() calls. Required.
  2: optional string scan_handle
  3: optional list<i32> accepted_conjuncts
}

// Parameters to getNext()
struct TExtGetNextParams {
  // The opaque handle returned by the previous open() call. Always set.
  1: optional string scan_handle    // es search context id
  2: optional i64 offset            // es should check the offset to prevent duplicate rpc calls
}

// Returned by getNext().
struct TExtGetNextResult {
  1: required Status.TStatus status

  // If true, reached the end of the result stream; subsequent calls to
  // getNext() won’t return any more results. Required.
  2: optional bool eos

  // A batch of rows to return, if any exist. The number of rows in the batch
  // should be less than or equal to the batch_size specified in TOpenParams.
  3: optional TExtRowBatch rows
}

// Parameters to close()
struct TExtCloseParams {
  // The opaque handle returned by the previous open() call. Always set.
  1: optional string scan_handle
}

// Returned by close().
struct TExtCloseResult {
  1: required Status.TStatus status
}

// 这个data source可以认为是palo统一的外部data source的入口
service TExtDataSourceService {
    // 1. palo be call this api to send index, type, shard id to es
    // 2. es will open a search context and prepare data, will return a context id
    TExtOpenResult open(1: TExtOpenParams params);
    // 1. palo be will send a search context id to es
    // 2. es will find the search context and find a batch rows and send to palo
    // 3. palo will run the remaining predicates when receving data
    // 4. es should check the offset when receive the request
    TExtGetNextResult getNext(1: TExtGetNextParams params);
    // 1. es will release the context when receiving the data
    TExtCloseResult close(1: TExtCloseParams params);
}