diff --git a/deps/oblib/src/common/object/ob_obj_compare.cpp b/deps/oblib/src/common/object/ob_obj_compare.cpp index a3937327cb..7052253d95 100644 --- a/deps/oblib/src/common/object/ob_obj_compare.cpp +++ b/deps/oblib/src/common/object/ob_obj_compare.cpp @@ -1406,6 +1406,32 @@ int ObObjCmpFuncs::cmp_func(const ObObj &obj1, \ return result; \ } + +#define DEFINE_CMP_FUNC_JSON_EXTEND() \ + template <> inline \ + int ObObjCmpFuncs::cmp_func(const ObObj &obj1, \ + const ObObj &/*obj2*/, \ + const ObCompareCtx &/*cmp_ctx*/) \ + { \ + int result = INT_TO_CR(-1); \ + \ + return result; \ + } + +#define DEFINE_CMP_OP_FUNC_JSON_EXTEND(op, op_str) \ + template <> inline \ + int ObObjCmpFuncs::cmp_op_func(const ObObj &obj1, \ + const ObObj &obj2, \ + const ObCompareCtx &cmp_ctx) \ + { \ + OBJ_TYPE_CLASS_CHECK(obj1, ObJsonTC); \ + OBJ_TYPE_CLASS_CHECK(obj2, ObExtendTC); \ + UNUSED(cmp_ctx); \ + int result = INT_TO_CR(-1); \ + \ + return result; \ + } + // geometrytc vs geometrytc #define DEFINE_CMP_OP_FUNC_GEOMETRY_GEOMETRY(op, op_str) \ template <> inline \ @@ -2715,6 +2741,15 @@ int ObObjCmpFuncs::cmp_func(const ObObj &obj1, \ DEFINE_CMP_OP_FUNC_EXT_XXX(CO_NE, CO_NE); \ DEFINE_CMP_FUNC_EXT_XXX() +#define DEFINE_CMP_FUNCS_JSON_EXTEND() \ + DEFINE_CMP_OP_FUNC_JSON_EXTEND(CO_EQ, ==); \ + DEFINE_CMP_OP_FUNC_JSON_EXTEND(CO_LE, <=); \ + DEFINE_CMP_OP_FUNC_JSON_EXTEND(CO_LT, < ); \ + DEFINE_CMP_OP_FUNC_JSON_EXTEND(CO_GE, >=); \ + DEFINE_CMP_OP_FUNC_JSON_EXTEND(CO_GT, > ); \ + DEFINE_CMP_OP_FUNC_JSON_EXTEND(CO_NE, !=); \ + DEFINE_CMP_FUNC_JSON_EXTEND() + #define DEFINE_CMP_FUNCS_UNKNOWN_UNKNOWN() \ DEFINE_CMP_FUNCS(ObUnknownTC, unknown); @@ -2813,6 +2848,7 @@ DEFINE_CMP_FUNCS_NULL_XXX(); DEFINE_CMP_FUNCS_XXX_NULL(); DEFINE_CMP_FUNCS_XXX_EXT(); DEFINE_CMP_FUNCS_EXT_XXX(); +DEFINE_CMP_FUNCS_JSON_EXTEND(); #define DEFINE_CMP_FUNCS_ENTRY(tc1, tc2) \ { \ @@ -4429,7 +4465,7 @@ const obj_cmp_func ObObjCmpFuncs::cmp_funcs[ObMaxTC][ObMaxTC][CO_MAX] = DEFINE_CMP_FUNCS_ENTRY_NULL, // time 
DEFINE_CMP_FUNCS_ENTRY_NULL, // year DEFINE_CMP_FUNCS_ENTRY_NULL, // string - DEFINE_CMP_FUNCS_ENTRY_NULL, //extend + DEFINE_CMP_FUNCS_ENTRY(ObJsonTC, ObExtendTC), //extend DEFINE_CMP_FUNCS_ENTRY_NULL, // unknown DEFINE_CMP_FUNCS_ENTRY_NULL, // text DEFINE_CMP_FUNCS_ENTRY_NULL, // bit diff --git a/deps/oblib/src/common/object/ob_object.cpp b/deps/oblib/src/common/object/ob_object.cpp index a5c9b9a1d1..9e8ddfe840 100644 --- a/deps/oblib/src/common/object/ob_object.cpp +++ b/deps/oblib/src/common/object/ob_object.cpp @@ -1016,6 +1016,95 @@ OB_DEF_SERIALIZE_SIZE(ObLobLocatorV2) return size_ + sizeof(size_) + sizeof(has_lob_header_); } +ObDocId::ObDocId() + : tablet_id_(ObTabletID::INVALID_TABLET_ID), + seq_id_(0) +{ + static_assert(sizeof(ObDocId) == OB_DOC_ID_COLUMN_BYTE_LENGTH, "size of ObDocId isn't equal to OB_DOC_ID_COLUMN_BYTE_LENGTH"); +} + +ObDocId::ObDocId(const uint64_t tablet_id, const uint64_t seq_id) + : tablet_id_(tablet_id), + seq_id_(seq_id) +{ + static_assert(sizeof(ObDocId) == OB_DOC_ID_COLUMN_BYTE_LENGTH, "size of ObDocId isn't equal to OB_DOC_ID_COLUMN_BYTE_LENGTH"); +} + +bool ObDocId::operator==(const ObDocId &other) const +{ + return tablet_id_ == other.tablet_id_ && seq_id_ == other.seq_id_; +} + +bool ObDocId::operator!=(const ObDocId &other) const +{ + return !(operator==(other)); +} + +bool ObDocId::operator <(const ObDocId &other) const +{ + bool bool_ret = false; + + if (tablet_id_ < other.tablet_id_) { + bool_ret= true; + } else if (tablet_id_ > other.tablet_id_) { + bool_ret = false; + } else if (seq_id_ < other.seq_id_) { + bool_ret= true; + } else if (seq_id_ > other.seq_id_) { + bool_ret = false; + } + + return bool_ret; +} + +bool ObDocId::operator >(const ObDocId &other) const +{ + bool bool_ret = false; + + if (tablet_id_ < other.tablet_id_) { + bool_ret = false; + } else if (tablet_id_ > other.tablet_id_) { + bool_ret= true; + } else if (seq_id_ < other.seq_id_) { + bool_ret = false; + } else if (seq_id_ > other.seq_id_) { + 
bool_ret= true; + } + + return bool_ret; + +} + +void ObDocId::reset() +{ + tablet_id_ = ObTabletID::INVALID_TABLET_ID; + seq_id_ = 0; +} + +bool ObDocId::is_valid() const +{ + return ObTabletID(tablet_id_).is_valid() && seq_id_ > 0; +} + +ObString ObDocId::get_string() const +{ + return ObString(OB_DOC_ID_COLUMN_BYTE_LENGTH, reinterpret_cast(this)); +} + +int ObDocId::from_string(const ObString &doc_id) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(doc_id.ptr()) || OB_UNLIKELY(doc_id.length() < OB_DOC_ID_COLUMN_BYTE_LENGTH)) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "invalid document id", K(ret)); + } else { + const ObDocId *doc_id_ptr = reinterpret_cast(doc_id.ptr()); + tablet_id_ = doc_id_ptr->tablet_id_; + seq_id_ = doc_id_ptr->seq_id_; + } + return ret; +} + #define PRINT_META() //#define PRINT_META() BUF_PRINTO(obj.get_meta()); J_COLON(); diff --git a/deps/oblib/src/common/object/ob_object.h b/deps/oblib/src/common/object/ob_object.h index bf5f2991ce..f88a230ba3 100644 --- a/deps/oblib/src/common/object/ob_object.h +++ b/deps/oblib/src/common/object/ob_object.h @@ -1150,6 +1150,29 @@ public: bool has_lob_header_; // for observer 4.0 compatibility }; +class ObDocId final +{ +public: + ObDocId(); + ObDocId(const uint64_t tablet_id, const uint64_t seq_id); + ~ObDocId() = default; + + void reset(); + bool is_valid() const; + ObString get_string() const; + int from_string(const ObString &doc_id); + + bool operator ==(const ObDocId &other) const; + bool operator !=(const ObDocId &other) const; + bool operator <(const ObDocId &other) const; + bool operator >(const ObDocId &other) const; + + TO_STRING_KV(K_(tablet_id), K_(seq_id)); +public: + uint64_t tablet_id_; + uint64_t seq_id_; +}; + struct ObObjPrintParams { ObObjPrintParams (const ObTimeZoneInfo *tz_info, ObCollationType cs_type): diff --git a/deps/oblib/src/common/row/ob_row_iterator.h b/deps/oblib/src/common/row/ob_row_iterator.h index 3aa1f6924c..a2dd2b7ce0 100644 --- 
a/deps/oblib/src/common/row/ob_row_iterator.h +++ b/deps/oblib/src/common/row/ob_row_iterator.h @@ -29,6 +29,7 @@ public: ObTableScanIterator = 1, ObLocalIndexLookupIterator = 2, ObGroupLookupOp = 3, + ObTextRetrievalOp = 4, }; public: ObNewRowIterator() : type_(Other) {} diff --git a/deps/oblib/src/common/rowkey/ob_rowkey_info.cpp b/deps/oblib/src/common/rowkey/ob_rowkey_info.cpp index ec1690073c..cb4f6c20d7 100644 --- a/deps/oblib/src/common/rowkey/ob_rowkey_info.cpp +++ b/deps/oblib/src/common/rowkey/ob_rowkey_info.cpp @@ -87,6 +87,7 @@ ObRowkeyColumn& ObRowkeyColumn::operator=(const ObRowkeyColumn &other) this->order_ = other.order_; this->fulltext_flag_ = other.fulltext_flag_; this->spatial_flag_ = other.spatial_flag_; + this->multivalue_flag_ = other.multivalue_flag_; return *this; } @@ -94,7 +95,11 @@ bool ObRowkeyColumn::operator==(const ObRowkeyColumn &other) const { return this->column_id_ == other.column_id_ && - this->is_equal_except_column_id(other); + this->type_ == other.type_ && + this->order_ == other.order_ && + this->fulltext_flag_ == other.fulltext_flag_ && + this->spatial_flag_ == other.spatial_flag_ && + this->multivalue_flag_ == other.multivalue_flag_; } ObRowkeyInfo::ObRowkeyInfo() @@ -455,22 +460,6 @@ int ObRowkeyInfo::get_column_ids(ObBitSet<> &column_ids) const return ret; } -int ObRowkeyInfo::get_fulltext_column(uint64_t &column_id) const -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(columns_)) { - ret = OB_NOT_INIT; - COMMON_LOG(WARN, "columns is null"); - } - for (int64_t i = 0; OB_SUCC(ret) && i < size_; i++) { - if (columns_[i].fulltext_flag_) { - column_id = columns_[i].column_id_; - break; - } - } - return ret; -} - int ObRowkeyInfo::get_spatial_col_id_by_type(uint64_t &column_id, ObObjType type) const { int ret = OB_SUCCESS; diff --git a/deps/oblib/src/common/rowkey/ob_rowkey_info.h b/deps/oblib/src/common/rowkey/ob_rowkey_info.h index f763a9a47a..f645b4e128 100644 --- a/deps/oblib/src/common/rowkey/ob_rowkey_info.h +++ 
b/deps/oblib/src/common/rowkey/ob_rowkey_info.h @@ -48,6 +48,7 @@ struct ObRowkeyColumn ObOrderType order_; bool fulltext_flag_; bool spatial_flag_; + bool multivalue_flag_; NEED_SERIALIZE_AND_DESERIALIZE; }; @@ -92,7 +93,6 @@ public: int get_index(const uint64_t column_id, int64_t &index, ObRowkeyColumn &column) const; int get_index(const uint64_t column_id, int64_t &index) const; int is_rowkey_column(const uint64_t column_id, bool &is_rowkey) const; - int get_fulltext_column(uint64_t &column_id) const; int get_spatial_cellid_col_id(uint64_t &column_id) const; int get_spatial_mbr_col_id(uint64_t &column_id) const; int set_column(const int64_t idx, const ObRowkeyColumn &column); @@ -108,6 +108,7 @@ public: NEED_SERIALIZE_AND_DESERIALIZE; private: int expand(const int64_t size); + int get_fulltext_col_id_by_type(const ObObjType type, uint64_t &column_id) const; int get_spatial_col_id_by_type(uint64_t &column_id, ObObjType type) const; static const int64_t DEFAULT_ROWKEY_COLUMN_ARRAY_CAPACITY = 8; ObRowkeyColumn *columns_; diff --git a/deps/oblib/src/lib/allocator/ob_malloc.cpp b/deps/oblib/src/lib/allocator/ob_malloc.cpp index 7ebf8559ce..9685746415 100644 --- a/deps/oblib/src/lib/allocator/ob_malloc.cpp +++ b/deps/oblib/src/lib/allocator/ob_malloc.cpp @@ -63,7 +63,7 @@ void *oceanbase::common::ob_malloc_align(const int64_t align, const int64_t nbyt { const int min_align = 16; const int64_t alignment = - align <= min_align ? min_align : align_up2(align, 16); + align <= min_align ? 
// Allocates room for `count` objects of type T from `pool` and
// default-constructs them in place with array placement-new.
//
//   T      element type
//   pool   pointer to an object providing alloc(size); may be NULL
//   count  number of elements; evaluated exactly once
//
// Evaluates to the address of the first element, or NULL when the pool is
// NULL, the count is not positive, or the pool returns NULL.
//
// The count is captured in a local so that an argument such as `a + b` or
// `n++` is neither mis-parenthesised in the size computation nor evaluated
// more than once.
// NOTE(review): the buffer is sized as exactly sizeof(T) * count, which
// assumes array placement-new adds no bookkeeping overhead on the supported
// toolchains - confirm.
#define OB_NEW_ARRAY(T, pool, count)                                        \
  ({                                                                        \
    T* _ret_ = NULL;                                                        \
    const int64_t _count_ = (count);                                        \
    if (OB_NOT_NULL(pool) && _count_ > 0) {                                 \
      const int64_t _size_ = static_cast<int64_t>(sizeof(T)) * _count_;     \
      void *_buf_ = (pool)->alloc(_size_);                                  \
      if (OB_NOT_NULL(_buf_))                                               \
      {                                                                     \
        _ret_ = new(_buf_) T[_count_];                                      \
      }                                                                     \
    }                                                                       \
    _ret_;                                                                  \
  })
cs_info->casedn_multiply; if (1 == casemulti) { diff --git a/deps/oblib/src/lib/charset/ob_charset.h b/deps/oblib/src/lib/charset/ob_charset.h index b81730d9d4..04b8014932 100644 --- a/deps/oblib/src/lib/charset/ob_charset.h +++ b/deps/oblib/src/lib/charset/ob_charset.h @@ -475,6 +475,9 @@ public: static int tolower(const ObCollationType collation_type, const ObString &src, ObString &dst, ObIAllocator &allocator); + static int tolower(const ObCharsetInfo *cs, + const ObString &src, ObString &dst, + ObIAllocator &allocator); static bool case_insensitive_equal(const ObString &one, const ObString &another, diff --git a/deps/oblib/src/lib/charset/ob_ctype.h b/deps/oblib/src/lib/charset/ob_ctype.h index a538047279..8368c497da 100644 --- a/deps/oblib/src/lib/charset/ob_ctype.h +++ b/deps/oblib/src/lib/charset/ob_ctype.h @@ -345,6 +345,7 @@ struct ObCharsetInfo unsigned char casedn_multiply; unsigned int mbminlen; unsigned int mbmaxlen; + unsigned int mbmaxlenlen; ob_wc_t min_sort_char; ob_wc_t max_sort_char; /* For LIKE optimization */ unsigned char pad_char; @@ -402,7 +403,9 @@ static inline unsigned int ob_ismbchar(const ObCharsetInfo *cs, const unsigned c } #define ob_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a))) +#define ob_mbcharlen_2(s, a, b) ((s)->cset->mbcharlen((s), ((((a)&0xFF) << 8) + ((b)&0xFF)))) +#define ob_mbmaxlenlen(s) ((s)->mbmaxlenlen) typedef struct ob_uni_ctype { @@ -527,6 +530,8 @@ unsigned int ob_instr_mb(const ObCharsetInfo *cs, const char *s, size_t s_length, ob_match_t *match, unsigned int nmatch); +uint ob_mbcharlen_ptr(const struct ObCharsetInfo *cs, const char *s, const char *e); + void ob_hash_sort_simple(const ObCharsetInfo *cs, const unsigned char *key, size_t len, ulong *nr1, ulong *nr2, diff --git a/deps/oblib/src/lib/charset/ob_ctype_bin.cc b/deps/oblib/src/lib/charset/ob_ctype_bin.cc index 143dc6c95a..c5ad3a55a9 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_bin.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_bin.cc @@ -467,6 +467,7 
@@ ObCharsetInfo ob_charset_bin = 1, 1, 1, + 1, 0, 255, 0, diff --git a/deps/oblib/src/lib/charset/ob_ctype_gb18030.cc b/deps/oblib/src/lib/charset/ob_ctype_gb18030.cc index c9c213de1e..086d9f778d 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_gb18030.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_gb18030.cc @@ -1015,6 +1015,7 @@ ObCharsetInfo ob_charset_gb18030_chinese_ci = { 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ + 2, /* mbmaxlenlen */ 0, /* min_sort_char */ 0xFE39FE39, /* max_sort_char */ ' ', /* pad char */ @@ -1050,6 +1051,7 @@ ObCharsetInfo ob_charset_gb18030_chinese_cs = { 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ + 2, /* mbmaxlenlen */ 0, /* min_sort_char */ 0xFE39FE39, /* max_sort_char */ ' ', /* pad char */ @@ -1085,6 +1087,7 @@ ObCharsetInfo ob_charset_gb18030_bin = { 2, 1, 4, + 2, /* mbmaxlenlen */ 0, 0xFEFEFEFE, ' ', @@ -1642,6 +1645,7 @@ ObCharsetInfo ob_charset_gb18030_2022_bin = 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ + 2, /* mbmaxlenlen */ 0, /* min_sort_char */ 0xFEFEFEFE, /* max_sort_char */ ' ', /* pad char */ @@ -1677,6 +1681,7 @@ ObCharsetInfo ob_charset_gb18030_2022_pinyin_ci = 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ + 2, /* mbmaxlenlen */ 0, /* min_sort_char */ 0xFE39FE39, /* max_sort_char */ ' ', /* pad char */ @@ -1712,6 +1717,7 @@ ObCharsetInfo ob_charset_gb18030_2022_pinyin_cs = 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ + 2, /* mbmaxlenlen */ 0, /* min_sort_char */ 0xFE39FE39, /* max_sort_char */ ' ', /* pad char */ @@ -1747,6 +1753,7 @@ ObCharsetInfo ob_charset_gb18030_2022_radical_ci = 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ + 2, /* mbmaxlenlen */ 0, /* min_sort_char */ 0xFE39FE39, /* max_sort_char */ ' ', /* pad char */ @@ -1782,6 +1789,7 @@ ObCharsetInfo ob_charset_gb18030_2022_radical_cs = 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ + 2, /* mbmaxlenlen */ 0, /* min_sort_char */ 0xFE39FE39, /* 
max_sort_char */ ' ', /* pad char */ @@ -1817,6 +1825,7 @@ ObCharsetInfo ob_charset_gb18030_2022_stroke_ci = 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ + 2, /* mbmaxlenlen */ 0, /* min_sort_char */ 0xFE39FE39, /* max_sort_char */ ' ', /* pad char */ @@ -1852,6 +1861,7 @@ ObCharsetInfo ob_charset_gb18030_2022_stroke_cs = 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ + 2, /* mbmaxlenlen */ 0, /* min_sort_char */ 0xFE39FE39, /* max_sort_char */ ' ', /* pad char */ diff --git a/deps/oblib/src/lib/charset/ob_ctype_gbk.cc b/deps/oblib/src/lib/charset/ob_ctype_gbk.cc index bad0b2df89..3eee1e4763 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_gbk.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_gbk.cc @@ -393,6 +393,7 @@ ObCharsetInfo ob_charset_gbk_chinese_ci= 1, 1, 2, + 1, 0, 0xA967, ' ', @@ -425,6 +426,7 @@ ObCharsetInfo ob_charset_gbk_bin= 1, 1, 2, + 1, 0, 0xFEFE, ' ', diff --git a/deps/oblib/src/lib/charset/ob_ctype_latin1.cc b/deps/oblib/src/lib/charset/ob_ctype_latin1.cc index f64c7dd474..7a892bf980 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_latin1.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_latin1.cc @@ -93,6 +93,7 @@ ObCharsetInfo ob_charset_latin1 = { 1, 1, 1, + 1, 0, 0xFF, ' ', @@ -126,6 +127,7 @@ ObCharsetInfo ob_charset_latin1_bin = { 1, 1, 1, + 1, 0, 0xFF, ' ', diff --git a/deps/oblib/src/lib/charset/ob_ctype_mb.cc b/deps/oblib/src/lib/charset/ob_ctype_mb.cc index 56a717db0d..8a7f9a1002 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_mb.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_mb.cc @@ -290,6 +290,16 @@ unsigned int ob_instr_mb(const ObCharsetInfo *cs, return 0; } +uint ob_mbcharlen_ptr(const struct ObCharsetInfo *cs, const char *s, const char *e) +{ + uint len = ob_mbcharlen(cs, (uchar)*s); + if (len == 0 && ob_mbmaxlenlen(cs) == 2 && s + 1 < e) { + len = ob_mbcharlen_2(cs, (uchar)*s, (uchar) * (s + 1)); + assert(len == 0 || len == 2 || len == 4); + } + return len; +} + size_t ob_numchars_mb(const ObCharsetInfo *cs 
__attribute__((unused)), const char *pos, const char *end) { size_t count= 0; diff --git a/deps/oblib/src/lib/charset/ob_ctype_uca.cc b/deps/oblib/src/lib/charset/ob_ctype_uca.cc index 8256f5a96a..d3283082d7 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_uca.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_uca.cc @@ -2916,6 +2916,7 @@ ObCharsetInfo ob_charset_utf8mb4_unicode_ci= 1, 1, 4, + 1, 9, 0xFFFF, ' ', @@ -2950,6 +2951,7 @@ ObCharsetInfo ob_charset_utf16_unicode_ci= 1, 2, 4, + 1, 9, 0xFFFF, ' ', @@ -2980,6 +2982,7 @@ ObCharsetInfo ob_charset_utf8mb4_0900_ai_ci = { 1, 1, 4, + 1, 9, 0x10FFFF, ' ', @@ -3012,6 +3015,7 @@ ObCharsetInfo ob_charset_utf8mb4_zh_0900_as_cs = { 1, 1, 4, + 1, 32, 0x10FFFF, ' ', @@ -3044,6 +3048,7 @@ ObCharsetInfo ob_charset_utf8mb4_zh2_0900_as_cs = { 1, 1, 4, + 1, 32, 0x10FFFF, ' ', @@ -3076,6 +3081,7 @@ ObCharsetInfo ob_charset_utf8mb4_zh3_0900_as_cs = { 1, 1, 4, + 1, 32, 0x10FFFF, ' ', @@ -3142,6 +3148,7 @@ ObCharsetInfo ob_charset_utf8mb4_0900_bin = { 1, // casedn_multiply 1, // mbminlen 4, // mbmaxlen + 1, // mbmaxlenlen 0, // min_sort_char 0x10FFFF, // max_sort_char ' ', // pad char diff --git a/deps/oblib/src/lib/charset/ob_ctype_uca_tab.h b/deps/oblib/src/lib/charset/ob_ctype_uca_tab.h index c43fd3769b..b0c5c57271 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_uca_tab.h +++ b/deps/oblib/src/lib/charset/ob_ctype_uca_tab.h @@ -19524,4 +19524,4 @@ NULL ,NULL ,NULL ,NULL , NULL ,NULL ,NULL ,NULL , NULL ,NULL ,NULL ,NULL , NULL ,NULL ,NULL ,NULL -}; \ No newline at end of file +}; diff --git a/deps/oblib/src/lib/charset/ob_ctype_utf16.cc b/deps/oblib/src/lib/charset/ob_ctype_utf16.cc index 366e1c498c..089d8bd64b 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_utf16.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_utf16.cc @@ -1207,6 +1207,7 @@ ObCharsetInfo ob_charset_utf16_bin= 1, 2, 4, + 1, 0, 0xFFFF, ' ', @@ -1240,6 +1241,7 @@ ObCharsetInfo ob_charset_utf16_general_ci= 1, 2, 4, + 1, 0, 0xFFFF, ' ', diff --git 
a/deps/oblib/src/lib/charset/ob_ctype_utf8.cc b/deps/oblib/src/lib/charset/ob_ctype_utf8.cc index ed73455b62..935274f408 100644 --- a/deps/oblib/src/lib/charset/ob_ctype_utf8.cc +++ b/deps/oblib/src/lib/charset/ob_ctype_utf8.cc @@ -1023,6 +1023,7 @@ ObCharsetInfo ob_charset_utf8mb4_general_ci= 1, 1, 4, + 1, 0, 0xFFFF, ' ', @@ -1057,6 +1058,7 @@ ObCharsetInfo ob_charset_utf8mb4_bin= 1, 1, 4, + 1, 0, 0xFFFF, ' ', diff --git a/deps/oblib/src/lib/geo/ob_geo_common.h b/deps/oblib/src/lib/geo/ob_geo_common.h index e4860e271e..cc60451cc7 100644 --- a/deps/oblib/src/lib/geo/ob_geo_common.h +++ b/deps/oblib/src/lib/geo/ob_geo_common.h @@ -68,6 +68,21 @@ enum class ObGeoRelationType T_DFULLYWITHIN = 5 }; +// will define in other file, later +enum class ObDomainOpType +{ + T_INVALID = 0, + T_JSON_MEMBER_OF = 1, + T_JSON_CONTAINS = 2, + T_JSON_OVERLAPS = 3, + T_GEO_COVERS, + T_GEO_INTERSECTS, + T_GEO_DWITHIN, + T_GEO_DFULLYWITHIN, + T_GEO_COVEREDBY, + T_DOMAIN_OP_END, +}; + class ObGeoWkbByteOrderUtil { public: diff --git a/deps/oblib/src/lib/geo/ob_s2adapter.cpp b/deps/oblib/src/lib/geo/ob_s2adapter.cpp index 555bdc4448..174d944f12 100644 --- a/deps/oblib/src/lib/geo/ob_s2adapter.cpp +++ b/deps/oblib/src/lib/geo/ob_s2adapter.cpp @@ -25,7 +25,7 @@ namespace oceanbase { namespace common { -int ObSpatialMBR::filter(const ObSpatialMBR &other, ObGeoRelationType type, bool &pass_through) const +int ObSpatialMBR::filter(const ObSpatialMBR &other, ObDomainOpType type, bool &pass_through) const { INIT_SUCC(ret); if (is_geog_) { @@ -37,23 +37,23 @@ int ObSpatialMBR::filter(const ObSpatialMBR &other, ObGeoRelationType type, bool LOG_WARN("fail to generate other latlng rectangle", K(ret)); } else { switch (type) { - case ObGeoRelationType::T_COVERS: { + case ObDomainOpType::T_GEO_COVERS: { pass_through = !other_rect.Contains(this_rect); break; } - case ObGeoRelationType::T_DWITHIN: - case ObGeoRelationType::T_INTERSECTS: { + case ObDomainOpType::T_GEO_DWITHIN: + case 
ObDomainOpType::T_GEO_INTERSECTS: { pass_through = !this_rect.Intersects(other_rect); break; } - case ObGeoRelationType::T_COVEREDBY: { + case ObDomainOpType::T_GEO_COVEREDBY: { pass_through = !this_rect.Contains(other_rect); break; } - case ObGeoRelationType::T_DFULLYWITHIN: { + case ObDomainOpType::T_GEO_DFULLYWITHIN: { ret = OB_NOT_SUPPORTED; LOG_WARN("not support within geo relation type", K(ret), K(type)); break; @@ -75,23 +75,23 @@ int ObSpatialMBR::filter(const ObSpatialMBR &other, ObGeoRelationType type, bool LOG_WARN("fail to generate other latlng rectangle", K(ret)); } else { switch (type) { - case ObGeoRelationType::T_COVERS: { + case ObDomainOpType::T_GEO_COVERS: { pass_through = !other_rect.Contains(this_rect); break; } - case ObGeoRelationType::T_DWITHIN: - case ObGeoRelationType::T_INTERSECTS: { + case ObDomainOpType::T_GEO_DWITHIN: + case ObDomainOpType::T_GEO_INTERSECTS: { pass_through = !this_rect.Intersects(other_rect); break; } - case ObGeoRelationType::T_COVEREDBY: { + case ObDomainOpType::T_GEO_COVEREDBY: { pass_through = !this_rect.Contains(other_rect); break; } - case ObGeoRelationType::T_DFULLYWITHIN: { + case ObDomainOpType::T_GEO_DFULLYWITHIN: { ret = OB_NOT_SUPPORTED; LOG_WARN("not support within geo relation type", K(ret), K(type)); break; @@ -133,7 +133,7 @@ int ObSpatialMBR::to_char(char *buf, int64_t &buf_len) const } int ObSpatialMBR::from_string(ObString &mbr_str, - ObGeoRelationType type, + ObDomainOpType type, ObSpatialMBR &spa_mbr, bool is_point) { @@ -220,7 +220,7 @@ OB_DEF_DESERIALIZE(ObSpatialMBR) OB_UNIS_DECODE(x_max_); OB_UNIS_DECODE(mbr_type); if (OB_SUCC(ret)) { - mbr_type_ = static_cast(mbr_type); + mbr_type_ = static_cast(mbr_type); } OB_UNIS_DECODE(is_point_); OB_UNIS_DECODE(is_geog_); diff --git a/deps/oblib/src/lib/geo/ob_s2adapter.h b/deps/oblib/src/lib/geo/ob_s2adapter.h index 331ff3ba8b..aa76541e7a 100644 --- a/deps/oblib/src/lib/geo/ob_s2adapter.h +++ b/deps/oblib/src/lib/geo/ob_s2adapter.h @@ -44,7 +44,7 @@ 
public: x_max_(NAN), y_min_(NAN), y_max_(NAN) {}; - ObSpatialMBR(ObGeoRelationType rel_type) + ObSpatialMBR(ObDomainOpType rel_type) : x_min_(NAN), x_max_(NAN), y_min_(NAN), @@ -52,7 +52,7 @@ public: mbr_type_(rel_type), is_point_(false), is_geog_(false) {} - ObSpatialMBR(double x_min, double x_max, double y_min, double y_max, ObGeoRelationType rel_type) + ObSpatialMBR(double x_min, double x_max, double y_min, double y_max, ObDomainOpType rel_type) : x_min_(x_min), x_max_(x_max), y_min_(y_min), @@ -73,13 +73,13 @@ public: } int to_char(char *buf, int64_t &buf_len) const; static int from_string(ObString &mbr_str, - ObGeoRelationType type, + ObDomainOpType type, ObSpatialMBR &spa_mbr, bool is_point = false); - int filter(const ObSpatialMBR &other, ObGeoRelationType type, bool &pass_through) const; + int filter(const ObSpatialMBR &other, ObDomainOpType type, bool &pass_through) const; OB_INLINE bool is_point() const { return is_point_; }; OB_INLINE bool is_geog() const { return is_geog_; }; - OB_INLINE ObGeoRelationType get_type() const { return mbr_type_; }; + OB_INLINE ObDomainOpType get_type() const { return mbr_type_; }; OB_INLINE double get_xmin() const { return x_min_; }; OB_INLINE double get_xmax() const { return x_max_; }; OB_INLINE double get_ymin() const { return y_min_; }; @@ -93,7 +93,7 @@ public: double x_max_; double y_min_; double y_max_; - ObGeoRelationType mbr_type_; + ObDomainOpType mbr_type_; bool is_point_; bool is_geog_; }; diff --git a/deps/oblib/src/lib/json_type/ob_json_bin.h b/deps/oblib/src/lib/json_type/ob_json_bin.h index e332c48d0f..5849fec498 100644 --- a/deps/oblib/src/lib/json_type/ob_json_bin.h +++ b/deps/oblib/src/lib/json_type/ob_json_bin.h @@ -449,7 +449,7 @@ public: cursor_ = &local_cursor_; } - explicit ObJsonBin(const char *data, const int64_t length, ObIAllocator *allocator) + explicit ObJsonBin(const char *data, const int64_t length, ObIAllocator *allocator = nullptr) : ObIJsonBase(allocator), allocator_(allocator), meta_(), @@ 
-488,6 +488,38 @@ public: virtual ~ObJsonBin() { destroy(); } + + ObJsonBin(const ObJsonBin& other) + : ObJsonBin() + { + assign(other); + } + + void assign(const ObJsonBin& other) { + meta_ = other.meta_; + cursor_ = other.cursor_; + local_cursor_ = other.local_cursor_; + pos_ = other.pos_; + // ToDo: check again, why need assign ? + // node_stack_ = other.node_stack_; + data_ = other.data_; + int_val_ = other.int_val_; + number_ = other.number_; + prec_ = other.prec_; + scale_ = other.scale_; + ctx_ = other.ctx_; + is_alloc_ctx_ = other.is_alloc_ctx_; + is_seek_only_ = other.is_seek_only_; + is_schema_ = other.is_schema_; + } + + ObJsonBin& operator=(const ObJsonBin& other) + { + new (this) ObJsonBin(); + assign(other); + return *this; + } + OB_INLINE bool get_boolean() const override { return static_cast(uint_val_); } OB_INLINE double get_double() const override { return double_val_; } OB_INLINE float get_float() const override { return float_val_; }; @@ -1053,8 +1085,8 @@ private: bool is_seek_only_; bool is_schema_; - - DISALLOW_COPY_AND_ASSIGN(ObJsonBin); + // ToDo:refine + // DISALLOW_COPY_AND_ASSIGN(ObJsonBin); }; class ObJsonVar { @@ -1100,6 +1132,15 @@ private: }; +struct ObJsonBinCompare { + int operator()(const ObJsonBin& left, const ObJsonBin& right) + { + int result = 0; + left.compare(right, result); + return result > 0 ? 
1 : 0; + } +}; + } // namespace common } // namespace oceanbase #endif // OCEANBASE_SQL_OB_JSON_BIN diff --git a/deps/oblib/src/lib/ob_define.h b/deps/oblib/src/lib/ob_define.h index 19a8c89bfe..67a3c2a22b 100644 --- a/deps/oblib/src/lib/ob_define.h +++ b/deps/oblib/src/lib/ob_define.h @@ -687,6 +687,15 @@ const char *const OB_PARTITION_SHARDING_NONE = "NONE"; const char *const OB_PARTITION_SHARDING_PARTITION = "PARTITION"; const char *const OB_PARTITION_SHARDING_ADAPTIVE = "ADAPTIVE"; +// fulltext search +const char *const OB_DOC_ID_COLUMN_NAME = "__doc_id"; +const char *const OB_WORD_SEGMENT_COLUMN_NAME_PREFIX = "__word_segment"; +const char *const OB_WORD_COUNT_COLUMN_NAME_PREFIX = "__word_count"; +const char *const OB_DOC_LENGTH_COLUMN_NAME_PREFIX = "__doc_length"; +const int64_t OB_DOC_ID_COLUMN_BYTE_LENGTH = (sizeof(uint64_t) * 2); +constexpr int64_t OB_WORD_SEGMENT_COLUMN_NAME_PREFIX_LEN = sizeof("__word_segment") - 1; +constexpr int64_t OB_WORD_COUNT_COLUMN_NAME_PREFIX_LEN = sizeof("__word_count") - 1; +const char OB_FT_COL_ID_DELIMITER = '_'; // backup and restore const int64_t OB_MAX_CLUSTER_NAME_LENGTH = OB_MAX_APP_NAME_LENGTH; @@ -1818,7 +1827,7 @@ const int64_t OB_MAX_CAST_CHAR_MEDIUMTEXT_LENGTH = 4194303; const char *const SYS_DATE = "$SYS_DATE"; const char *const OB_DEFAULT_COMPRESS_FUNC_NAME = "none"; -const char *const OB_DEFAULT_FULLTEXT_PARSER_NAME = "TAOBAO_CHN"; +const char *const OB_DEFAULT_FULLTEXT_PARSER_NAME = "space"; const int64_t OB_MYSQL_LOGIN_USER_NAME_MAX_LEN = 48; const int64_t OB_MAX_CONFIG_NAME_LEN = 128; diff --git a/deps/oblib/src/lib/ob_name_def.h b/deps/oblib/src/lib/ob_name_def.h index 484b293fb5..198df6a5c7 100644 --- a/deps/oblib/src/lib/ob_name_def.h +++ b/deps/oblib/src/lib/ob_name_def.h @@ -790,7 +790,10 @@ #define N_PART_ID "part_id" #define N_INNER_GET "inner_get" #define N_MATCH_AGAINST "match_against" +#define N_DOC_ID "doc_id" #define N_WORD_SEGMENT "word_segment" +#define N_WORD_COUNT "word_count" +#define 
N_DOC_LENGTH "doc_length" #define N_SELF_JOIN "self_join" #define N_DES_HEX_STR "DES_HEX_STR" #define N_YEAR "year" @@ -1096,6 +1099,7 @@ #define N_INEER_IS_TRUE "inner_is_true" #define N_INNER_DECODE_LIKE "inner_decode_like" +#define N_BM25 "bm25" #define N_EXTRACT_CERT_EXPIRED_TIME "extract_cert_expired_time" #define N_INNER_ROW_CMP_VALUE "inner_row_cmp_value" #define N_SYS_LAST_REFRESH_SCN "last_refresh_scn" diff --git a/deps/oblib/src/lib/ob_plugin.h b/deps/oblib/src/lib/ob_plugin.h new file mode 100644 index 0000000000..c80944f1e5 --- /dev/null +++ b/deps/oblib/src/lib/ob_plugin.h @@ -0,0 +1,296 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_PLUGIN_H_ +#define OB_PLUGIN_H_ + +#include + +#include "lib/ob_errno.h" +#include "lib/charset/ob_ctype.h" +#include "lib/utility/ob_print_utils.h" + +#define OB_PLUGIN_AUTHOR_OCEANBASE "OceanBase Corporation" + +#define OB_PLUGIN_EXPORT + +#define OB_PLUGIN_INTERFACE_VERSION 0x01 // plugin interface version + +#define OB_PLUGIN_NAME_LENGTH 128 // plugin name length +#define OB_PLUGIN_FILE_NAME_LENGTH 512 // plugin file name length +#define OB_PLUGIN_SYMBOL_NAME_LENGTH 1024 // symbol name length in plunin dynamic library + +#define OB_PLUGIN_STR_(str) #str +#define OB_PLUGIN_STR(str) OB_PLUGIN_STR_(str) + +#define OB_PLUGIN_PREFIX OB_PLUGIN_STR(ob_builtin_) +#define OB_PLUGIN_VERSION_SUFFIX OB_PLUGIN_STR(_plugin_version) +#define OB_PLUGIN_SIZE_SUFFIX OB_PLUGIN_STR(_sizeof_plugin) +#define OB_PLUGIN_SUFFIX OB_PLUGIN_STR(_plugin) + +#define OB_PLUGIN_VERSION_SYMBOL(name) ob_builtin_##name##_plugin_version +#define OB_PLUGIN_SIZE_SYMBOL(name) ob_builtin_##name##_sizeof_plugin +#define OB_PLUGIN_SYMBOL(name) ob_builtin_##name##_plugin + +#define OB_DECLARE_PLUGIN_(name, version, size, plugin) \ + OB_PLUGIN_EXPORT int64_t version = OB_PLUGIN_INTERFACE_VERSION; \ + OB_PLUGIN_EXPORT int64_t size = sizeof(oceanbase::lib::ObPlugin); \ + OB_PLUGIN_EXPORT oceanbase::lib::ObPlugin plugin = + +// this is used to define a plugin +// +// - C/C++ code for example, +// +// OB_DECLARE_PLUGIN(example_plugin) +// { +// OB_FT_PARSER_PLUGIN, // type +// "ExamplePlugin", // name +// "OceanBase Corporation", // author +// "This is a plugin example.", // brief specification +// 0x00001, // version +// OB_MULAN_V2_LICENSE, // license +// &example_plugin, // plugin instance +// }; +#define OB_DECLARE_PLUGIN(name) \ + OB_DECLARE_PLUGIN_(name, \ + ob_builtin_##name##_plugin_version, \ + ob_builtin_##name##_sizeof_plugin, \ + ob_builtin_##name##_plugin) + +#define OB_GET_PLUGIN_VALUE_FUNC(name, value_type, value_name) \ + virtual int get_plugin_##name(value_type &value) 
const override \ + { \ + value = value_name; \ + return oceanbase::common::OB_SUCCESS; \ + } + +#define OB_GET_PLUGIN_FUNC(value_name) \ + virtual int get_plugin(oceanbase::lib::ObPlugin *&plugin) const override \ + { \ + plugin = &(value_name); \ + return oceanbase::common::OB_SUCCESS; \ + } + +#define OB_GET_PLUGIN_VERSION_FUNC(value_name) \ + OB_GET_PLUGIN_VALUE_FUNC(version, int64_t, value_name) +#define OB_GET_PLUGIN_SIZE_FUNC(value_name) \ + OB_GET_PLUGIN_VALUE_FUNC(size, int64_t, value_name) + +#define OB_DECLARE_BUILDIN_PLUGIN_HANDLER(plugin_handler, plugin_name) \ +class plugin_handler final : public oceanbase::lib::ObIPluginHandler \ +{ \ +public: \ + plugin_handler() = default; \ + ~plugin_handler() = default; \ + OB_GET_PLUGIN_FUNC(ob_builtin_##plugin_name##_plugin); \ + OB_GET_PLUGIN_VERSION_FUNC(ob_builtin_##plugin_name##_plugin_version); \ + OB_GET_PLUGIN_SIZE_FUNC(ob_builtin_##plugin_name##_sizeof_plugin); \ + VIRTUAL_TO_STRING_KV(KCSTRING(#plugin_handler)); \ +}; + +namespace oceanbase +{ +namespace lib +{ + +class ObIPluginDesc; + +// define plugin type +enum class ObPluginType : uint64_t +{ + OB_FT_PARSER_PLUGIN = 1, // fulltext parser plugin + OB_MAX_PLUGIN_TYPE = 2, // max plugin type +}; + +// define plugin license +enum class ObPluginLicenseType : uint64_t +{ + OB_MULAN_V2_LICENSE = 1, // Mulan PubL v2 license + OB_MAX_PLUGIN_LICENSE_TYPE = 2, // max plugin license type +}; + +class ObPluginParam final +{ +public: + ObPluginParam() : desc_(nullptr) {} + ~ObPluginParam() { reset(); } + + inline bool is_valid() const { return nullptr != desc_; } + inline void reset() { desc_ = nullptr; } + + TO_STRING_KV(KP_(desc)); +public: + ObIPluginDesc *desc_; +}; + +// descriptor interface of base plugin +class ObIPluginDesc +{ +public: + ObIPluginDesc() = default; + virtual ~ObIPluginDesc() = default; + +public: + // plugin initialize function + virtual int init(ObPluginParam *param) = 0; + // plugin de-initialize function + virtual int 
deinit(ObPluginParam *param) = 0; +}; + +// ob plugin description structure +class ObPlugin final +{ +public: + static const int64_t PLUGIN_VERSION = 0x01; +public: + ObPlugin() + : type_(ObPluginType::OB_MAX_PLUGIN_TYPE), + name_(nullptr), + author_(nullptr), + spec_(nullptr), + version_(PLUGIN_VERSION), + license_(ObPluginLicenseType::OB_MAX_PLUGIN_LICENSE_TYPE), + desc_(nullptr) + {} + ObPlugin( + const ObPluginType &type, + const char *name, + const char *author, + const char *spec, + const int64_t version, + const ObPluginLicenseType &license, + ObIPluginDesc *desc) + : type_(type), + name_(name), + author_(author), + spec_(spec), + version_(version), + license_(license), + desc_(desc) + {} + + ~ObPlugin() = default; + inline bool is_valid() const + { + return (ObPluginType::OB_FT_PARSER_PLUGIN <= type_ && type_ < ObPluginType::OB_MAX_PLUGIN_TYPE) + && nullptr != name_ + && nullptr != author_ + && nullptr != spec_ + && PLUGIN_VERSION == version_ + && (ObPluginLicenseType::OB_MULAN_V2_LICENSE <= license_ + && license_ < ObPluginLicenseType::OB_MAX_PLUGIN_LICENSE_TYPE) + && nullptr != desc_; + } + TO_STRING_KV(KP_(type), KCSTRING_(name), KCSTRING_(author), KCSTRING_(spec), K_(version), + K_(license), KP_(desc)); +public: + ObPluginType type_; // type of the plugin + const char *name_; // name for the plugin + const char *author_; // author for the plugin + const char *spec_; // brief specification of the plugin + int64_t version_; // version for the plugin + ObPluginLicenseType license_; // license for the plugin + ObIPluginDesc *desc_; // the plugin descriptor +}; + +class ObIPluginHandler +{ +public: + ObIPluginHandler() = default; + virtual ~ObIPluginHandler() = default; + + virtual int get_plugin(lib::ObPlugin *&plugin) const = 0; + virtual int get_plugin_version(int64_t &version) const = 0; + virtual int get_plugin_size(int64_t &size) const = 0; + + DECLARE_PURE_VIRTUAL_TO_STRING; +}; + +class ObFTParserParam final +{ +public: + class ObIAddWord + { + 
public: + ObIAddWord() = default; + virtual ~ObIAddWord() = default; + virtual int operator()(ObFTParserParam *param, const char *word, const int64_t word_len) = 0; + virtual int64_t get_add_word_count() const = 0; + DECLARE_PURE_VIRTUAL_TO_STRING; + }; +public: + ObFTParserParam() + : allocator_(nullptr), + add_word_(nullptr), + cs_(nullptr), + fulltext_(nullptr), + ft_length_(0), + parser_version_(-1) + {} + ~ObFTParserParam() = default; + + inline bool is_valid() const + { + return nullptr != allocator_ + && nullptr != add_word_ + && nullptr != cs_ + && nullptr != fulltext_ + && 0 < ft_length_ + && 0 <= parser_version_; + } + inline int add_word(ObFTParserParam *param, const char *word, int64_t word_len) + { + return (*add_word_)(param, word, word_len); + } + inline void reset() + { + allocator_ = nullptr; + add_word_ = nullptr; + cs_ = nullptr; + fulltext_ = nullptr; + ft_length_ = 0; + parser_version_ = -1; + } + + TO_STRING_KV(KP_(allocator), KP_(add_word), KP_(cs), K_(fulltext), K_(ft_length), K_(parser_version)); +public: + common::ObIAllocator *allocator_; + ObIAddWord *add_word_; + const ObCharsetInfo *cs_; + const char *fulltext_; + int64_t ft_length_; + int64_t parser_version_; +}; + +// fulltext parser descriptor interface for domain index +// - splitting a document into many tokenizations. +class ObIFTParserDesc : public ObIPluginDesc +{ +public: + ObIFTParserDesc() = default; + virtual ~ObIFTParserDesc() = default; + + /** + * split fulltext into multiple word segments + * + * @param[in,out] param, parser parameter (see ObFTParserParam); it holds the document + * to be tokenized (fulltext_, ft_length_) and the add_word_ callback. + * + * @return error code, such as, OB_SUCCESS, OB_INVALID_ARGUMENT, ... 
+ */ + virtual int segment(ObFTParserParam *param) const = 0; +}; + +} // end namespace lib +} // end namespace oceanbase + +#endif // OB_PLUGIN_H_ diff --git a/deps/oblib/src/lib/oblog/ob_log_module.h b/deps/oblib/src/lib/oblog/ob_log_module.h index 49f51cbf08..7cea7d77ac 100644 --- a/deps/oblib/src/lib/oblog/ob_log_module.h +++ b/deps/oblib/src/lib/oblog/ob_log_module.h @@ -162,6 +162,7 @@ DEFINE_LOG_SUB_MOD(IMC) DEFINE_LOG_SUB_MOD(DUP_TABLE) DEFINE_LOG_SUB_MOD(TABLELOCK) // tablelock DEFINE_LOG_SUB_MOD(BLKMGR) // block manager +DEFINE_LOG_SUB_MOD(FTS) // fulltext search LOG_MOD_END(STORAGE) // statement of clog's sub-modules @@ -741,6 +742,10 @@ LOG_MOD_END(PL) info_string, ##args) #define _STORAGE_BLKMGR_LOG(level, _fmt_, args...) _OB_SUB_MOD_LOG(STORAGE, BLKMGR, level, \ _fmt_, ##args) +#define STORAGE_FTS_LOG(level, info_string, args...) OB_SUB_MOD_LOG(STORAGE, FTS, level, \ + info_string, ##args) +#define _STORAGE_FTS_LOG(level, _fmt_, args...) _OB_SUB_MOD_LOG(STORAGE, FTS, level, \ + _fmt_, ##args) #define SQL_ENG_LOG(level, info_string, args...) OB_SUB_MOD_LOG(SQL, ENG, level, \ info_string, ##args) @@ -1084,6 +1089,8 @@ LOG_MOD_END(PL) #define _RS_COMPACTION_LOG_RET(level, errcode, args...) { int ret = errcode; _RS_COMPACTION_LOG(level, ##args); } #define STORAGE_BLKMGR_LOG_RET(level, errcode, args...) { int ret = errcode; STORAGE_BLKMGR_LOG(level, ##args); } #define _STORAGE_BLKMGR_LOG_RET(level, errcode, args...) { int ret = errcode; _STORAGE_BLKMGR_LOG(level, ##args); } +#define STORAGE_FTS_LOG_RET(level, errcode, args...) { int ret = errcode; STORAGE_FTS_LOG(level, ##args); } +#define _STORAGE_FTS_LOG_RET(level, errcode, args...) { int ret = errcode; _STORAGE_FTS_LOG(level, ##args); } #define SQL_ENG_LOG_RET(level, errcode, args...) { int ret = errcode; SQL_ENG_LOG(level, ##args); } #define _SQL_ENG_LOG_RET(level, errcode, args...) { int ret = errcode; _SQL_ENG_LOG(level, ##args); } #define SQL_EXE_LOG_RET(level, errcode, args...) 
{ int ret = errcode; SQL_EXE_LOG(level, ##args); } diff --git a/deps/oblib/src/lib/oblog/ob_log_module.ipp b/deps/oblib/src/lib/oblog/ob_log_module.ipp index ee0485c8cb..099d8c929b 100644 --- a/deps/oblib/src/lib/oblog/ob_log_module.ipp +++ b/deps/oblib/src/lib/oblog/ob_log_module.ipp @@ -123,6 +123,7 @@ REG_LOG_SUB_MOD(STORAGE, REPLAY) REG_LOG_SUB_MOD(STORAGE, IMC) REG_LOG_SUB_MOD(STORAGE, TABLELOCK) REG_LOG_SUB_MOD(STORAGE, BLKMGR) +REG_LOG_SUB_MOD(STORAGE, FTS) // reigst CLOG's sub-modules REG_LOG_SUB_MOD(CLOG, EXTLOG) diff --git a/deps/oblib/src/lib/string/ob_string.h b/deps/oblib/src/lib/string/ob_string.h index 6c9c4864ff..452f84fa4f 100644 --- a/deps/oblib/src/lib/string/ob_string.h +++ b/deps/oblib/src/lib/string/ob_string.h @@ -371,6 +371,18 @@ public: return match; } + inline bool suffix_match(const char *str) const + { + bool match = false; + if (OB_NOT_NULL(str)) { + const int64_t len = strlen(str); + if (len <= data_length_ && 0 == MEMCMP(str, ptr_ + data_length_ - len, len)) { + match = true; + } + } + return match; + } + inline bool prefix_match(const char *str) const { obstr_size_t len = 0; diff --git a/deps/oblib/unittest/lib/charset/test_charset.cpp b/deps/oblib/unittest/lib/charset/test_charset.cpp index b229792885..d8a3c1cb3f 100644 --- a/deps/oblib/unittest/lib/charset/test_charset.cpp +++ b/deps/oblib/unittest/lib/charset/test_charset.cpp @@ -15,11 +15,15 @@ #include #include #include +#include "gtest/gtest.h" + +#define protected public +#define private public + #include "lib/allocator/page_arena.h" #include "lib/charset/ob_charset.h" #include "lib/string/ob_string.h" #include "lib/utility/ob_print_utils.h" -#include "gtest/gtest.h" #include "unicode_map.h" #include "common/data_buffer.h" #include "lib/oblog/ob_log_module.h" @@ -825,6 +829,31 @@ TEST_F(TestCharset, check_gb18030_2022) } } +TEST_F(TestCharset, check_mbmaxlenlen) +{ + for (int64_t type = ObCollationType::CS_TYPE_INVALID; type < ObCollationType::CS_TYPE_MAX; ++type) { + if 
(nullptr != ObCharset::charset_arr[type]) { + const uint mbmaxlenlen = ob_mbmaxlenlen(ObCharset::charset_arr[type]); + const char *cs_name = ObCharset::charset_name(static_cast<ObCollationType>(type)); + std::cout << "charset=" << cs_name << ", mbmaxlenlen=" << mbmaxlenlen << ", type=" << type << std::endl; + if (ObCharset::is_gb18030_2022(type) + || CS_TYPE_GB18030_CHINESE_CI == type + || CS_TYPE_GB18030_CHINESE_CS == type + || CS_TYPE_GB18030_BIN == type + || CS_TYPE_GB18030_ZH_0900_AS_CS == type + || CS_TYPE_GB18030_ZH2_0900_AS_CS == type + || CS_TYPE_GB18030_ZH3_0900_AS_CS == type + || CS_TYPE_GB18030_2022_ZH_0900_AS_CS == type + || CS_TYPE_GB18030_2022_ZH2_0900_AS_CS == type + || CS_TYPE_GB18030_2022_ZH3_0900_AS_CS == type) { + ASSERT_EQ(2, mbmaxlenlen); + } else { + ASSERT_EQ(1, mbmaxlenlen); + } + } + } +} + TEST_F(TestCharset, foreach_char) { const char *data = "豫章故郡,洪都新府。星分翼轸,地接衡庐。襟三江而带五湖,控蛮荆而引瓯越。物华天宝,龙光射牛斗之墟" "人杰地灵,徐孺下陈蕃之榻。雄州雾列,俊采星驰。台隍枕夷夏之交,宾主尽东南之美。都督阎公之雅望,棨戟遥临" diff --git a/mittest/mtlenv/mock_tenant_module_env.h b/mittest/mtlenv/mock_tenant_module_env.h index 98f4e68204..402359cd14 100644 --- a/mittest/mtlenv/mock_tenant_module_env.h +++ b/mittest/mtlenv/mock_tenant_module_env.h @@ -734,6 +734,7 @@ int MockTenantModuleEnv::init() MTL_BIND2(mtl_new_default, table::ObHTableLockMgr::mtl_init, nullptr, nullptr, nullptr, table::ObHTableLockMgr::mtl_destroy); MTL_BIND2(mtl_new_default, omt::ObTenantSrs::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, table::ObTableApiSessPoolMgr::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); + MTL_BIND2(ObTenantFTPluginMgr::mtl_new, mtl_init_default, nullptr, nullptr, nullptr, mtl_destroy_default); MTL_BIND2(mtl_new_default, ObIndexUsageInfoMgr::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, storage::ObTabletMemtableMgrPool::mtl_init, nullptr, nullptr, nullptr, 
mtl_destroy_default); MTL_BIND2(mtl_new_default, ObTenantSnapshotService::mtl_init, mtl_start_default, mtl_stop_default, nullptr, mtl_destroy_default); diff --git a/src/objit/include/objit/expr/ob_iraw_expr.h b/src/objit/include/objit/expr/ob_iraw_expr.h index 86190e0c43..01c484acfa 100644 --- a/src/objit/include/objit/expr/ob_iraw_expr.h +++ b/src/objit/include/objit/expr/ob_iraw_expr.h @@ -50,6 +50,7 @@ public: EXPR_SET_OP, EXPR_EXEC_PARAM, EXPR_PL_QUERY_REF, + EXPR_MATCH_AGAINST, }; typedef ObExprVisitor Visitor; @@ -105,6 +106,7 @@ public: inline bool is_win_func_expr() const { return EXPR_WINDOW == expr_class_; } inline bool is_pseudo_column_expr() const { return EXPR_PSEUDO_COLUMN == expr_class_; } inline bool is_alias_ref_expr() const { return EXPR_ALIAS_REF == expr_class_; } + inline bool is_match_against_expr() const { return EXPR_MATCH_AGAINST == expr_class_; } inline bool is_terminal_expr() const { return is_var_expr() || is_op_pseudo_column_expr() || is_const_or_param_expr() diff --git a/src/observer/ob_server.cpp b/src/observer/ob_server.cpp index 518f16fda4..9f05af91a4 100644 --- a/src/observer/ob_server.cpp +++ b/src/observer/ob_server.cpp @@ -105,6 +105,7 @@ #include "logservice/palf/election/interface/election.h" #include "share/ob_ddl_sim_point.h" #include "storage/ddl/ob_ddl_redo_log_writer.h" +#include "storage/fts/ob_fts_plugin_mgr.h" #include "observer/ob_server_utils.h" #include "observer/table_load/ob_table_load_partition_calc.h" #include "observer/virtual_table/ob_mds_event_buffer.h" @@ -404,6 +405,8 @@ int ObServer::init(const ObServerOptions &opts, const ObPLogWriterCfg &log_cfg) LOG_ERROR("init bandwidth_throttle failed", KR(ret)); } else if (OB_FAIL(ObClockGenerator::init())) { LOG_ERROR("init create clock generator failed", KR(ret)); + } else if (OB_FAIL(ObTenantFTPluginMgr::register_plugins())) { + LOG_ERROR("init fulltext plugins failed", K(ret)); } else if (OB_FAIL(init_storage())) { LOG_ERROR("init storage failed", KR(ret)); } else 
if (OB_FAIL(init_tx_data_cache())) { @@ -754,6 +757,10 @@ void ObServer::destroy() multi_tenant_.destroy(); FLOG_INFO("wait destroy multi tenant success"); + FLOG_INFO("begin to unregister fulltext plugins"); + ObTenantFTPluginMgr::unregister_plugins(); + FLOG_INFO("fulltext plugins unregistered"); + FLOG_INFO("begin to destroy query retry ctrl"); ObQueryRetryCtrl::destroy(); FLOG_INFO("query retry ctrl destroy"); diff --git a/src/observer/omt/ob_multi_tenant.cpp b/src/observer/omt/ob_multi_tenant.cpp index 4bc2717a9b..21699182ca 100644 --- a/src/observer/omt/ob_multi_tenant.cpp +++ b/src/observer/omt/ob_multi_tenant.cpp @@ -76,6 +76,7 @@ #include "storage/compaction/ob_tenant_freeze_info_mgr.h" #include "storage/tx_storage/ob_checkpoint_service.h" #include "storage/meta_mem/ob_tenant_meta_mem_mgr.h" +#include "storage/fts/ob_fts_plugin_mgr.h" #include "storage/tx_storage/ob_tenant_memory_printer.h" #include "storage/tx/ob_id_service.h" #include "storage/compaction/ob_tenant_compaction_progress.h" @@ -575,6 +576,7 @@ int ObMultiTenant::init(ObAddr myaddr, MTL_BIND2(mtl_new_default, ObTenantSrs::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, table::ObTableApiSessPoolMgr::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, ObTenantSnapshotService::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); + MTL_BIND2(ObTenantFTPluginMgr::mtl_new, mtl_init_default, nullptr, nullptr, nullptr, mtl_destroy_default); MTL_BIND2(mtl_new_default, ObIndexUsageInfoMgr::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, storage::ObTabletMemtableMgrPool::mtl_init, nullptr, nullptr, nullptr, mtl_destroy_default); MTL_BIND2(mtl_new_default, rootserver::ObMViewMaintenanceService::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); 
diff --git a/src/observer/table/ob_table_scan_executor.cpp b/src/observer/table/ob_table_scan_executor.cpp index f505146a48..e7dad21afa 100644 --- a/src/observer/table/ob_table_scan_executor.cpp +++ b/src/observer/table/ob_table_scan_executor.cpp @@ -130,12 +130,12 @@ int ObTableApiScanExecutor::prepare_das_task() if (OB_ISNULL(lookup_tablet_loc)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("lookup tablet loc is nullptr", K(ret), KPC(lookup_table_loc->loc_meta_)); - } else if (OB_FAIL(scan_op->set_lookup_ctdef(scan_spec_.get_ctdef().lookup_ctdef_))) { - LOG_WARN("set lookup ctdef failed", K(ret)); - } else if (OB_FAIL(scan_op->set_lookup_rtdef(tsc_rtdef_.lookup_rtdef_))) { - LOG_WARN("set lookup rtdef failed", K(ret)); - } else if (OB_FAIL(scan_op->set_lookup_tablet_id(lookup_tablet_loc->tablet_id_))) { - LOG_WARN("set lookup tablet id failed", K(ret), KPC(lookup_tablet_loc)); + } else if (OB_FAIL(scan_op->reserve_related_buffer(1))) { + LOG_WARN("failed to set related scan cnt", K(ret)); + } else if (OB_FAIL(scan_op->set_related_task_info(scan_spec_.get_ctdef().lookup_ctdef_, + tsc_rtdef_.lookup_rtdef_, + lookup_tablet_loc->tablet_id_))) { + LOG_WARN("set related task info failed", K(ret)); } else { lookup_table_loc->is_reading_ = true; } diff --git a/src/observer/table_load/ob_table_load_service.cpp b/src/observer/table_load/ob_table_load_service.cpp index e63666bc43..3647d61e9b 100644 --- a/src/observer/table_load/ob_table_load_service.cpp +++ b/src/observer/table_load/ob_table_load_service.cpp @@ -410,6 +410,8 @@ int ObTableLoadService::check_support_direct_load( const ObTableSchema *table_schema = nullptr; bool trigger_enabled = false; bool has_udt_column = false; + bool has_fts_index = false; + bool has_multivalue_index = false; if (OB_FAIL( ObTableLoadSchema::get_table_schema(tenant_id, table_id, schema_guard, table_schema))) { LOG_WARN("fail to get table schema", KR(ret), K(tenant_id), K(table_id)); @@ -431,6 +433,22 @@ int 
ObTableLoadService::check_support_direct_load( FORWARD_USER_ERROR_MSG(ret, "direct-load does not support non-user table"); } } + // check if exists full-text search index + else if (OB_FAIL(table_schema->check_has_fts_index(schema_guard, has_fts_index))) { + LOG_WARN("fail to check has full-text search index", K(ret)); + } else if (has_fts_index) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("direct-load does not support table has full-text search index", KR(ret)); + FORWARD_USER_ERROR_MSG(ret, "direct-load does not support table has full-text search index"); + } + // check if exists multi-value index + else if (OB_FAIL(table_schema->check_has_multivalue_index(schema_guard, has_multivalue_index))) { + LOG_WARN("fail to check has multivalue index", K(ret)); + } else if (has_multivalue_index) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("direct-load does not support table has multi-value index", KR(ret)); + FORWARD_USER_ERROR_MSG(ret, "direct-load does not support table has multi-value index"); + } // check if exists generated column else if (OB_UNLIKELY(table_schema->has_generated_column())) { ret = OB_NOT_SUPPORTED; diff --git a/src/observer/virtual_table/ob_table_index.cpp b/src/observer/virtual_table/ob_table_index.cpp index d48dd72d9a..26ba13c6a3 100644 --- a/src/observer/virtual_table/ob_table_index.cpp +++ b/src/observer/virtual_table/ob_table_index.cpp @@ -575,25 +575,24 @@ int ObTableIndex::add_normal_indexes(const ObTableSchema &table_schema, "index_table_id", simple_index_infos_.at(index_tid_array_idx_).table_id_); } else { - bool is_ctxcat_fulltext = false; - ObArray ft_gen_column_ids; - ObArray dep_column_ids; - if ((INDEX_TYPE_DOMAIN_CTXCAT == index_schema->get_index_type())) { - if (OB_FAIL(index_schema->get_generated_column_ids(ft_gen_column_ids))) { - LOG_WARN("get generated column ids failed", K(ret)); - } else if (1 == ft_gen_column_ids.count()) { - // 对于全文索引表,只有一列生成列, 可以有多个依赖列 - is_ctxcat_fulltext = true; - } - } - - if (OB_FAIL(ret)) { - } else if 
(is_ctxcat_fulltext) { + const bool is_fts_index = index_schema->is_fts_index(); + uint64_t doc_id_col_id = OB_INVALID_ID; + uint64_t ft_col_id = OB_INVALID_ID; + if (index_schema->is_built_in_fts_index()) { + is_sub_end = true; + } else if (is_fts_index && OB_FAIL(index_schema->get_fulltext_column_ids(doc_id_col_id, ft_col_id))) { + LOG_WARN("get generated column ids failed", K(ret)); + } else if (is_fts_index) { + ObArray dep_column_ids; const ObColumnSchemaV2 *gen_column_schema = NULL; if (OB_INVALID_ID == static_cast(ft_dep_col_idx_)) { ft_dep_col_idx_ = 0; } - if (OB_ISNULL(gen_column_schema = table_schema.get_column_schema(ft_gen_column_ids[0]))) { + if (OB_UNLIKELY(doc_id_col_id <= OB_APP_MIN_COLUMN_ID || OB_INVALID_ID == doc_id_col_id + || ft_col_id <= OB_APP_MIN_COLUMN_ID || OB_INVALID_ID == ft_col_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid doc id or fulltext column id", K(ret), K(doc_id_col_id), K(ft_col_id)); + } else if (OB_ISNULL(gen_column_schema = table_schema.get_column_schema(ft_col_id))) { ret = OB_SCHEMA_ERROR; SERVER_LOG(WARN, "fail to get data table column schema", K(ret)); } else if (OB_FAIL(gen_column_schema->get_cascaded_column_ids(dep_column_ids))) { @@ -1045,15 +1044,7 @@ int ObTableIndex::add_fulltext_index_column(const ObString &database_name, } // non_unique case OB_APP_MIN_COLUMN_ID + 5: { - int64_t non_unique = 0; - if (INDEX_TYPE_UNIQUE_GLOBAL == index_schema->get_index_type() - || INDEX_TYPE_UNIQUE_LOCAL == index_schema->get_index_type() - || index_schema->is_spatial_index()) { - non_unique = 0; - } else { - non_unique = 1; - } - cells[cell_idx].set_int(non_unique); + cells[cell_idx].set_int(1/*non_unique*/); break; } //index_schema diff --git a/src/rootserver/CMakeLists.txt b/src/rootserver/CMakeLists.txt index ace6f776cf..fa368b05c0 100644 --- a/src/rootserver/CMakeLists.txt +++ b/src/rootserver/CMakeLists.txt @@ -121,6 +121,7 @@ ob_set_subtarget(ob_rootserver ddl_task ddl_task/ob_ddl_single_replica_executor.cpp 
ddl_task/ob_ddl_task.cpp ddl_task/ob_drop_index_task.cpp + ddl_task/ob_drop_fts_index_task.cpp ddl_task/ob_drop_primary_key_task.cpp ddl_task/ob_index_build_task.cpp ddl_task/ob_build_mview_task.cpp @@ -154,6 +155,7 @@ ob_set_subtarget(ob_rootserver freeze freeze/ob_major_freeze_helper.cpp freeze/ob_major_freeze_util.cpp freeze/ob_major_merge_progress_util.cpp + freeze/ob_fts_checksum_validate_util.cpp ) ob_set_subtarget(ob_rootserver restore diff --git a/src/rootserver/ddl_task/ob_build_mview_task.cpp b/src/rootserver/ddl_task/ob_build_mview_task.cpp index 3b2fdd43cf..b614d83aa9 100644 --- a/src/rootserver/ddl_task/ob_build_mview_task.cpp +++ b/src/rootserver/ddl_task/ob_build_mview_task.cpp @@ -128,7 +128,7 @@ int ObBuildMViewTask::init(const ObDDLTaskRecord &task_record) } else if (!task_record.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", KR(ret), K(task_record)); - } else if (OB_FAIL(deserlize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { + } else if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { LOG_WARN("deserialize params from message failed", KR(ret)); } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard( task_record.tenant_id_, schema_guard, schema_version))) { @@ -599,7 +599,7 @@ int ObBuildMViewTask::serialize_params_to_message(char *buf, const int64_t buf_l return ret; } -int ObBuildMViewTask::deserlize_params_from_message( +int ObBuildMViewTask::deserialize_params_from_message( const uint64_t tenant_id, const char *buf, const int64_t data_len, @@ -610,7 +610,7 @@ int ObBuildMViewTask::deserlize_params_from_message( if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || nullptr == buf || data_len <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(tenant_id), KP(buf), K(data_len)); - } else if 
(OB_FAIL(ObDDLTask::deserlize_params_from_message(tenant_id, buf, data_len, pos))) { + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, buf, data_len, pos))) { LOG_WARN("ObDDLTask deserlize failed", K(ret)); } else if (OB_FAIL(tmp_arg.deserialize(buf, data_len, pos))) { LOG_WARN("deserialize table failed", K(ret)); diff --git a/src/rootserver/ddl_task/ob_build_mview_task.h b/src/rootserver/ddl_task/ob_build_mview_task.h index 236ff5eb1f..c097d46a22 100644 --- a/src/rootserver/ddl_task/ob_build_mview_task.h +++ b/src/rootserver/ddl_task/ob_build_mview_task.h @@ -42,10 +42,10 @@ public: virtual int serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const override; - virtual int deserlize_params_from_message(const uint64_t tenant_id, - const char *buf, - const int64_t buf_size, - int64_t &pos) override; + virtual int deserialize_params_from_message(const uint64_t tenant_id, + const char *buf, + const int64_t buf_size, + int64_t &pos) override; virtual int64_t get_serialize_param_size() const override; int on_child_task_prepare(const int64_t task_id); diff --git a/src/rootserver/ddl_task/ob_column_redefinition_task.cpp b/src/rootserver/ddl_task/ob_column_redefinition_task.cpp index 241b837504..5048d17664 100644 --- a/src/rootserver/ddl_task/ob_column_redefinition_task.cpp +++ b/src/rootserver/ddl_task/ob_column_redefinition_task.cpp @@ -107,7 +107,7 @@ int ObColumnRedefinitionTask::init(const ObDDLTaskRecord &task_record) } else if (!task_record.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret)); - } else if (OB_FAIL(deserlize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { + } else if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { LOG_WARN("deserialize params from message failed", K(ret)); } else if 
(OB_FAIL(set_ddl_stmt_str(task_record.ddl_stmt_str_))) { LOG_WARN("set ddl stmt str failed", K(ret)); @@ -446,15 +446,15 @@ int ObColumnRedefinitionTask::serialize_params_to_message(char *buf, const int64 return ret; } -int ObColumnRedefinitionTask::deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) +int ObColumnRedefinitionTask::deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) { int ret = OB_SUCCESS; obrpc::ObAlterTableArg tmp_arg; if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || nullptr == buf || data_len <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), KP(buf), K(tenant_id), K(data_len)); - } else if (OB_FAIL(ObDDLTask::deserlize_params_from_message(tenant_id, buf, data_len, pos))) { - LOG_WARN("ObDDLTask deserlize failed", K(ret)); + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, buf, data_len, pos))) { + LOG_WARN("ObDDLTask deserialize failed", K(ret)); } else if (OB_FAIL(tmp_arg.deserialize(buf, data_len, pos))) { LOG_WARN("serialize table failed", K(ret)); } else if (OB_FAIL(ObDDLUtil::replace_user_tenant_id(task_type_, tenant_id, tmp_arg))) { diff --git a/src/rootserver/ddl_task/ob_column_redefinition_task.h b/src/rootserver/ddl_task/ob_column_redefinition_task.h index fbcbd2aaf3..f39e81ae51 100644 --- a/src/rootserver/ddl_task/ob_column_redefinition_task.h +++ b/src/rootserver/ddl_task/ob_column_redefinition_task.h @@ -63,7 +63,7 @@ private: int copy_table_constraints(); int copy_table_foreign_keys(); virtual int serialize_params_to_message(char *buf, const int64_t buf_len, int64_t &pos) const override; - virtual int deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) override; + virtual int deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) override; virtual 
int64_t get_serialize_param_size() const override; private: static const int64_t OB_COLUMN_REDEFINITION_TASK_VERSION = 1L; diff --git a/src/rootserver/ddl_task/ob_constraint_task.cpp b/src/rootserver/ddl_task/ob_constraint_task.cpp index 31545545f3..daf98fea1f 100755 --- a/src/rootserver/ddl_task/ob_constraint_task.cpp +++ b/src/rootserver/ddl_task/ob_constraint_task.cpp @@ -607,7 +607,7 @@ int ObConstraintTask::init(const ObDDLTaskRecord &task_record) } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); - } else if (OB_FAIL(deserlize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { + } else if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { LOG_WARN("deserialize params from message failed", K(ret)); } else { object_id_ = table_id; @@ -2030,14 +2030,14 @@ int ObConstraintTask::serialize_params_to_message(char *buf, const int64_t buf_l return ret; } -int ObConstraintTask::deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) +int ObConstraintTask::deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) { int ret = OB_SUCCESS; ObAlterTableArg tmp_arg; if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || nullptr == buf || data_len <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(tenant_id), KP(buf), K(data_len)); - } else if (OB_FAIL(ObDDLTask::deserlize_params_from_message(tenant_id, buf, data_len, pos))) { + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, buf, data_len, pos))) { LOG_WARN("ObDDLTask deserlize failed", K(ret)); } else if (OB_FAIL(tmp_arg.deserialize(buf, data_len, pos))) { LOG_WARN("serialize table failed", K(ret)); diff --git a/src/rootserver/ddl_task/ob_constraint_task.h 
b/src/rootserver/ddl_task/ob_constraint_task.h index f06c5efa03..122c0b8ccd 100644 --- a/src/rootserver/ddl_task/ob_constraint_task.h +++ b/src/rootserver/ddl_task/ob_constraint_task.h @@ -106,7 +106,7 @@ public: virtual int process() override; int update_check_constraint_finish(const int ret_code); virtual int serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const override; - virtual int deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) override; + virtual int deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) override; virtual int64_t get_serialize_param_size() const override; virtual void flt_set_task_span_tag() const override; virtual void flt_set_status_span_tag() const override; diff --git a/src/rootserver/ddl_task/ob_ddl_redefinition_task.h b/src/rootserver/ddl_task/ob_ddl_redefinition_task.h index fb2477fc50..ca069ef858 100644 --- a/src/rootserver/ddl_task/ob_ddl_redefinition_task.h +++ b/src/rootserver/ddl_task/ob_ddl_redefinition_task.h @@ -143,9 +143,9 @@ public: const int64_t execution_id, const int ret_code, const ObDDLTaskInfo &addition_info) = 0; - int on_child_task_finish( + virtual int on_child_task_finish( const uint64_t child_task_key, - const int ret_code); + const int ret_code) override; int notify_update_autoinc_finish(const uint64_t autoinc_val, const int ret_code); virtual void flt_set_task_span_tag() const = 0; virtual void flt_set_status_span_tag() const = 0; diff --git a/src/rootserver/ddl_task/ob_ddl_retry_task.cpp b/src/rootserver/ddl_task/ob_ddl_retry_task.cpp index dbf87fa187..07587e5451 100644 --- a/src/rootserver/ddl_task/ob_ddl_retry_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_retry_task.cpp @@ -232,7 +232,7 @@ int ObDDLRetryTask::init(const ObDDLTaskRecord &task_record) dst_schema_version_ = schema_version_; if (nullptr != task_record.message_) { int64_t pos = 0; - if 
(OB_FAIL(deserlize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { + if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { LOG_WARN("fail to deserialize params from message", K(ret)); } } @@ -643,13 +643,13 @@ int ObDDLRetryTask::serialize_params_to_message(char *buf, const int64_t buf_siz return ret; } -int ObDDLRetryTask::deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) +int ObDDLRetryTask::deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || nullptr == buf || buf_size <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(tenant_id), KP(buf), K(buf_size)); - } else if (OB_FAIL(ObDDLTask::deserlize_params_from_message(tenant_id, buf, buf_size, pos))) { + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, buf, buf_size, pos))) { LOG_WARN("fail to deserialize ObDDLTask", K(ret)); } else if (ObDDLType::DDL_DROP_DATABASE == task_type_) { obrpc::ObDropDatabaseArg tmp_arg; diff --git a/src/rootserver/ddl_task/ob_ddl_retry_task.h b/src/rootserver/ddl_task/ob_ddl_retry_task.h index 31dbc68746..13b8fd6439 100644 --- a/src/rootserver/ddl_task/ob_ddl_retry_task.h +++ b/src/rootserver/ddl_task/ob_ddl_retry_task.h @@ -41,7 +41,7 @@ public: virtual int process() override; virtual bool is_valid() const override; virtual int serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const override; - virtual int deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) override; + virtual int deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) override; virtual int64_t 
get_serialize_param_size() const override; static int update_task_status_wait_child_task_finish( common::ObMySQLTransaction &trans, diff --git a/src/rootserver/ddl_task/ob_ddl_scheduler.cpp b/src/rootserver/ddl_task/ob_ddl_scheduler.cpp index 7b84b8ea66..4cb8230eaa 100755 --- a/src/rootserver/ddl_task/ob_ddl_scheduler.cpp +++ b/src/rootserver/ddl_task/ob_ddl_scheduler.cpp @@ -22,6 +22,7 @@ #include "rootserver/ddl_task/ob_ddl_scheduler.h" #include "rootserver/ddl_task/ob_ddl_task.h" #include "rootserver/ddl_task/ob_drop_index_task.h" +#include "rootserver/ddl_task/ob_drop_fts_index_task.h" #include "rootserver/ddl_task/ob_drop_primary_key_task.h" #include "rootserver/ddl_task/ob_index_build_task.h" #include "rootserver/ddl_task/ob_build_mview_task.h" @@ -1015,6 +1016,20 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, LOG_WARN("fail to create drop index task failed", K(ret)); } break; + case DDL_DROP_FTS_INDEX: + case DDL_DROP_MULVALUE_INDEX: + if (OB_FAIL(create_drop_fts_index_task(proxy, + param.src_table_schema_, + param.schema_version_, + param.consumer_group_id_, + param.aux_rowkey_doc_schema_, + param.aux_doc_rowkey_schema_, + param.aux_doc_word_schema_, + *param.allocator_, + task_record))) { + LOG_WARN("fail to create drop fts index task", K(ret)); + } + break; case DDL_MODIFY_COLUMN: case DDL_ADD_PRIMARY_KEY: case DDL_ALTER_PRIMARY_KEY: @@ -1570,9 +1585,6 @@ int ObDDLScheduler::create_drop_index_task( } else if (OB_ISNULL(index_schema) || OB_ISNULL(drop_index_arg)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), KP(index_schema), KP(drop_index_arg)); - } else if (index_schema->is_domain_index()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("drop domain index is not supported", K(ret)); } else if (OB_FAIL(ObDDLTask::fetch_new_task_id(root_service_->get_sql_proxy(), index_schema->get_tenant_id(), task_id))) { LOG_WARN("fetch new task id failed", K(ret)); } else { @@ -1599,6 +1611,80 @@ int 
ObDDLScheduler::create_drop_index_task( return ret; } +int ObDDLScheduler::create_drop_fts_index_task( + common::ObISQLClient &proxy, + const share::schema::ObTableSchema *index_schema, + const int64_t schema_version, + const int64_t consumer_group_id, + const share::schema::ObTableSchema *rowkey_doc_schema, + const share::schema::ObTableSchema *doc_rowkey_schema, + const share::schema::ObTableSchema *doc_word_schema, + ObIAllocator &allocator, + ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + int64_t task_id = 0; + ObDropFTSIndexTask index_task; + common::ObString domain_index_name; + common::ObString fts_doc_word_name; + common::ObString rowkey_doc_name; + common::ObString doc_rowkey_name; + // multivalue index may run here, need calc index type first + bool is_fts_index = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(index_schema)); + } else if (FALSE_IT(is_fts_index = index_schema->is_fts_index_aux())) { + } else if (OB_ISNULL(rowkey_doc_schema) + || OB_ISNULL(doc_rowkey_schema) + || (is_fts_index && OB_ISNULL(doc_word_schema)) + || OB_UNLIKELY(schema_version <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(index_schema), KP(rowkey_doc_schema), K(doc_rowkey_schema), + K(doc_word_schema), K(schema_version)); + } else if (OB_FAIL(ObDDLTask::fetch_new_task_id(root_service_->get_sql_proxy(), index_schema->get_tenant_id(), + task_id))) { + LOG_WARN("fetch new task id failed", K(ret)); + } else if (OB_FAIL(index_schema->get_index_name(domain_index_name))) { + LOG_WARN("fail to get domain index name", K(ret), KPC(index_schema)); + } else if (is_fts_index && OB_FAIL(doc_word_schema->get_index_name(fts_doc_word_name))) { + LOG_WARN("fail to get fts doc word name", K(ret), KPC(doc_word_schema)); + } else if (OB_FAIL(rowkey_doc_schema->get_index_name(rowkey_doc_name))) { + 
LOG_WARN("fail to get rowkey doc name", K(ret), KPC(rowkey_doc_schema)); + } else if (OB_FAIL(doc_rowkey_schema->get_index_name(doc_rowkey_name))) { + LOG_WARN("fail to get doc rowkey name", K(ret), KPC(doc_rowkey_schema)); + } else { + const uint64_t data_table_id = index_schema->get_data_table_id(); + const ObFTSDDLChildTaskInfo domain_index(domain_index_name, index_schema->get_table_id(), 0/*task_id*/); + const ObFTSDDLChildTaskInfo fts_doc_word(fts_doc_word_name, + is_fts_index ? doc_word_schema->get_table_id() : OB_INVALID_ID, 0/*task_id*/); + const ObFTSDDLChildTaskInfo rowkey_doc(rowkey_doc_name, rowkey_doc_schema->get_table_id(), 0/*task_id*/); + const ObFTSDDLChildTaskInfo doc_rowkey(doc_rowkey_name, doc_rowkey_schema->get_table_id(), 0/*task_id*/); + const ObDDLType ddl_type = is_fts_index ? DDL_DROP_FTS_INDEX : DDL_DROP_MULVALUE_INDEX; + if (OB_FAIL(index_task.init(index_schema->get_tenant_id(), + task_id, + data_table_id, + ddl_type, + rowkey_doc, + doc_rowkey, + domain_index, + fts_doc_word, + schema_version, + consumer_group_id))) { + LOG_WARN("init drop index task failed", K(ret), K(data_table_id), K(domain_index)); + } else if (OB_FAIL(index_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { + LOG_WARN("set trace id failed", K(ret)); + } else if (OB_FAIL(insert_task_record(proxy, index_task, allocator, task_record))) { + LOG_WARN("fail to insert task record", K(ret)); + } + } + LOG_INFO("ddl_scheduler create drop fts index task finished", K(ret), K(index_task)); + return ret; +} + int ObDDLScheduler::create_constraint_task( common::ObISQLClient &proxy, const share::schema::ObTableSchema *table_schema, @@ -2064,6 +2150,10 @@ int ObDDLScheduler::schedule_ddl_task(const ObDDLTaskRecord &record) case ObDDLType::DDL_DROP_MLOG: ret = schedule_drop_index_task(record); break; + case ObDDLType::DDL_DROP_FTS_INDEX: + case ObDDLType::DDL_DROP_MULVALUE_INDEX: + ret = schedule_drop_fts_index_task(record); + break; case DDL_DROP_PRIMARY_KEY: ret = 
schedule_drop_primary_key_task(record); break; @@ -2360,6 +2450,33 @@ int ObDDLScheduler::schedule_drop_index_task(const ObDDLTaskRecord &task_record) return ret; } +int ObDDLScheduler::schedule_drop_fts_index_task(const ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + ObDropFTSIndexTask *drop_fts_index_task = nullptr; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLScheduler has not been inited", K(ret)); + } else if (OB_FAIL(alloc_ddl_task(drop_fts_index_task))) { + LOG_WARN("fail to alloc drop fts index task", K(ret)); + } else if (OB_FAIL(drop_fts_index_task->init(task_record))) { + LOG_WARN("fail to init drop fts index task", K(ret)); + } else if (OB_FAIL(drop_fts_index_task->set_trace_id(task_record.trace_id_))) { + LOG_WARN("fail to set trace id", K(ret)); + } else if (OB_FAIL(inner_schedule_ddl_task(drop_fts_index_task, task_record))) { + if (OB_ENTRY_EXIST != ret) { + LOG_WARN("fail to inner schedule task", K(ret)); + } + } + if (OB_FAIL(ret) && nullptr != drop_fts_index_task) { + drop_fts_index_task->~ObDropFTSIndexTask(); + allocator_.free(drop_fts_index_task); + drop_fts_index_task = nullptr; + } + + return ret; +} + int ObDDLScheduler::schedule_recover_restore_table_task(const ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; @@ -2751,8 +2868,7 @@ int ObDDLScheduler::on_ddl_task_finish( } else { ObDDLTask *ddl_task = nullptr; if (OB_FAIL(task_queue_.modify_task(parent_task_id, [&child_task_key, &ret_code](ObDDLTask &task) -> int { - ObDDLRedefinitionTask *redefinition_task = static_cast(&task); - return redefinition_task->on_child_task_finish(child_task_key.object_id_, ret_code); + return task.on_child_task_finish(child_task_key.object_id_, ret_code); task.add_event_info("ddl task finish"); }))) { LOG_WARN("failed to modify task", K(ret)); diff --git a/src/rootserver/ddl_task/ob_ddl_scheduler.h b/src/rootserver/ddl_task/ob_ddl_scheduler.h index 8fdc130ec3..4567cc8ab7 100755 --- 
a/src/rootserver/ddl_task/ob_ddl_scheduler.h +++ b/src/rootserver/ddl_task/ob_ddl_scheduler.h @@ -453,7 +453,17 @@ private: ObIAllocator &allocator, ObDDLTaskRecord &task_record); - + int create_drop_fts_index_task( + common::ObISQLClient &proxy, + const share::schema::ObTableSchema *index_schema, + const int64_t schema_version, + const int64_t consumer_group_id, + const share::schema::ObTableSchema *rowkey_doc_schema, + const share::schema::ObTableSchema *doc_rowkey_schema, + const share::schema::ObTableSchema *doc_word_schema, + ObIAllocator &allocator, + ObDDLTaskRecord &task_record); + int create_ddl_retry_task( common::ObISQLClient &proxy, const uint64_t tenant_id, @@ -489,6 +499,7 @@ private: int schedule_column_redefinition_task(const ObDDLTaskRecord &task_record); int schedule_modify_autoinc_task(const ObDDLTaskRecord &task_record); int schedule_drop_index_task(const ObDDLTaskRecord &task_record); + int schedule_drop_fts_index_task(const ObDDLTaskRecord &task_record); int schedule_ddl_retry_task(const ObDDLTaskRecord &task_record); int schedule_recover_restore_table_task(const ObDDLTaskRecord &task_record); int add_sys_task(ObDDLTask *task); diff --git a/src/rootserver/ddl_task/ob_ddl_task.cpp b/src/rootserver/ddl_task/ob_ddl_task.cpp index b3508ab365..04cb36b5c8 100644 --- a/src/rootserver/ddl_task/ob_ddl_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_task.cpp @@ -164,7 +164,9 @@ OB_SERIALIZE_MEMBER(ObDDLTaskSerializeField, ObCreateDDLTaskParam::ObCreateDDLTaskParam() : sub_task_trace_id_(0), tenant_id_(OB_INVALID_ID), object_id_(OB_INVALID_ID), schema_version_(0), parallelism_(0), consumer_group_id_(0), parent_task_id_(0), task_id_(0), type_(DDL_INVALID), src_table_schema_(nullptr), - dest_table_schema_(nullptr), ddl_arg_(nullptr), allocator_(nullptr), tenant_data_version_(0) + dest_table_schema_(nullptr), ddl_arg_(nullptr), allocator_(nullptr), + aux_rowkey_doc_schema_(nullptr), aux_doc_rowkey_schema_(nullptr), aux_doc_word_schema_(nullptr), + 
tenant_data_version_(0) { } @@ -182,7 +184,8 @@ ObCreateDDLTaskParam::ObCreateDDLTaskParam(const uint64_t tenant_id, const int64_t task_id) : sub_task_trace_id_(0), tenant_id_(tenant_id), object_id_(object_id), schema_version_(schema_version), parallelism_(parallelism), consumer_group_id_(consumer_group_id), parent_task_id_(parent_task_id), task_id_(task_id), type_(type), src_table_schema_(src_table_schema), dest_table_schema_(dest_table_schema), - ddl_arg_(ddl_arg), allocator_(allocator) + ddl_arg_(ddl_arg), allocator_(allocator), aux_rowkey_doc_schema_(nullptr), aux_doc_rowkey_schema_(nullptr), + aux_doc_word_schema_(nullptr) { } @@ -662,6 +665,24 @@ OB_SERIALIZE_MEMBER(ObDDLTracing, status_start_ts_, parent_task_span_id_); +int ObFTSDDLChildTaskInfo::deep_copy_from_other( + const ObFTSDDLChildTaskInfo &other, + common::ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (this != &other) { + if (OB_FAIL(ob_write_string(allocator, other.index_name_, index_name_))) { + LOG_WARN("fail to copy table name", K(ret), K(other)); + } else { + table_id_ = other.table_id_; + task_id_ = other.task_id_; + } + } + return ret; +} + +OB_SERIALIZE_MEMBER(ObFTSDDLChildTaskInfo, index_name_, table_id_); + int ObDDLTask::cleanup() { int ret = cleanup_impl(); @@ -937,7 +958,7 @@ int ObDDLTask::serialize_params_to_message(char *buf, const int64_t buf_size, in return ret; } -int ObDDLTask::deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) +int ObDDLTask::deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) { int ret = OB_SUCCESS; ObDDLTaskSerializeField serialize_field; diff --git a/src/rootserver/ddl_task/ob_ddl_task.h b/src/rootserver/ddl_task/ob_ddl_task.h index c73b7b0b3e..4aa2c7eee3 100755 --- a/src/rootserver/ddl_task/ob_ddl_task.h +++ b/src/rootserver/ddl_task/ob_ddl_task.h @@ -111,6 +111,30 @@ public: int64_t row_inserted_; }; +struct 
ObFTSDDLChildTaskInfo final +{ +public: + ObFTSDDLChildTaskInfo() : index_name_(), table_id_(OB_INVALID_ID), task_id_(0) {} + ObFTSDDLChildTaskInfo( + common::ObString &index_name, + const uint64_t table_id, + const int64_t task_id) + : index_name_(index_name), + table_id_(table_id), + task_id_(task_id) + {} + ~ObFTSDDLChildTaskInfo() = default; + bool is_valid() const { return OB_INVALID_ID != table_id_ && !index_name_.empty(); } + int deep_copy_from_other(const ObFTSDDLChildTaskInfo &other, common::ObIAllocator &allocator); + TO_STRING_KV(K_(table_id), K_(task_id), K_(index_name)); + OB_UNIS_VERSION(1); +public: + common::ObString index_name_; + uint64_t table_id_; + // The following fields are not persisted to the `__all_ddl_task_status` system table. + int64_t task_id_; +}; + struct ObDDLTaskSerializeField final { OB_UNIS_VERSION(1); @@ -154,7 +178,9 @@ public: bool is_valid() const { return OB_INVALID_ID != tenant_id_ && type_ > share::DDL_INVALID && type_ < share::DDL_MAX && nullptr != allocator_; } TO_STRING_KV(K_(tenant_id), K_(object_id), K_(schema_version), K_(parallelism), K_(consumer_group_id), K_(parent_task_id), K_(task_id), - K_(type), KPC_(src_table_schema), KPC_(dest_table_schema), KPC_(ddl_arg), K_(tenant_data_version), K_(sub_task_trace_id)); + K_(type), KPC_(src_table_schema), KPC_(dest_table_schema), KPC_(ddl_arg), K_(tenant_data_version), + K_(sub_task_trace_id), KPC_(aux_rowkey_doc_schema), KPC_(aux_doc_rowkey_schema), KPC_(aux_doc_word_schema)); + public: int32_t sub_task_trace_id_; uint64_t tenant_id_; @@ -169,6 +195,9 @@ public: const ObTableSchema *dest_table_schema_; const obrpc::ObDDLArg *ddl_arg_; common::ObIAllocator *allocator_; + const ObTableSchema *aux_rowkey_doc_schema_; + const ObTableSchema *aux_doc_rowkey_schema_; + const ObTableSchema *aux_doc_word_schema_; uint64_t tenant_data_version_; }; @@ -491,6 +520,7 @@ public: {} virtual ~ObDDLTask() {} virtual int process() = 0; + virtual int on_child_task_finish(const uint64_t 
child_task_key, const int ret_code) { return common::OB_NOT_SUPPORTED; } virtual bool is_valid() const { return is_inited_; } typedef common::ObCurTraceId::TraceId TraceId; virtual const TraceId &get_trace_id() const { return trace_id_; } @@ -530,7 +560,7 @@ public: uint64_t get_data_format_version() const { return data_format_version_; } static int fetch_new_task_id(ObMySQLProxy &sql_proxy, const uint64_t tenant_id, int64_t &new_task_id); virtual int serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const; - virtual int deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos); + virtual int deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos); virtual int64_t get_serialize_param_size() const; const ObString &get_ddl_stmt_str() const { return ddl_stmt_str_; } int set_ddl_stmt_str(const ObString &ddl_stmt_str); diff --git a/src/rootserver/ddl_task/ob_drop_fts_index_task.cpp b/src/rootserver/ddl_task/ob_drop_fts_index_task.cpp new file mode 100644 index 0000000000..4bbf4e1e8a --- /dev/null +++ b/src/rootserver/ddl_task/ob_drop_fts_index_task.cpp @@ -0,0 +1,538 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE_FTS + +#include "rootserver/ddl_task/ob_drop_fts_index_task.h" +#include "share/schema/ob_multi_version_schema_service.h" +#include "share/ob_ddl_error_message_table_operator.h" +#include "sql/engine/cmd/ob_ddl_executor_util.h" +#include "rootserver/ob_root_service.h" + +using namespace oceanbase::share; + +namespace oceanbase +{ +namespace rootserver +{ + +ObDropFTSIndexTask::ObDropFTSIndexTask() + : ObDDLTask(DDL_DROP_FTS_INDEX), + root_service_(nullptr), + rowkey_doc_(), + doc_rowkey_(), + domain_index_(), + fts_doc_word_() +{ +} + +ObDropFTSIndexTask::~ObDropFTSIndexTask() +{ +} + +int ObDropFTSIndexTask::init( + const uint64_t tenant_id, + const int64_t task_id, + const uint64_t data_table_id, + const ObDDLType ddl_type, + const ObFTSDDLChildTaskInfo &rowkey_doc, + const ObFTSDDLChildTaskInfo &doc_rowkey, + const ObFTSDDLChildTaskInfo &domain_index, + const ObFTSDDLChildTaskInfo &fts_doc_word, + const int64_t schema_version, + const int64_t consumer_group_id) +{ + int ret = OB_SUCCESS; + const bool is_fts_task = ddl_type == DDL_DROP_FTS_INDEX; + + if (OB_UNLIKELY(OB_INVALID_ID == tenant_id + || task_id <= 0 + || OB_INVALID_ID == data_table_id + || !rowkey_doc.is_valid() + || !doc_rowkey.is_valid() + || !domain_index.is_valid() + || (is_fts_task && !fts_doc_word.is_valid()) + || schema_version <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(tenant_id), K(task_id), K(data_table_id), K(rowkey_doc), + K(doc_rowkey), K(domain_index), K(fts_doc_word), K(schema_version)); + } else if (OB_ISNULL(root_service_ = GCTX.root_service_)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys, root service is null", K(ret)); + } else if (OB_FAIL(rowkey_doc_.deep_copy_from_other(rowkey_doc, allocator_))) { + LOG_WARN("fail to deep copy from other", K(ret), K(rowkey_doc)); + } else if (OB_FAIL(doc_rowkey_.deep_copy_from_other(doc_rowkey, allocator_))) { + LOG_WARN("fail to deep copy from other", K(ret), 
K(doc_rowkey)); + } else if (OB_FAIL(domain_index_.deep_copy_from_other(domain_index, allocator_))) { + LOG_WARN("fail to deep copy from other", K(ret), K(domain_index)); + } else if (is_fts_task && OB_FAIL(fts_doc_word_.deep_copy_from_other(fts_doc_word, allocator_))) { + LOG_WARN("fail to deep copy from other", K(ret), K(fts_doc_word)); + } else { + task_type_ = ddl_type; + set_gmt_create(ObTimeUtility::current_time()); + tenant_id_ = tenant_id; + object_id_ = data_table_id; + target_object_id_ = domain_index.table_id_; + schema_version_ = schema_version; + task_id_ = task_id; + parent_task_id_ = 0; // no parent task + consumer_group_id_ = consumer_group_id; + task_version_ = OB_DROP_FTS_INDEX_TASK_VERSION; + dst_tenant_id_ = tenant_id; + dst_schema_version_ = schema_version; + is_inited_ = true; + } + return ret; +} + +int ObDropFTSIndexTask::init(const ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + if (OB_UNLIKELY(!task_record.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(task_record)); + } else if (OB_ISNULL(root_service_ = GCTX.root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root service is nullptr", K(ret)); + } else { + task_type_ = task_record.ddl_type_; + tenant_id_ = task_record.tenant_id_; + object_id_ = task_record.object_id_; + target_object_id_ = task_record.target_object_id_; + schema_version_ = task_record.schema_version_; + task_id_ = task_record.task_id_; + parent_task_id_ = task_record.parent_task_id_; + task_version_ = task_record.task_version_; + ret_code_ = task_record.ret_code_; + dst_tenant_id_ = tenant_id_; + dst_schema_version_ = schema_version_; + pos = 0; + if (OB_ISNULL(task_record.message_.ptr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, task record message is nullptr", K(ret), K(task_record)); + } else if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), + 
task_record.message_.length(), pos))) { + LOG_WARN("deserialize params from message failed", K(ret)); + } else { + is_inited_ = true; + // set up span during recover task + ddl_tracing_.open_for_recovery(); + } + } + return ret; +} + +int ObDropFTSIndexTask::process() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDropIndexTask has not been inited", K(ret)); + } else if (!need_retry()) { + // task is done + } else if (OB_FAIL(check_switch_succ())) { + LOG_WARN("check need retry failed", K(ret)); + } else { + ddl_tracing_.restore_span_hierarchy(); + const ObDDLTaskStatus status = static_cast(task_status_); + switch (status) { + case ObDDLTaskStatus::PREPARE: + if (OB_FAIL(prepare(WAIT_CHILD_TASK_FINISH))) { + LOG_WARN("fail to prepare", K(ret)); + } + break; + case ObDDLTaskStatus::WAIT_CHILD_TASK_FINISH: + if (OB_FAIL(check_and_wait_finish(SUCCESS))) { + LOG_WARN("fail to check and wait task", K(ret)); + } + break; + case ObDDLTaskStatus::SUCCESS: + if (OB_FAIL(succ())) { + LOG_WARN("do succ procedure failed", K(ret)); + } + break; + case ObDDLTaskStatus::FAIL: + if (OB_FAIL(fail())) { + LOG_WARN("do fail procedure failed", K(ret)); + } + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error unexpected, task status is not valid", K(ret), K(task_status_)); + } + ddl_tracing_.release_span_hierarchy(); + } + return ret; +} + +int ObDropFTSIndexTask::serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(nullptr == buf || buf_size <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(buf), K(buf_size)); + } else if (OB_FAIL(ObDDLTask::serialize_params_to_message(buf, buf_size, pos))) { + LOG_WARN("fail to ObDDLTask::serialize", K(ret)); + } else if (OB_FAIL(rowkey_doc_.serialize(buf, buf_size, pos))) { + LOG_WARN("fail to serialize aux rowkey doc table info", K(ret), K(rowkey_doc_)); + } else if 
(OB_FAIL(doc_rowkey_.serialize(buf, buf_size, pos))) { + LOG_WARN("fail to serialize aux doc rowkey table info", K(ret), K(doc_rowkey_)); + } else if (OB_FAIL(domain_index_.serialize(buf, buf_size, pos))) { + LOG_WARN("fail to serialize aux fts index table info", K(ret), K(domain_index_)); + } else if (OB_FAIL(fts_doc_word_.serialize(buf, buf_size, pos))) { + LOG_WARN("fail to serialize aux doc word aux table info", K(ret), K(fts_doc_word_)); + } + return ret; +} + +int ObDropFTSIndexTask::deserialize_params_from_message( + const uint64_t tenant_id, + const char *buf, + const int64_t buf_size, + int64_t &pos) +{ + int ret = OB_SUCCESS; + obrpc::ObDropIndexArg tmp_drop_index_arg; + ObFTSDDLChildTaskInfo tmp_info; + if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || nullptr == buf || buf_size <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(tenant_id), KP(buf), K(buf_size)); + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, buf, buf_size, pos))) { + LOG_WARN("fail to ObDDLTask::deserialize", K(ret), K(tenant_id)); + } else if (OB_FAIL(tmp_info.deserialize(buf, buf_size, pos))) { + LOG_WARN("fail to deserialize aux rowkey doc table info", K(ret)); + } else if (OB_FAIL(rowkey_doc_.deep_copy_from_other(tmp_info, allocator_))) { + LOG_WARN("fail to deep copy from tmp info", K(ret), K(tmp_info)); + } else if (OB_FAIL(tmp_info.deserialize(buf, buf_size, pos))) { + LOG_WARN("fail to deserialize aux doc rowkey table info", K(ret)); + } else if (OB_FAIL(doc_rowkey_.deep_copy_from_other(tmp_info, allocator_))) { + LOG_WARN("fail to deep copy from tmp info", K(ret), K(tmp_info)); + } else if (OB_FAIL(tmp_info.deserialize(buf, buf_size, pos))) { + LOG_WARN("fail to deserialize aux fts index table info", K(ret)); + } else if (OB_FAIL(domain_index_.deep_copy_from_other(tmp_info, allocator_))) { + LOG_WARN("fail to deep copy from tmp info", K(ret), K(tmp_info)); + } else if (OB_FAIL(tmp_info.deserialize(buf, buf_size, pos))) { 
+ LOG_WARN("fail to deserialize aux doc word table info", K(ret)); + } else if (OB_FAIL(fts_doc_word_.deep_copy_from_other(tmp_info, allocator_))) { + LOG_WARN("fail to deep copy from tmp info", K(ret), K(tmp_info)); + } + return ret; +} + +int64_t ObDropFTSIndexTask::get_serialize_param_size() const +{ + return ObDDLTask::get_serialize_param_size() + + rowkey_doc_.get_serialize_size() + + doc_rowkey_.get_serialize_size() + + domain_index_.get_serialize_size() + + fts_doc_word_.get_serialize_size(); +} + +void ObDropFTSIndexTask::flt_set_task_span_tag() const +{ + // TODO: @hanxuan, add me for tracing. +} + +void ObDropFTSIndexTask::flt_set_status_span_tag() const +{ + // TODO: @hanxuan, add me for tracing. +} + +int ObDropFTSIndexTask::check_switch_succ() +{ + int ret = OB_SUCCESS; + ObSchemaGetterGuard schema_guard; + bool is_domain_index_exist = false; + bool is_doc_word_exist = false; + bool is_rowkey_doc_exist = false; + bool is_doc_rowkey_exist = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("hasn't initialized", K(ret)); + } else if (OB_ISNULL(root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root service is nullptr", K(ret), KP(root_service_)); + } else if (OB_FAIL(refresh_schema_version())) { + LOG_WARN("refresh schema version failed", K(ret)); + } else if (OB_FAIL(root_service_->get_schema_service().get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("fail to get tenant schema", K(ret), K(tenant_id_)); + } else if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, domain_index_.table_id_, is_domain_index_exist))) { + LOG_WARN("fail to check table exist", K(ret), K(tenant_id_), K(domain_index_)); + } else if (OB_FAIL(is_fts_task() && schema_guard.check_table_exist(tenant_id_, fts_doc_word_.table_id_, is_doc_word_exist))) { + LOG_WARN("fail to check table exist", K(ret), K(tenant_id_), K(fts_doc_word_)); + } else if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, doc_rowkey_.table_id_, 
is_doc_rowkey_exist))) { + LOG_WARN("fail to check table exist", K(ret), K(tenant_id_), K(doc_rowkey_)); + } else if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, rowkey_doc_.table_id_, is_rowkey_doc_exist))) { + LOG_WARN("fail to check table exist", K(ret), K(tenant_id_), K(rowkey_doc_)); + } else if (!is_domain_index_exist && !is_doc_word_exist && !is_rowkey_doc_exist && !is_doc_rowkey_exist) { + task_status_ = ObDDLTaskStatus::SUCCESS; + } + return ret; +} + +int ObDropFTSIndexTask::prepare(const share::ObDDLTaskStatus &new_status) +{ + int ret = OB_SUCCESS; + bool has_finished = false; + ObSchemaGetterGuard schema_guard; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDropFTSIndexTask has not been inited", K(ret)); + } else if (OB_ISNULL(root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root service is nullptr", K(ret), KP(root_service_)); + } else if (OB_FAIL(root_service_->get_schema_service().get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("fail to get tenant schema guard", K(ret), K(tenant_id_)); + } else if (0 == domain_index_.task_id_ + && OB_FAIL(create_drop_index_task(schema_guard, domain_index_.table_id_, domain_index_.index_name_, domain_index_.task_id_))) { + LOG_WARN("fail to create drop index task", K(ret), K(domain_index_)); + } else if (is_fts_task() + && 0 == fts_doc_word_.task_id_ + && OB_FAIL(create_drop_index_task(schema_guard, fts_doc_word_.table_id_, fts_doc_word_.index_name_, fts_doc_word_.task_id_))) { + LOG_WARN("fail to create drop index task", K(ret), K(fts_doc_word_)); + } else if (OB_FAIL(wait_fts_child_task_finish(has_finished))) { + LOG_WARN("fail to wait fts child task finish", K(ret)); + } + if (has_finished) { + // overwrite return code + if (OB_FAIL(switch_status(new_status, true/*enable_flt*/, ret))) { + LOG_WARN("fail to switch status", K(ret), K(new_status)); + } + } + return ret; +} + +int ObDropFTSIndexTask::check_and_wait_finish(const 
share::ObDDLTaskStatus &new_status) +{ + int ret = OB_SUCCESS; + bool has_finished = false; + if (OB_FAIL(create_drop_doc_rowkey_task())) { + LOG_WARN("fail to create drop doc rowkey child task", K(ret)); + } else if (0 == rowkey_doc_.task_id_ && 0 == doc_rowkey_.task_id_) { + // If there are other fulltext indexes, there is no need to drop the rowkey doc auxiliary table. And the task + // status is set to success and skipped. + has_finished = true; + } else if (OB_FAIL(wait_doc_child_task_finish(has_finished))) { + LOG_WARN("fail to wait doc child task finish", K(ret)); + } + if (has_finished) { + // overwrite return code + if (OB_FAIL(switch_status(new_status, true/*enable_flt*/, ret))) { + LOG_WARN("fail to switch status", K(ret), K(new_status)); + } + } + return ret; +} + +int ObDropFTSIndexTask::check_drop_index_finish( + const uint64_t tenant_id, + const int64_t task_id, + const int64_t table_id, + bool &has_finished) +{ + int ret = OB_SUCCESS; + const ObAddr unused_addr; + int64_t unused_user_msg_len = 0; + share::ObDDLErrorMessageTableOperator::ObBuildDDLErrorMessage error_message; + has_finished = false; + if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || task_id <= 0 || OB_INVALID_ID == table_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(tenant_id), K(task_id), K(table_id)); + } else if (OB_FAIL(share::ObDDLErrorMessageTableOperator::get_ddl_error_message( + tenant_id, + task_id, + -1/*target_object_id*/, + table_id, + *GCTX.sql_proxy_, + error_message, + unused_user_msg_len))) { + LOG_WARN("fail to get ddl error message", K(ret), K(tenant_id), K(task_id), K(table_id)); + } else { + ret = error_message.ret_code_; + has_finished = true; + } + LOG_INFO("wait build index finish", K(ret), K(tenant_id), K(task_id), K(table_id), K(has_finished)); + return ret; +} + +int ObDropFTSIndexTask::wait_child_task_finish( + const common::ObIArray &child_task_ids, + bool &has_finished) +{ + int ret = OB_SUCCESS; + if (0 == 
child_task_ids.count()) { + has_finished = true; + } else { + bool finished = true; + for (int64_t i = 0; OB_SUCC(ret) && finished && i < child_task_ids.count(); ++i) { + const ObFTSDDLChildTaskInfo &task_info = child_task_ids.at(i); + finished = false; + if (-1 == task_info.task_id_) { + finished = true; + } else if (OB_FAIL(check_drop_index_finish(tenant_id_, task_info.task_id_, task_info.table_id_, finished))) { + LOG_WARN("fail to check fts index child task finish", K(ret)); + } else if (!finished) { // nothing to do + LOG_INFO("child task hasn't been finished", K(tenant_id_), K(task_info)); + } + } + if (OB_SUCC(ret) && finished) { + has_finished = true; + } + } + return ret; +} + +int ObDropFTSIndexTask::wait_fts_child_task_finish(bool &has_finished) +{ + int ret = OB_SUCCESS; + ObSEArray fts_child_tasks; + if (OB_FAIL(fts_child_tasks.push_back(domain_index_))) { + LOG_WARN("fail to push back fts index child task", K(ret)); + } else if (is_fts_task() && OB_FAIL(fts_child_tasks.push_back(fts_doc_word_))) { + LOG_WARN("fail to push back doc word child task", K(ret)); + } else if (OB_FAIL(wait_child_task_finish(fts_child_tasks, has_finished))) { + LOG_WARN("fail to wait child task finish", K(ret), K(fts_child_tasks)); + } + return ret; +} + +int ObDropFTSIndexTask::wait_doc_child_task_finish(bool &has_finished) +{ + int ret = OB_SUCCESS; + ObSEArray doc_child_tasks; + if (OB_FAIL(doc_child_tasks.push_back(doc_rowkey_))) { + LOG_WARN("fail to push back doc rowkey child task", K(ret)); + } else if (OB_FAIL(doc_child_tasks.push_back(rowkey_doc_))) { + LOG_WARN("fail to push back rowkey doc child task", K(ret)); + } else if (OB_FAIL(wait_child_task_finish(doc_child_tasks, has_finished))) { + LOG_WARN("fail to wait child task finish", K(ret), K(doc_child_tasks)); + } + return ret; +} + +int ObDropFTSIndexTask::create_drop_index_task( + share::schema::ObSchemaGetterGuard &guard, + const uint64_t index_tid, + const common::ObString &index_name, + int64_t &task_id) +{ + 
int ret = OB_SUCCESS; + const ObTableSchema *index_schema = nullptr; + const ObDatabaseSchema *database_schema = nullptr; + const ObTableSchema *data_table_schema = nullptr; + bool is_index_exist = false; + if (OB_ISNULL(root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root service is nullptr", K(ret), KP(root_service_)); + } else if (OB_UNLIKELY(OB_INVALID_ID == index_tid || index_name.empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(index_tid), K(index_name)); + } else if (OB_FAIL(guard.check_table_exist(tenant_id_, index_tid, is_index_exist))) { + LOG_WARN("fail to check table exist", K(ret), K(tenant_id_), K(index_tid)); + } else if (!is_index_exist) { + // nothing to do, just by pass. + task_id = -1; + } else if (OB_FAIL(guard.get_table_schema(tenant_id_, index_tid, index_schema))) { + LOG_WARN("fail to get index table schema", K(ret), K(tenant_id_), K(index_tid)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, index schema is nullptr", K(ret), KP(index_schema)); + } else if (OB_FAIL(guard.get_database_schema(tenant_id_, index_schema->get_database_id(), database_schema))) { + LOG_WARN("fail to get database schema", K(ret), K(index_schema->get_database_id())); + } else if (OB_FAIL(guard.get_table_schema(tenant_id_, index_schema->get_data_table_id(), data_table_schema))) { + LOG_WARN("fail to get data table schema", K(ret), K(index_schema->get_data_table_id())); + } else if (OB_UNLIKELY(nullptr == database_schema || nullptr == data_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, schema is nullptr", K(ret), KP(database_schema), KP(data_table_schema)); + } else { + int64_t ddl_rpc_timeout_us = 0; + obrpc::ObDropIndexArg arg; + obrpc::ObDropIndexRes res; + arg.is_inner_ = true; + arg.tenant_id_ = tenant_id_; + arg.exec_tenant_id_ = tenant_id_; + arg.index_table_id_ = index_tid; + arg.session_id_ = 
data_table_schema->get_session_id(); + arg.index_name_ = index_name; + arg.table_name_ = data_table_schema->get_table_name(); + arg.database_name_ = database_schema->get_database_name_str(); + arg.index_action_type_ = obrpc::ObIndexArg::DROP_INDEX; + arg.ddl_stmt_str_ = nullptr; + arg.is_add_to_scheduler_ = true; + arg.task_id_ = task_id_; + if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout( + index_schema->get_all_part_num() + data_table_schema->get_all_part_num(), ddl_rpc_timeout_us))) { + LOG_WARN("fail to get ddl rpc timeout", K(ret)); + } else if (OB_FAIL(root_service_->get_common_rpc_proxy().timeout(ddl_rpc_timeout_us).drop_index(arg, res))) { + LOG_WARN("fail to drop index", K(ret), K(ddl_rpc_timeout_us), K(arg), K(res.task_id_)); + } else { + task_id = res.task_id_; + } + LOG_INFO("drop index", K(ret), K(index_tid), K(index_name), K(task_id), + "data table name", data_table_schema->get_table_name_str(), + "database name", database_schema->get_database_name_str()); + } + return ret; +} + +int ObDropFTSIndexTask::create_drop_doc_rowkey_task() +{ + int ret = OB_SUCCESS; + ObSchemaGetterGuard schema_guard; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root service is nullptr", K(ret), KP(root_service_)); + } else if (OB_FAIL(root_service_->get_schema_service().get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("fail to get tenant schema guard", K(ret), K(tenant_id_)); + } else if (0 == rowkey_doc_.task_id_ + && OB_FAIL(create_drop_index_task(schema_guard, rowkey_doc_.table_id_, rowkey_doc_.index_name_, rowkey_doc_.task_id_))) { + LOG_WARN("fail to create drop index task", K(ret), K(rowkey_doc_)); + } else if (0 == doc_rowkey_.task_id_ + && OB_FAIL(create_drop_index_task(schema_guard, doc_rowkey_.table_id_, doc_rowkey_.index_name_, doc_rowkey_.task_id_))) { + LOG_WARN("fail to create drop index task", K(ret), 
K(doc_rowkey_)); + } + return ret; +} + +int ObDropFTSIndexTask::succ() +{ + return cleanup(); +} + +int ObDropFTSIndexTask::fail() +{ + return cleanup(); +} + +int ObDropFTSIndexTask::cleanup_impl() +{ + int ret = OB_SUCCESS; + ObString unused_str; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(report_error_code(unused_str))) { + LOG_WARN("report error code failed", K(ret)); + } else if (OB_FAIL(ObDDLTaskRecordOperator::delete_record(root_service_->get_sql_proxy(), tenant_id_, task_id_))) { + LOG_WARN("delete task record failed", K(ret), K(task_id_), K(schema_version_)); + } else { + need_retry_ = false; // clean succ, stop the task + } + LOG_INFO("clean task finished", K(ret), K(*this)); + return ret; +} +} // end namespace rootserver +} // end namespace oceanbase diff --git a/src/rootserver/ddl_task/ob_drop_fts_index_task.h b/src/rootserver/ddl_task/ob_drop_fts_index_task.h new file mode 100644 index 0000000000..883e72fa05 --- /dev/null +++ b/src/rootserver/ddl_task/ob_drop_fts_index_task.h @@ -0,0 +1,108 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_ROOTSERVER_OB_DROP_FTS_INDEX_TASK_H +#define OCEANBASE_ROOTSERVER_OB_DROP_FTS_INDEX_TASK_H + +#include "rootserver/ddl_task/ob_drop_index_task.h" + +namespace oceanbase +{ +namespace rootserver +{ + +/** + * For fulltext search index, the drop fts index task creates other subtasks, and the execution + * dependency directed graph between each task is as follows, + * + * ---> ObDropIndexTask(fts index)---> ---> ObDropIndexTask(rowkey doc) ---> + * / \ / \ + * ObDropFTSIndexTask(parent) ObDropFTSIndexTask(parent) ObDropFTSIndexTask(parent) + * \ / | \ / | + * ---> ObDropIndexTask(doc word)----> | ---> ObDropIndexTask(doc rowkey) ---> | + * | | + * --- non-last ---> SUCCESS (end) <--- last ---- + * + */ +class ObDropFTSIndexTask : public ObDDLTask +{ +public: + ObDropFTSIndexTask(); + virtual ~ObDropFTSIndexTask(); + + int init( + const uint64_t tenant_id, + const int64_t task_id, + const uint64_t data_table_id, + const share::ObDDLType ddl_type, + const ObFTSDDLChildTaskInfo &rowkey_doc, + const ObFTSDDLChildTaskInfo &doc_rowkey, + const ObFTSDDLChildTaskInfo &domain_index, + const ObFTSDDLChildTaskInfo &fts_doc_word, + const int64_t schema_version, + const int64_t consumer_group_id); + int init(const ObDDLTaskRecord &task_record); + virtual int process() override; + virtual int serialize_params_to_message( + char *buf, + const int64_t buf_size, + int64_t &pos) const override; + virtual int deserialize_params_from_message( + const uint64_t tenant_id, + const char *buf, + const int64_t buf_size, + int64_t &pos) override; + virtual int64_t get_serialize_param_size() const override; + + virtual void flt_set_task_span_tag() const override; + virtual void flt_set_status_span_tag() const override; + virtual int on_child_task_finish(const uint64_t child_task_key, const int ret_code) override { return OB_SUCCESS; } + + INHERIT_TO_STRING_KV("ObDDLTask", ObDDLTask, K_(rowkey_doc), K_(doc_rowkey), K_(domain_index), K_(fts_doc_word)); +private: + static 
const int64_t OB_DROP_FTS_INDEX_TASK_VERSION = 1; + int check_switch_succ(); + int prepare(const share::ObDDLTaskStatus &status); + int check_and_wait_finish(const share::ObDDLTaskStatus &status); + int check_drop_index_finish( + const uint64_t tenant_id, + const int64_t task_id, + const int64_t table_id, + bool &has_finished); + int wait_child_task_finish( + const common::ObIArray &child_task_ids, + bool &has_finished); + int wait_fts_child_task_finish(bool &has_finished); + int wait_doc_child_task_finish(bool &has_finished); + int create_drop_index_task( + share::schema::ObSchemaGetterGuard &guard, + const uint64_t index_tid, + const common::ObString &index_name, + int64_t &task_id); + int create_drop_doc_rowkey_task(); + int succ(); + int fail(); + virtual int cleanup_impl() override; + bool is_fts_task() const { return share::ObDDLType::DDL_DROP_FTS_INDEX == task_type_; } + +private: + ObRootService *root_service_; + ObFTSDDLChildTaskInfo rowkey_doc_; + ObFTSDDLChildTaskInfo doc_rowkey_; + ObFTSDDLChildTaskInfo domain_index_; + ObFTSDDLChildTaskInfo fts_doc_word_; +}; + +} // end namespace rootserver +} // end namespace oceanbase + +#endif // OCEANBASE_ROOTSERVER_OB_DROP_domain_INDEX_TASK_H diff --git a/src/rootserver/ddl_task/ob_drop_index_task.cpp b/src/rootserver/ddl_task/ob_drop_index_task.cpp index 31489ccdec..e9b34f902b 100644 --- a/src/rootserver/ddl_task/ob_drop_index_task.cpp +++ b/src/rootserver/ddl_task/ob_drop_index_task.cpp @@ -27,7 +27,10 @@ using namespace oceanbase::share::schema; using namespace oceanbase::sql; ObDropIndexTask::ObDropIndexTask() - : ObDDLTask(DDL_DROP_INDEX), wait_trans_ctx_(), drop_index_arg_() + : ObDDLTask(DDL_DROP_INDEX), + wait_trans_ctx_(), + root_service_(nullptr), + drop_index_arg_() { } @@ -72,7 +75,6 @@ int ObDropIndexTask::init( dst_tenant_id_ = tenant_id_; dst_schema_version_ = schema_version_; is_inited_ = true; - ddl_tracing_.open(); } return ret; } @@ -102,7 +104,7 @@ int ObDropIndexTask::init( task_type_ = 
task_record.ddl_type_; // could be drop index / mlog if (nullptr != task_record.message_.ptr()) { int64_t pos = 0; - if (OB_FAIL(deserlize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { + if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { LOG_WARN("deserialize params from message failed", K(ret)); } } @@ -457,14 +459,14 @@ int ObDropIndexTask::serialize_params_to_message(char *buf, const int64_t buf_si return ret; } -int ObDropIndexTask::deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) +int ObDropIndexTask::deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) { int ret = OB_SUCCESS; obrpc::ObDropIndexArg tmp_drop_index_arg; if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || nullptr == buf || buf_size <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(tenant_id), KP(buf), K(buf_size)); - } else if (OB_FAIL(ObDDLTask::deserlize_params_from_message(tenant_id, buf, buf_size, pos))) { + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, buf, buf_size, pos))) { LOG_WARN("ObDDLTask deserlize failed", K(ret)); } else if (OB_FAIL(tmp_drop_index_arg.deserialize(buf, buf_size, pos))) { LOG_WARN("deserialize failed", K(ret)); diff --git a/src/rootserver/ddl_task/ob_drop_index_task.h b/src/rootserver/ddl_task/ob_drop_index_task.h index 0ad4441e78..cb15046049 100644 --- a/src/rootserver/ddl_task/ob_drop_index_task.h +++ b/src/rootserver/ddl_task/ob_drop_index_task.h @@ -20,7 +20,7 @@ namespace oceanbase namespace rootserver { -class ObDropIndexTask : public ObDDLTask +class ObDropIndexTask final: public ObDDLTask { public: ObDropIndexTask(); @@ -40,7 +40,7 @@ public: virtual int process() override; virtual bool is_valid() const override; virtual int 
serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const override; - virtual int deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) override; + virtual int deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) override; virtual int64_t get_serialize_param_size() const override; INHERIT_TO_STRING_KV("ObDDLTask", ObDDLTask, KP_(root_service)); virtual void flt_set_task_span_tag() const override; @@ -68,6 +68,7 @@ private: } private: static const int64_t OB_DROP_INDEX_TASK_VERSION = 1; +private: ObDDLWaitTransEndCtx wait_trans_ctx_; ObRootService *root_service_; obrpc::ObDropIndexArg drop_index_arg_; diff --git a/src/rootserver/ddl_task/ob_index_build_task.cpp b/src/rootserver/ddl_task/ob_index_build_task.cpp index 9fd0bd5266..9dbfc1af4d 100755 --- a/src/rootserver/ddl_task/ob_index_build_task.cpp +++ b/src/rootserver/ddl_task/ob_index_build_task.cpp @@ -466,7 +466,7 @@ int ObIndexBuildTask::init(const ObDDLTaskRecord &task_record) LOG_WARN("invalid arguments", K(ret), K(task_record)); } else if (OB_FAIL(DDL_SIM(task_record.tenant_id_, task_record.task_id_, DDL_TASK_INIT_BY_RECORD_FAILED))) { LOG_WARN("ddl sim failure", K(task_record.tenant_id_), K(task_record.task_id_)); - } else if (OB_FAIL(deserlize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { + } else if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { LOG_WARN("deserialize params from message failed", K(ret)); } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard( task_record.tenant_id_, schema_guard, schema_version))) { @@ -1626,14 +1626,14 @@ int ObIndexBuildTask::serialize_params_to_message(char *buf, const int64_t buf_l return ret; } -int 
ObIndexBuildTask::deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) +int ObIndexBuildTask::deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) { int ret = OB_SUCCESS; ObCreateIndexArg tmp_arg; if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || nullptr == buf || data_len <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(tenant_id), KP(buf), K(data_len)); - } else if (OB_FAIL(ObDDLTask::deserlize_params_from_message(tenant_id, buf, data_len, pos))) { + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, buf, data_len, pos))) { LOG_WARN("ObDDLTask deserlize failed", K(ret)); } else if (OB_FAIL(tmp_arg.deserialize(buf, data_len, pos))) { LOG_WARN("deserialize table failed", K(ret)); diff --git a/src/rootserver/ddl_task/ob_index_build_task.h b/src/rootserver/ddl_task/ob_index_build_task.h index c9bebcc4f9..986036785a 100644 --- a/src/rootserver/ddl_task/ob_index_build_task.h +++ b/src/rootserver/ddl_task/ob_index_build_task.h @@ -114,7 +114,7 @@ public: virtual bool is_valid() const override; virtual int collect_longops_stat(share::ObLongopsValue &value) override; virtual int serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const override; - virtual int deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) override; + virtual int deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) override; virtual int64_t get_serialize_param_size() const override; virtual bool support_longops_monitoring() const override { return true; } static int deep_copy_index_arg(common::ObIAllocator &allocator, const obrpc::ObCreateIndexArg &source_arg, obrpc::ObCreateIndexArg &dest_arg); diff --git a/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp 
b/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp index 3bbfc184a2..6b1b32e91a 100644 --- a/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp +++ b/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp @@ -223,7 +223,7 @@ int ObModifyAutoincTask::init(const ObDDLTaskRecord &task_record) } else if (OB_UNLIKELY(!task_record.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(task_record)); - } else if (OB_FAIL(deserlize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { + } else if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { LOG_WARN("deserialize params from message failed", K(ret)); } else if (OB_FAIL(set_ddl_stmt_str(task_record.ddl_stmt_str_))) { LOG_WARN("set ddl stmt str failed", K(ret)); @@ -650,14 +650,14 @@ int ObModifyAutoincTask::serialize_params_to_message(char *buf, const int64_t bu return ret; } -int ObModifyAutoincTask::deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) +int ObModifyAutoincTask::deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) { int ret = OB_SUCCESS; obrpc::ObAlterTableArg tmp_arg; if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || nullptr == buf || data_len <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(tenant_id), KP(buf), K(data_len)); - } else if (OB_FAIL(ObDDLTask::deserlize_params_from_message(tenant_id, buf, data_len, pos))) { + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, buf, data_len, pos))) { LOG_WARN("ObDDLTask deserlize failed", K(ret)); } else if (OB_FAIL(tmp_arg.deserialize(buf, data_len, pos))) { LOG_WARN("serialize table failed", K(ret)); diff --git a/src/rootserver/ddl_task/ob_modify_autoinc_task.h b/src/rootserver/ddl_task/ob_modify_autoinc_task.h index 
2d3a6644d1..42ebc06e7a 100644 --- a/src/rootserver/ddl_task/ob_modify_autoinc_task.h +++ b/src/rootserver/ddl_task/ob_modify_autoinc_task.h @@ -66,7 +66,7 @@ public: int init(const ObDDLTaskRecord &task_record); virtual int process() override; virtual int serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const override; - virtual int deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) override; + virtual int deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) override; virtual int64_t get_serialize_param_size() const override; int notify_update_autoinc_finish(const uint64_t autoinc_val, const int ret_code); virtual void flt_set_task_span_tag() const override; diff --git a/src/rootserver/ddl_task/ob_table_redefinition_task.cpp b/src/rootserver/ddl_task/ob_table_redefinition_task.cpp index b04865d7c1..82a5c5b23d 100755 --- a/src/rootserver/ddl_task/ob_table_redefinition_task.cpp +++ b/src/rootserver/ddl_task/ob_table_redefinition_task.cpp @@ -144,7 +144,7 @@ int ObTableRedefinitionTask::init(const ObDDLTaskRecord &task_record) } else if (!task_record.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(task_record)); - } else if (OB_FAIL(deserlize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { + } else if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { LOG_WARN("deserialize params from message failed", K(ret), K(task_record.message_), K(common::lbt())); } else if (OB_FAIL(set_ddl_stmt_str(task_record.ddl_stmt_str_))) { LOG_WARN("set ddl stmt str failed", K(ret)); @@ -1121,7 +1121,7 @@ int ObTableRedefinitionTask::serialize_params_to_message(char *buf, const int64_ return ret; } -int ObTableRedefinitionTask::deserlize_params_from_message(const 
uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) +int ObTableRedefinitionTask::deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) { int ret = OB_SUCCESS; int8_t copy_indexes = 0; @@ -1134,7 +1134,7 @@ int ObTableRedefinitionTask::deserlize_params_from_message(const uint64_t tenant if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || nullptr == buf || data_len <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(tenant_id), KP(buf), K(data_len)); - } else if (OB_FAIL(ObDDLTask::deserlize_params_from_message(tenant_id, buf, data_len, pos))) { + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, buf, data_len, pos))) { LOG_WARN("ObDDLTask deserlize failed", K(ret)); } else if (OB_FAIL(tmp_arg.deserialize(buf, data_len, pos))) { LOG_WARN("serialize table failed", K(ret)); diff --git a/src/rootserver/ddl_task/ob_table_redefinition_task.h b/src/rootserver/ddl_task/ob_table_redefinition_task.h index 1b52cefc53..ba72eaf982 100644 --- a/src/rootserver/ddl_task/ob_table_redefinition_task.h +++ b/src/rootserver/ddl_task/ob_table_redefinition_task.h @@ -59,7 +59,7 @@ public: inline void set_is_ignore_errors(const bool is_ignore_errors) {is_ignore_errors_ = is_ignore_errors;} inline void set_is_do_finish(const bool is_do_finish) {is_do_finish_ = is_do_finish;} virtual int serialize_params_to_message(char *buf, const int64_t buf_len, int64_t &pos) const override; - virtual int deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) override; + virtual int deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) override; virtual int64_t get_serialize_param_size() const override; int assign(const ObTableRedefinitionTask *table_redef_task); virtual int collect_longops_stat(share::ObLongopsValue &value) override; diff --git 
a/src/rootserver/freeze/ob_checksum_validator.cpp b/src/rootserver/freeze/ob_checksum_validator.cpp index 52b7a80fcc..33a039e305 100755 --- a/src/rootserver/freeze/ob_checksum_validator.cpp +++ b/src/rootserver/freeze/ob_checksum_validator.cpp @@ -27,6 +27,7 @@ #include "share/ob_tablet_meta_table_compaction_operator.h" #include "share/ob_zone_merge_info.h" #include "share/ob_freeze_info_manager.h" +#include "rootserver/freeze/ob_fts_checksum_validate_util.h" namespace oceanbase { @@ -735,6 +736,8 @@ int ObChecksumValidator::validate_index_checksum() { if (OB_FAIL(handle_index_table(*simple_schema_))) { LOG_WARN("fail to handle index table", KR(ret), KPC_(simple_schema)); } + } else if (table_compaction_info_.need_check_fts_) { + LOG_INFO("check fts for data table", KR(ret), K_(table_compaction_info)); } else if (table_compaction_info_.is_compacted()) { // for data table, check status if (0 == table_compaction_info_.unfinish_index_cnt_) { // no unfinish index table_compaction_info_.set_index_ckm_verified(); @@ -771,6 +774,8 @@ int ObChecksumValidator::handle_index_table( if (index_compaction_info.finish_compaction()) { index_compaction_info.set_index_ckm_verified(); } + } else if (fts_group_array_.need_check_fts() && index_simple_schema.is_fts_or_multivalue_index()) { + LOG_INFO("skip fts or multivalue index", KR(ret), K(index_simple_schema), K(index_compaction_info)); } else { if (index_compaction_info.is_compacted() && data_compaction_info.is_compacted()) { #ifdef ERRSIM @@ -812,7 +817,7 @@ int ObChecksumValidator::handle_index_table( } // deal with data table if (OB_SUCC(ret) && index_compaction_info.finish_idx_verified() && !data_compaction_info.finish_idx_verified()) { - if (0 == (--data_compaction_info.unfinish_index_cnt_)) { + if ((0 == (--data_compaction_info.unfinish_index_cnt_)) && !data_compaction_info.need_check_fts_) { data_compaction_info.set_index_ckm_verified(); } if (OB_FAIL(table_compaction_map_.set_refactored( @@ -821,7 +826,7 @@ int 
ObChecksumValidator::handle_index_table( LOG_WARN("failed to set", K(ret), K(data_compaction_info)); } } - LOG_TRACE("handle index table", KR(ret), K_(table_id), K_(table_compaction_info), K(data_compaction_info)); + LOG_TRACE("handle index table", KR(ret), K_(table_id), K(index_compaction_info), K(data_compaction_info)); return ret; } @@ -897,5 +902,133 @@ int ObChecksumValidator::get_replica_ckm(const bool include_larger_than/* = fals share::OBCG_DEFAULT, true/*with_order_by_field*/); } +/***************************************** FTS Checksum Section ******************************************/ + +int ObChecksumValidator::build_ckm_item_for_fts(const int64_t table_id, + ObTableCkmItems &ckm_item, + ObIArray &finish_table_ids) +{ + int ret = OB_SUCCESS; + bool skip_verify = false; + ObTableCompactionInfo table_compaction_info; + if (OB_FAIL(get_table_compaction_info(table_id, table_compaction_info))) { + LOG_WARN("failed to get table compaction info", KR(ret)); + } else if (OB_UNLIKELY(!table_compaction_info.is_compacted())) { + LOG_WARN("exist special status table", KR(ret), K(table_compaction_info)); + skip_verify = true; + } else if (OB_FAIL(ckm_item.build(table_id, compaction_scn_, *sql_proxy_, + *schema_guard_, tablet_ls_pair_cache_))) { + if (OB_TABLE_NOT_EXIST == ret || OB_STATE_NOT_MATCH == ret) { + skip_verify = true; + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to prepare schema checksum items", KR(ret), K_(tenant_id), K(table_id)); + } + } else if (OB_FAIL(finish_table_ids.push_back(table_id))) { + LOG_WARN("failed to push index id", KR(ret), K(table_id)); + } else { + ckm_item.set_is_fts_index(true); + } + if (OB_FAIL(ret) || !skip_verify) { + } else if (OB_FAIL(finish_verify_fts_ckm(table_id))) { + LOG_WARN("failed to skip verify fts ckm", KR(ret), K(table_id)); + } else { + LOG_INFO("skip verify fts ckm", KR(ret), K(table_id)); + } + return ret; +} + +int ObChecksumValidator::finish_verify_fts_ckm(const int64_t table_id) +{ + int ret = OB_SUCCESS; 
+ ObTableCompactionInfo table_compaction_info; + if (OB_FAIL(get_table_compaction_info(table_id, table_compaction_info))) { + LOG_WARN("fail to get table compaction info", KR(ret), K(table_id), K(table_compaction_info)); + } else if (FALSE_IT(table_compaction_info.need_check_fts_ = false)) { + } else if (table_compaction_info.unfinish_index_cnt_ <= 0) { + // for data table, may exist other index + table_compaction_info.set_index_ckm_verified(); + } + if (FAILEDx(table_compaction_map_.set_refactored(table_id, table_compaction_info, true /*overwrite*/))) { + LOG_WARN("fail to set refactored", KR(ret), K(table_id), K(table_compaction_info)); + } + return ret; +} + +#define VALIDATE_CKM(data_ckm, index_ckm) \ + if (OB_FAIL(ret) || !data_ckm.is_inited() || !index_ckm.is_inited()) { \ + } else if (OB_FAIL(ObTableCkmItems::validate_ckm_func[0]( \ + compaction_scn_, *sql_proxy_, data_ckm, index_ckm))) { \ + LOG_ERROR("failed to validate ckm func", KR(ret), K(data_ckm), \ + K(index_ckm)); \ + } + +int ObChecksumValidator::handle_fts_checksum( + share::schema::ObSchemaGetterGuard &schema_guard, + const ObFTSGroupArray &fts_group_array) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(fts_group_array.count() <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(fts_group_array)); + } else { + schema_guard_ = &schema_guard; + ObSEArray finish_table_ids; + finish_table_ids.set_attr(ObMemAttr(tenant_id_, "FTS_CKM_VER")); + for (int64_t arr_idx = 0; OB_SUCC(ret) && arr_idx < fts_group_array.count(); ++arr_idx) { + const ObFTSGroup &fts_group = fts_group_array.at(arr_idx); + if (OB_FAIL(validate_rowkey_doc_indexs(fts_group, finish_table_ids))) { + LOG_WARN("failed to validate rowkey doc indexs", KR(ret), K(fts_group)); + } + for (int64_t idx = 0; OB_SUCC(ret) && idx < fts_group.count(); ++idx) { + if (OB_FAIL(validate_fts_indexs(fts_group.at(idx), finish_table_ids))) { + LOG_WARN("failed to validate doc rowkey index", KR(ret), K(idx), K(fts_group)); + } else 
{ + LOG_INFO("validate index info", K(ret), K(fts_group), K(idx), K(fts_group.at(idx))); + } + } // for of fts_group + } // for of fts_group_array + for (int64_t idx = 0; OB_SUCC(ret) && idx < finish_table_ids.count(); ++idx) { + if (OB_FAIL(finish_verify_fts_ckm(finish_table_ids.at(idx)))) { + LOG_WARN("fail to skip or finish verify fts", KR(ret), K(idx), K(finish_table_ids)); + } + } // for + schema_guard_ = NULL; + } + + return ret; +} + +int ObChecksumValidator::validate_rowkey_doc_indexs(const ObFTSGroup &fts_group, ObIArray &finish_table_ids) +{ + int ret = OB_SUCCESS; + ObTableCkmItems ckm_item[3]; + if (OB_FAIL(build_ckm_item_for_fts(fts_group.data_table_id_, ckm_item[0], finish_table_ids))) { + LOG_WARN_RET(ret, "failed to build ckm", K(fts_group.data_table_id_)); + } else if (OB_FAIL(build_ckm_item_for_fts(fts_group.rowkey_doc_index_id_, ckm_item[1], finish_table_ids))) { + LOG_WARN_RET(ret, "failed to build ckm", K(fts_group.rowkey_doc_index_id_)); + } else if (OB_FAIL(build_ckm_item_for_fts(fts_group.doc_rowkey_index_id_, ckm_item[2], finish_table_ids))) { + LOG_WARN_RET(ret, "failed to build ckm", K(fts_group.doc_rowkey_index_id_)); + } + // all fts index is local index now + VALIDATE_CKM(ckm_item[0], ckm_item[1]); + VALIDATE_CKM(ckm_item[1], ckm_item[2]); + return ret; +} + +int ObChecksumValidator::validate_fts_indexs(const ObFTSIndexInfo &index_info, ObIArray &finish_table_ids) +{ + int ret = OB_SUCCESS; + ObTableCkmItems ckm_item[2]; + if (OB_FAIL(build_ckm_item_for_fts(index_info.fts_index_id_, ckm_item[0], finish_table_ids))) { + LOG_WARN_RET(ret, "failed to build ckm", K(index_info.fts_index_id_)); + } else if (OB_FAIL(build_ckm_item_for_fts(index_info.doc_word_index_id_, ckm_item[1], finish_table_ids))) { + LOG_WARN_RET(ret, "failed to build ckm", K(index_info.doc_word_index_id_)); + } + VALIDATE_CKM(ckm_item[0], ckm_item[1]); + return ret; +} +#undef VALIDATE_CKM + } // end namespace rootserver } // end namespace oceanbase diff --git 
a/src/rootserver/freeze/ob_checksum_validator.h b/src/rootserver/freeze/ob_checksum_validator.h index b93d0070dc..fc8dcbf545 100644 --- a/src/rootserver/freeze/ob_checksum_validator.h +++ b/src/rootserver/freeze/ob_checksum_validator.h @@ -31,7 +31,9 @@ namespace rootserver { class ObZoneMergeManager; class ObServerManager; - +struct ObFTSGroupArray; +struct ObFTSGroup; +struct ObFTSIndexInfo; struct ObReplicaCkmItems { ObReplicaCkmItems() @@ -64,7 +66,8 @@ public: compaction::ObCkmValidatorStatistics &statistics, ObArray &finish_tablet_ls_pair_array, ObArray &finish_tablet_ckm_array, - compaction::ObUncompactInfo &uncompact_info) + compaction::ObUncompactInfo &uncompact_info, + ObFTSGroupArray &fts_group_array) : is_inited_(false), is_primary_service_(false), need_validate_index_ckm_(false), @@ -85,6 +88,7 @@ public: finish_tablet_ls_pair_array_(finish_tablet_ls_pair_array), finish_tablet_ckm_array_(finish_tablet_ckm_array), uncompact_info_(uncompact_info), + fts_group_array_(fts_group_array), schema_guard_(nullptr), simple_schema_(nullptr), table_compaction_info_(), @@ -118,6 +122,9 @@ public: const common::ObIArray &tablet_ls_pairs); int batch_write_tablet_ckm(); int batch_update_report_scn(); + int handle_fts_checksum( + share::schema::ObSchemaGetterGuard &schema_guard, + const ObFTSGroupArray &fts_group_array); static const int64_t SPECIAL_TABLE_ID = 1; TO_STRING_KV(K_(tenant_id), K_(is_primary_service), K_(table_id), K_(compaction_scn)); private: @@ -150,6 +157,14 @@ private: const ObArray &tablet_checksum_items); bool check_waiting_tablet_checksum_timeout() const; int try_update_tablet_checksum_items(); + /* FTS Checksum Section */ + int validate_rowkey_doc_indexs(const ObFTSGroup &fts_group, ObIArray &finish_table_ids); + int validate_fts_indexs(const ObFTSIndexInfo &index_info, ObIArray &finish_table_ids); + int build_ckm_item_for_fts( + const int64_t table_id, + compaction::ObTableCkmItems &data_table_ckm, + ObIArray &finish_table_ids); + int 
finish_verify_fts_ckm(const int64_t table_id); static const int64_t PRINT_CROSS_CLUSTER_LOG_INVERVAL = 10 * 60 * 1000 * 1000; // 10 mins static const int64_t MAX_TABLET_CHECKSUM_WAIT_TIME_US = 36 * 3600 * 1000 * 1000L; // 36 hours static const int64_t MAX_BATCH_INSERT_COUNT = 1500; @@ -174,6 +189,7 @@ private: ObArray &finish_tablet_ls_pair_array_; ObArray &finish_tablet_ckm_array_; compaction::ObUncompactInfo &uncompact_info_; + ObFTSGroupArray &fts_group_array_; /* different for every table */ share::schema::ObSchemaGetterGuard *schema_guard_; diff --git a/src/rootserver/freeze/ob_fts_checksum_validate_util.cpp b/src/rootserver/freeze/ob_fts_checksum_validate_util.cpp new file mode 100644 index 0000000000..868d6a1d15 --- /dev/null +++ b/src/rootserver/freeze/ob_fts_checksum_validate_util.cpp @@ -0,0 +1,39 @@ +//Copyright (c) 2024 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
+#include "rootserver/freeze/ob_fts_checksum_validate_util.h" +#include "share/rc/ob_tenant_base.h" +#include "storage/compaction/ob_compaction_util.h" + +namespace oceanbase +{ +namespace rootserver +{ +ObFTSGroup::ObFTSGroup() + : data_table_id_(0), + rowkey_doc_index_id_(0), + doc_rowkey_index_id_(0), + index_info_() +{ + index_info_.set_attr(ObMemAttr(MTL_ID(), "FTS_GROUP")); +} + +ObFTSGroupArray::ObFTSGroupArray() + : fts_groups_() +{ + fts_groups_.set_attr(ObMemAttr(MTL_ID(), "FTS_INFO_ARR")); +} + +bool ObFTSGroupArray::need_check_fts() const +{ + return VERIFY_FTS_CHECKSUM && count() > 0; +} + +} // namespace rootserver +} // namespace oceanbase diff --git a/src/rootserver/freeze/ob_fts_checksum_validate_util.h b/src/rootserver/freeze/ob_fts_checksum_validate_util.h new file mode 100644 index 0000000000..1c012c168c --- /dev/null +++ b/src/rootserver/freeze/ob_fts_checksum_validate_util.h @@ -0,0 +1,73 @@ +//Copyright (c) 2024 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
+#ifndef OB_ROOTSERVER_FREEZE_FTS_CHECKSUM_VALIDATE_UTIL_H_ +#define OB_ROOTSERVER_FREEZE_FTS_CHECKSUM_VALIDATE_UTIL_H_ +#include "lib/container/ob_se_array.h" +#include "share/ob_delegate.h" +namespace oceanbase +{ +namespace share { +} // namespace share +namespace rootserver +{ + +struct ObFTSIndexInfo +{ + ObFTSIndexInfo() + : fts_index_id_(0), + doc_word_index_id_(0) + {} + ObFTSIndexInfo(const int64_t index_id, const int64_t doc_word_index_id) + : fts_index_id_(index_id), + doc_word_index_id_(doc_word_index_id) + {} + TO_STRING_KV(K_(fts_index_id), K_(doc_word_index_id)); + int64_t fts_index_id_; + int64_t doc_word_index_id_; +}; + +struct ObFTSGroup +{ + ObFTSGroup(); + ~ObFTSGroup() {} + bool is_valid() const + { + return data_table_id_ > 0 && rowkey_doc_index_id_ > 0 && doc_rowkey_index_id_ > 0 + && index_info_.count() >= 0; + } + CONST_DELEGATE_WITH_RET(index_info_, count, int64_t); + CONST_DELEGATE_WITH_RET(index_info_, at, const ObFTSIndexInfo&); + DELEGATE_WITH_RET(index_info_, push_back, int); + TO_STRING_KV(K_(data_table_id), K_(rowkey_doc_index_id), K(doc_rowkey_index_id_), + "index_cnt", index_info_.count(), K_(index_info)); + int64_t data_table_id_; + int64_t rowkey_doc_index_id_; + int64_t doc_rowkey_index_id_; + ObSEArray index_info_; +}; + +struct ObFTSGroupArray +{ + ObFTSGroupArray(); + ~ObFTSGroupArray() {} + CONST_DELEGATE_WITH_RET(fts_groups_, count, int64_t); + CONST_DELEGATE_WITH_RET(fts_groups_, at, const ObFTSGroup&); + DELEGATE_WITH_RET(fts_groups_, push_back, int); + DELEGATE_WITH_RET(fts_groups_, reuse, void); + bool need_check_fts() const; + TO_STRING_KV(K_(fts_groups)); + common::ObSEArray fts_groups_; +}; + + +} // namespace rootserver +} // namespace oceanbase + +#endif // OB_ROOTSERVER_FREEZE_FTS_CHECKSUM_VALIDATE_UTIL_H_ diff --git a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp index e903b62119..f3ac877081 100644 --- 
a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp +++ b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp @@ -50,10 +50,10 @@ ObMajorMergeProgressChecker::ObMajorMergeProgressChecker( loop_cnt_(0), last_errno_(OB_SUCCESS), tenant_id_(tenant_id), compaction_scn_(), expected_epoch_(OB_INVALID_ID), sql_proxy_(nullptr), schema_service_(nullptr), server_trace_(nullptr), progress_(), - tablet_status_map_(), table_compaction_map_(), + tablet_status_map_(), table_compaction_map_(), fts_group_array_(), ckm_validator_(tenant_id, stop_, tablet_ls_pair_cache_, tablet_status_map_, table_compaction_map_, idx_ckm_validate_array_, validator_statistics_, - finish_tablet_ls_pair_array_, finish_tablet_ckm_array_, uncompact_info_), + finish_tablet_ls_pair_array_, finish_tablet_ckm_array_, uncompact_info_, fts_group_array_), uncompact_info_(), ls_locality_cache_(), total_time_guard_(), validator_statistics_(), batch_size_mgr_() {} int ObMajorMergeProgressChecker::init( @@ -277,6 +277,7 @@ int ObMajorMergeProgressChecker::check_schema_version() int ObMajorMergeProgressChecker::prepare_unfinish_table_ids() { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; ObArray table_id_array; if (OB_FAIL(check_schema_version())) { LOG_WARN("fail to check schema version", KR(ret), K_(tenant_id)); @@ -314,11 +315,13 @@ int ObMajorMergeProgressChecker::prepare_unfinish_table_ids() LOG_WARN("failed to get table & index schemas", KR(ret), K(table_id)); } else if (is_table_valid) { int64_t index_cnt = 0; + bool need_check_fts = false; for (int64_t j = 0; OB_SUCC(ret) && j < index_schemas.count(); ++j) { // loop index info index_simple_schema = index_schemas.at(j); if (should_ignore_cur_table(index_simple_schema)) { // should ignore cur table continue; + } else if (FALSE_IT(need_check_fts |= index_simple_schema->is_fts_or_multivalue_index())) { } else if (index_simple_schema->should_not_validate_data_index_ckm()) { if 
(OB_FAIL(not_validate_index_ids.push_back(index_simple_schema->get_table_id()))) { LOG_WARN("failed to push back index id", KR(ret), KPC(index_simple_schema)); @@ -331,8 +334,15 @@ int ObMajorMergeProgressChecker::prepare_unfinish_table_ids() } } // end of for if (OB_SUCC(ret)) { // add table_compaction_info + if (need_check_fts + && (!VERIFY_FTS_CHECKSUM || OB_TMP_FAIL(prepare_fts_group(table_id, index_schemas)))) { + need_check_fts = false; + LOG_WARN_RET(tmp_ret, "close fts verify or fail to prepare fts group", + K(table_id), K(need_check_fts), K(table_compaction_info)); + } table_compaction_info.table_id_ = table_id; table_compaction_info.unfinish_index_cnt_ = index_cnt; + table_compaction_info.need_check_fts_ = need_check_fts; if (OB_FAIL(table_compaction_map_.set_refactored( table_id, table_compaction_info, true /*overwrite*/))) { LOG_WARN("fail to set refactored", KR(ret), K(table_id), K(table_compaction_info)); @@ -473,6 +483,10 @@ int ObMajorMergeProgressChecker::check_index_and_rest_table() LOG_WARN("failed to validate index checksum", KR(ret), K_(compaction_scn)); } else if (OB_FAIL(deal_with_rest_data_table())) { LOG_WARN("deal with rest data table", KR(ret), K_(compaction_scn)); + } else if (0 == progress_.table_cnt_[INITIAL] + && fts_group_array_.need_check_fts() + && OB_FAIL(handle_fts_checksum())) { + LOG_WARN("failed to handle fts checksum", KR(ret), K_(compaction_scn), K_(progress)); } else if (progress_.is_merge_finished()) { LOG_INFO("progress is check finished", KR(ret), K_(progress)); } else if (progress_.only_remain_special_table_to_verified() || table_ids_.empty()) { @@ -966,5 +980,104 @@ int ObMajorMergeProgressChecker::generate_tablet_status_map() return ret; } +int inner_find_doc_word_index( + const ObIArray &index_schemas, + const ObSimpleTableSchemaV2 &input_index_schema, + const ObSimpleTableSchemaV2 *&doc_word_schema) +{ + int ret = OB_SUCCESS; + doc_word_schema = NULL; + const int64_t buf_size = OB_MAX_TABLE_NAME_BUF_LENGTH; + char 
buf[buf_size] = {0}; + const ObString &input_index_name = input_index_schema.get_table_name_str(); + for (int64_t idx = 0; OB_SUCC(ret) && idx < index_schemas.count(); ++idx) { + const ObSimpleTableSchemaV2 *index_schema = index_schemas.at(idx); + if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("schema is unexpected null", KR(ret), K(idx), KP(index_schema)); + } else if (is_fts_doc_word_aux(index_schema->get_index_type())) { + if (OB_FAIL(databuff_printf(buf, buf_size, "%.*s_fts_doc_word", input_index_name.length(), input_index_name.ptr()))) { + LOG_WARN("fail to printf fts doc word name str", K(ret), K(input_index_name)); + } else if (0 == index_schema->get_table_name_str().case_compare(buf)) { + doc_word_schema = index_schema; + break; + } + } + } + if (OB_SUCC(ret) && OB_ISNULL(doc_word_schema)) { + ret = OB_ENTRY_NOT_EXIST; + } + return ret; +} + +int ObMajorMergeProgressChecker::prepare_fts_group( + const int64_t table_id, + const ObIArray &index_schemas) +{ + int ret = OB_SUCCESS; + ObFTSGroup fts_group; + fts_group.data_table_id_ = table_id; + for (int64_t idx = 0; OB_SUCC(ret) && idx < index_schemas.count(); ++idx) { + const ObSimpleTableSchemaV2 *index_schema = index_schemas.at(idx); + if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("schema is unexpected null", KR(ret), K(idx), KP(index_schema)); + } else if (is_rowkey_doc_aux(index_schema->get_index_type())) { + if (OB_UNLIKELY(0 != fts_group.rowkey_doc_index_id_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("can't have two rowkey_doc_index_id", KR(ret), K(fts_group)); + } else { + fts_group.rowkey_doc_index_id_ = index_schema->get_table_id(); + } + } else if (is_doc_rowkey_aux(index_schema->get_index_type())) { + if (OB_UNLIKELY(0 != fts_group.doc_rowkey_index_id_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("can't have two doc_rowkey_index_id", KR(ret), K(fts_group)); + } else { + fts_group.doc_rowkey_index_id_ = index_schema->get_table_id(); + } + } else if 
(is_fts_index_aux(index_schema->get_index_type())) { + const ObSimpleTableSchemaV2 *doc_word_schema = NULL; + if (OB_FAIL(inner_find_doc_word_index(index_schemas, *index_schema, doc_word_schema))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + LOG_INFO("doc word schema of fts index is not found, skip verify", KR(ret), KPC(index_schema)); + } else { + LOG_WARN("failed to find doc word index", KR(ret), K(idx), KPC(index_schema)); + } + } else if (OB_FAIL(fts_group.push_back(ObFTSIndexInfo(index_schema->get_table_id(), doc_word_schema->get_table_id())))) { + LOG_WARN("failed to push doc word index", KR(ret), K(idx), KPC(index_schema), KPC(doc_word_schema)); + } + } + } + if (OB_FAIL(ret) || !fts_group.is_valid()) { + } else if (OB_FAIL(fts_group_array_.push_back(fts_group))) { + LOG_WARN("failed to prepare push fts group", KR(ret), K(fts_group)); + } else { + LOG_INFO("success to prepare fts group", KR(ret), K(fts_group)); + } + return ret; +} + +int ObMajorMergeProgressChecker::handle_fts_checksum() +{ + int ret = OB_SUCCESS; + int64_t tenant_schema_version = 0; + ObSchemaGetterGuard schema_guard(ObSchemaMgrItem::MOD_RS_MAJOR_CHECK); + if (OB_FAIL(schema_service_->get_tenant_refreshed_schema_version(tenant_id_, tenant_schema_version))) { + LOG_WARN("failed to get schema version", K(ret), K_(tenant_id)); + } else if (OB_FAIL(schema_service_->get_tenant_schema_guard( + tenant_id_, schema_guard, tenant_schema_version, OB_INVALID_VERSION, + ObMultiVersionSchemaService::RefreshSchemaMode::FORCE_LAZY))) { + LOG_WARN("fail to get schema guard", KR(ret), K_(tenant_id)); + } else if (OB_FAIL(ckm_validator_.handle_fts_checksum(schema_guard, fts_group_array_))) { + LOG_WARN("failed to handle fts checksum", KR(ret)); + } else { + LOG_INFO("success to handle fts checksum", KR(ret), K_(compaction_scn), K_(progress), K_(fts_group_array)); + fts_group_array_.reuse(); + } + return ret; +} + } // namespace rootserver } // namespace oceanbase diff --git 
a/src/rootserver/freeze/ob_major_merge_progress_checker.h b/src/rootserver/freeze/ob_major_merge_progress_checker.h index ffec062436..a194749739 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_checker.h +++ b/src/rootserver/freeze/ob_major_merge_progress_checker.h @@ -22,6 +22,7 @@ #include "rootserver/freeze/ob_major_freeze_util.h" #include "rootserver/freeze/ob_major_merge_progress_util.h" #include "share/compaction/ob_schedule_batch_size_mgr.h" +#include "rootserver/freeze/ob_fts_checksum_validate_util.h" namespace oceanbase { @@ -120,6 +121,10 @@ private: bool &is_table_valid, ObIArray &index_schemas); int rebuild_map_by_tablet_cnt(); + int prepare_fts_group( + const int64_t table_id, + const ObIArray &index_schemas); + int handle_fts_checksum(); private: static const int64_t ADD_RS_EVENT_INTERVAL = 10L * 60 * 1000 * 1000; // 10m static const int64_t PRINT_LOG_INTERVAL = 2 * 60 * 1000 * 1000; // 2m @@ -146,6 +151,7 @@ private: compaction::ObTabletStatusMap tablet_status_map_; // record each table compaction/verify status compaction::ObTableCompactionInfoMap table_compaction_map_; // + ObFTSGroupArray fts_group_array_; ObChecksumValidator ckm_validator_; compaction::ObUncompactInfo uncompact_info_; // cache of ls_infos in __all_ls_meta_table diff --git a/src/rootserver/freeze/ob_major_merge_progress_util.cpp b/src/rootserver/freeze/ob_major_merge_progress_util.cpp index 82002e7874..904e9d3797 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_util.cpp +++ b/src/rootserver/freeze/ob_major_merge_progress_util.cpp @@ -28,6 +28,7 @@ ObTableCompactionInfo &ObTableCompactionInfo::operator=(const ObTableCompactionI tablet_cnt_ = other.tablet_cnt_; status_ = other.status_; unfinish_index_cnt_ = other.unfinish_index_cnt_; + need_check_fts_ = other.need_check_fts_; return *this; } @@ -55,7 +56,8 @@ ObTableCompactionInfo::ObTableCompactionInfo() : table_id_(OB_INVALID_ID), tablet_cnt_(0), unfinish_index_cnt_(INVALID_INDEX_CNT), - status_(Status::INITIAL) 
+ status_(Status::INITIAL), + need_check_fts_(false) { } /** diff --git a/src/rootserver/freeze/ob_major_merge_progress_util.h b/src/rootserver/freeze/ob_major_merge_progress_util.h index cce192297c..59ef668c6e 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_util.h +++ b/src/rootserver/freeze/ob_major_merge_progress_util.h @@ -32,7 +32,7 @@ enum ObTabletCompactionStatus struct ObTableCompactionInfo { public: - enum Status + enum Status : uint8_t { INITIAL = 0, // already finished compaction and verified tablet checksum @@ -60,6 +60,7 @@ public: tablet_cnt_ = 0; status_ = Status::INITIAL; unfinish_index_cnt_ = INVALID_INDEX_CNT; + need_check_fts_ = false; } ObTableCompactionInfo &operator=(const ObTableCompactionInfo &other); @@ -80,12 +81,13 @@ public: const int64_t INVALID_INDEX_CNT = -1; bool is_index_table() const { return INVALID_INDEX_CNT == unfinish_index_cnt_; } - TO_STRING_KV(K_(table_id), K_(tablet_cnt), "status", status_to_str(status_), K_(unfinish_index_cnt)); + TO_STRING_KV(K_(table_id), K_(tablet_cnt), "status", status_to_str(status_), K_(unfinish_index_cnt), K_(need_check_fts)); public: uint64_t table_id_; int64_t tablet_cnt_; int64_t unfinish_index_cnt_; // accurate for main table, record cnt of unfinish index_table Status status_; + bool need_check_fts_; }; struct ObMergeProgress diff --git a/src/rootserver/ob_ddl_operator.cpp b/src/rootserver/ob_ddl_operator.cpp index 6def26e7da..13cfbcece3 100644 --- a/src/rootserver/ob_ddl_operator.cpp +++ b/src/rootserver/ob_ddl_operator.cpp @@ -3525,15 +3525,10 @@ int ObDDLOperator::alter_table_rename_index( schema::ObTableSchema &new_index_table_schema) { int ret = OB_SUCCESS; - ObSchemaService *schema_service = schema_service_.get_schema_service(); ObSchemaGetterGuard schema_guard; - if (OB_ISNULL(schema_service)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("schema_service is NULL", K(ret)); - } else if (OB_FAIL(schema_service_.get_tenant_schema_guard(tenant_id, schema_guard))) { + if 
(OB_FAIL(schema_service_.get_tenant_schema_guard(tenant_id, schema_guard))) { LOG_WARN("failed to get schema guard", K(ret)); } else { - int64_t new_schema_version = OB_INVALID_VERSION; RS_LOG(INFO, "start alter table rename index", K(rename_index_arg)); const ObTableSchema *index_table_schema = NULL; ObString index_table_name; @@ -3560,40 +3555,99 @@ int ObDDLOperator::alter_table_rename_index( is_index, index_table_schema))) { LOG_WARN("fail to get table schema", K(ret), K(tenant_id), K(database_id), K(index_table_schema)); - } else if (OB_UNLIKELY(NULL == index_table_schema)) { - ret = OB_ERR_UNEXPECTED; - RS_LOG(WARN, "get index table schema failed", K(tenant_id), K(database_id), K(index_table_name), K(ret)); - } else if (index_table_schema->is_in_recyclebin()) { - ret = OB_ERR_OPERATION_ON_RECYCLE_OBJECT; - LOG_WARN("index table is in recyclebin", K(ret)); - } else if (OB_FAIL(schema_service_.gen_new_schema_version(tenant_id, new_schema_version))) { - LOG_WARN("fail to gen new schema_version", K(ret), K(tenant_id)); - } else { - if (OB_FAIL(new_index_table_schema.assign(*index_table_schema))) { - LOG_WARN("fail to assign schema", K(ret)); - } else { - new_index_table_schema.set_schema_version(new_schema_version); - if (nullptr != new_index_status) { - new_index_table_schema.set_index_status(*new_index_status); - } - new_index_table_schema.set_name_generated_type(GENERATED_TYPE_USER); - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(new_index_table_schema.set_table_name(new_index_table_name))) { - RS_LOG(WARN, "failed to set new table name!", K(new_index_table_schema), K(ret)); - } else if (OB_FAIL(schema_service->get_table_sql_service().update_table_options( - trans, - *index_table_schema, - new_index_table_schema, - index_table_schema->is_global_index_table() ? 
OB_DDL_RENAME_GLOBAL_INDEX: OB_DDL_RENAME_INDEX))) { - RS_LOG(WARN, "schema service update_table_options failed", K(*index_table_schema), K(ret)); - } + } else if (OB_FAIL(inner_alter_table_rename_index_(tenant_id, index_table_schema, new_index_table_name, + new_index_status, trans, new_index_table_schema))) { + LOG_WARN("fail to alter table rename index", K(ret), K(tenant_id), KPC(index_table_schema), + K(new_index_table_name)); } } } return ret; } +int ObDDLOperator::alter_table_rename_index_with_origin_index_name( + const uint64_t tenant_id, + const uint64_t index_table_id, + const ObString &new_index_name, // Attention!!! origin index name, don't use table name. For example, __idx_500005_{index_name}, please using index_name!!! + const ObIndexStatus &new_index_status, + common::ObMySQLTransaction &trans, + share::schema::ObTableSchema &new_index_table_schema) +{ + int ret = OB_SUCCESS; + ObArenaAllocator allocator(ObModIds::OB_SCHEMA); + ObString new_index_table_name; + ObSchemaGetterGuard schema_guard; + const ObTableSchema *index_table_schema = nullptr; + RS_LOG(INFO, "start alter table rename index", K(tenant_id), K(index_table_id), K(new_index_name)); + if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id || OB_INVALID_ID == index_table_id || new_index_name.empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(tenant_id), K(index_table_id), K(new_index_name)); + } else if (OB_FAIL(schema_service_.get_tenant_schema_guard(tenant_id, schema_guard))) { + LOG_WARN("fail to get schema guard", K(ret)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, index_table_id, index_table_schema))) { + LOG_WARN("fail to get table schema", K(ret), K(tenant_id), K(index_table_id)); + } else if (OB_ISNULL(index_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unecpected error, index table schema is nullptr", K(ret), K(index_table_id)); + } else if (OB_FAIL(ObTableSchema::build_index_table_name(allocator, + 
index_table_schema->get_data_table_id(), + new_index_name, + new_index_table_name))) { + LOG_WARN("fail to build new index name", K(ret), K(new_index_name), KPC(index_table_schema)); + } else if (OB_FAIL(inner_alter_table_rename_index_(tenant_id, index_table_schema, new_index_table_name, &new_index_status, + trans, new_index_table_schema))) { + LOG_WARN("fail to alter table rename index", K(ret), K(tenant_id), KPC(index_table_schema), + K(new_index_table_name), K(new_index_status)); + } + return ret; +} + +int ObDDLOperator::inner_alter_table_rename_index_( + const uint64_t tenant_id, + const share::schema::ObTableSchema *index_table_schema, + const ObString &new_index_name, + const ObIndexStatus *new_index_status, + common::ObMySQLTransaction &trans, + share::schema::ObTableSchema &new_index_table_schema) +{ + int ret = OB_SUCCESS; + int64_t new_schema_version = OB_INVALID_VERSION; + ObSchemaService *schema_service = schema_service_.get_schema_service(); + if (OB_ISNULL(schema_service)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("schema_service is NULL", K(ret)); + } else if (OB_ISNULL(index_table_schema) + || OB_UNLIKELY(new_index_name.empty()) + || OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(index_table_schema), KP(new_index_status), K(new_index_name), + K(tenant_id)); + } else if (index_table_schema->is_in_recyclebin()) { + ret = OB_ERR_OPERATION_ON_RECYCLE_OBJECT; + LOG_WARN("index table is in recyclebin", K(ret)); + } else if (OB_FAIL(schema_service_.gen_new_schema_version(tenant_id, new_schema_version))) { + LOG_WARN("fail to gen new schema_version", K(ret), K(tenant_id)); + } else if (OB_FAIL(new_index_table_schema.assign(*index_table_schema))) { + LOG_WARN("fail to assign schema", K(ret)); + } else { + new_index_table_schema.set_schema_version(new_schema_version); + if (nullptr != new_index_status) { + new_index_table_schema.set_index_status(*new_index_status); + } + 
new_index_table_schema.set_name_generated_type(GENERATED_TYPE_USER); + if (OB_FAIL(new_index_table_schema.set_table_name(new_index_name))) { + RS_LOG(WARN, "failed to set new table name!", K(new_index_table_schema), K(ret)); + } else if (OB_FAIL(schema_service->get_table_sql_service().update_table_options( + trans, + *index_table_schema, + new_index_table_schema, + index_table_schema->is_global_index_table() ? OB_DDL_RENAME_GLOBAL_INDEX: OB_DDL_RENAME_INDEX))) { + RS_LOG(WARN, "schema service update_table_options failed", K(*index_table_schema), K(ret)); + } + } + return ret; +} + int ObDDLOperator::alter_index_table_parallel( const uint64_t tenant_id, const uint64_t data_table_id, @@ -10094,7 +10148,6 @@ int ObDDLOperator::drop_inner_generated_index_column(ObMySQLTransaction &trans, const ObColumnSchemaV2 *index_col = NULL; const uint64_t tenant_id = index_schema.get_tenant_id(); uint64_t data_table_id = index_schema.get_data_table_id(); - const ObIndexInfo &index_info = index_schema.get_index_info(); if (OB_FAIL(schema_guard.get_table_schema(tenant_id, data_table_id, data_table))) { LOG_WARN("get table schema failed", KR(ret), K(tenant_id), K(data_table_id)); } else if (OB_ISNULL(data_table)) { @@ -10112,14 +10165,22 @@ int ObDDLOperator::drop_inner_generated_index_column(ObMySQLTransaction &trans, } else { new_data_table_schema.set_in_offline_ddl_white_list(index_schema.get_in_offline_ddl_white_list()); } - for (int64_t i = 0; OB_SUCC(ret) && i < index_info.get_size(); ++i) { + for (ObTableSchema::const_column_iterator iter = index_schema.column_begin(); + OB_SUCC(ret) && iter != index_schema.column_end(); + ++iter) { + ObColumnSchemaV2 *column_schema = (*iter); + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), KPC(column_schema), K(index_schema)); + } else if (OB_UNLIKELY(is_shadow_column(column_schema->get_column_id()))) { + continue;// skip the shadow rowkeys for unique index. 
// Generated columns on index table are converted to normal column, // we need to get column schema from data table here. - if (OB_ISNULL(index_col = data_table->get_column_schema( - tenant_id, index_info.get_column(i)->column_id_))) { + } else if (OB_ISNULL(index_col = data_table->get_column_schema( + tenant_id, column_schema->get_column_id()))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get index column schema failed", K(ret), K(tenant_id), K(index_info)); - } else if (index_col->is_hidden() && index_col->is_generated_column()) { + LOG_WARN("get index column schema failed", K(ret), K(tenant_id), KPC(column_schema)); + } else if (index_col->is_hidden() && index_col->is_generated_column() && !index_col->is_rowkey_column()) { // delete the generated column generated internally when the index is created, // This kind of generated column is hidden. // delete generated column in data table for spatial index diff --git a/src/rootserver/ob_ddl_operator.h b/src/rootserver/ob_ddl_operator.h index fa2e1c192f..2a6c381e89 100644 --- a/src/rootserver/ob_ddl_operator.h +++ b/src/rootserver/ob_ddl_operator.h @@ -362,6 +362,14 @@ public: const ObIndexStatus *new_index_status, common::ObMySQLTransaction &trans, share::schema::ObTableSchema &new_index_table_schema); + int alter_table_rename_index_with_origin_index_name( + const uint64_t tenant_id, + const uint64_t index_table_id, + const ObString &new_index_name, // Attention!!! origin index name, don't use table name. For example, __idx_500005_{index_name}, please using index_name!!! 
+ const ObIndexStatus &new_index_status, + common::ObMySQLTransaction &trans, + share::schema::ObTableSchema &new_index_table_schema); + virtual int alter_index_table_parallel(const uint64_t tenant_id, const uint64_t data_table_id, const uint64_t database_id, @@ -1145,6 +1153,14 @@ private: int64_t routine_id); private: + int inner_alter_table_rename_index_( + const uint64_t tenant_id, + const share::schema::ObTableSchema *index_table_schema, + const ObString &new_index_table_name, + const ObIndexStatus *new_index_status, + common::ObMySQLTransaction &trans, + share::schema::ObTableSchema &new_index_table_schema); + int drop_fk_cascade( uint64_t tenant_id, share::schema::ObSchemaGetterGuard &schema_guard, diff --git a/src/rootserver/ob_ddl_service.cpp b/src/rootserver/ob_ddl_service.cpp index 6745a5de0d..81b2c4b15d 100755 --- a/src/rootserver/ob_ddl_service.cpp +++ b/src/rootserver/ob_ddl_service.cpp @@ -43,6 +43,7 @@ #include "share/ob_primary_zone_util.h" #include "share/ob_replica_info.h" #include "share/ob_index_builder_util.h" +#include "share/ob_fts_index_builder_util.h" #include "share/sequence/ob_sequence_ddl_proxy.h" #include "share/ob_schema_status_proxy.h" #include "share/ob_tenant_mgr.h" @@ -6344,7 +6345,7 @@ int ObDDLService::check_index_on_foreign_key(const ObTableSchema *index_table_sc return ret; } -int ObDDLService::alter_table_index(const obrpc::ObAlterTableArg &alter_table_arg, +int ObDDLService::alter_table_index(obrpc::ObAlterTableArg &alter_table_arg, const ObTableSchema &origin_table_schema, ObTableSchema &new_table_schema, ObSchemaGetterGuard &schema_guard, @@ -6357,7 +6358,7 @@ int ObDDLService::alter_table_index(const obrpc::ObAlterTableArg &alter_table_ar { int ret = OB_SUCCESS; ObIndexBuilder index_builder(*this); - const ObSArray &index_arg_list = alter_table_arg.index_arg_list_; + ObSArray &index_arg_list = alter_table_arg.index_arg_list_; common::ObArray drop_parent_table_mock_foreign_key_infos_array; ObIArray &ddl_res_array = 
res.ddl_res_array_; // To many hashset will fill up the stack, construct them on heap instead @@ -6368,18 +6369,21 @@ int ObDDLService::alter_table_index(const obrpc::ObAlterTableArg &alter_table_ar HEAP_VAR(AlterIndexNameHashSet, alter_index_name_set) { int64_t index_count = new_table_schema.get_index_tid_count(); for (int64_t i = 0; OB_SUCC(ret) && i < index_arg_list.size(); ++i) { - ObIndexArg *index_arg = const_cast(index_arg_list.at(i)); + ObIndexArg *index_arg = index_arg_list.at(i); if (OB_ISNULL(index_arg)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("index arg should not be null", K(ret)); } else { if (index_arg->index_action_type_ == ObIndexArg::ADD_INDEX) { - if (OB_MAX_INDEX_PER_TABLE <= index_count) { + ObCreateIndexArg *create_index_arg = static_cast(index_arg); + uint64_t tenant_data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(create_index_arg->tenant_id_, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), KPC(create_index_arg)); + } else if (OB_MAX_INDEX_PER_TABLE <= index_count) { ret = OB_ERR_TOO_MANY_KEYS; LOG_USER_ERROR(OB_ERR_TOO_MANY_KEYS, OB_MAX_INDEX_PER_TABLE); LOG_WARN("too many index for table!", K(index_count), K(OB_MAX_INDEX_PER_TABLE)); } - ObCreateIndexArg *create_index_arg = static_cast(index_arg); if (!new_table_schema.is_partitioned_table() && !create_index_arg->index_schema_.is_partitioned_table()) { if (INDEX_TYPE_NORMAL_GLOBAL == create_index_arg->index_type_) { @@ -6404,7 +6408,8 @@ int ObDDLService::alter_table_index(const obrpc::ObAlterTableArg &alter_table_ar continue; } } - if (create_index_arg->index_name_.empty()) { + if (OB_FAIL(ret)) { + } else if (create_index_arg->index_name_.empty()) { if (OB_FAIL(generate_index_name(*create_index_arg, new_table_schema, add_index_name_set, @@ -6468,7 +6473,8 @@ int ObDDLService::alter_table_index(const obrpc::ObAlterTableArg &alter_table_ar // 2. 
In addition, for the case where the primary_zone is random, // currently the leader_coordinator cannot guarantee that the results of // the random breakup of the primary table and the global index table primary_zone are consistent. - if (OB_FAIL(index_schema.assign(create_index_arg->index_schema_))) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(index_schema.assign(create_index_arg->index_schema_))) { LOG_WARN("fail to assign schema", K(ret)); } else if (FALSE_IT(index_schema.set_tenant_id(origin_table_schema.get_tenant_id()))) { } @@ -6515,7 +6521,6 @@ int ObDDLService::alter_table_index(const obrpc::ObAlterTableArg &alter_table_ar drop_index_arg->tenant_id_ = origin_table_schema.get_tenant_id(); const ObString &index_name = drop_index_arg->index_name_; ObIndexNameHashWrapper index_key(index_name); - if (OB_HASH_EXIST == drop_index_name_set.exist_refactored(index_key)) { //already drop in the same alter table clause ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; @@ -6560,8 +6565,10 @@ int ObDDLService::alter_table_index(const obrpc::ObAlterTableArg &alter_table_ar } else if (drop_index_arg->is_add_to_scheduler_) { ObDDLRes ddl_res; ObDDLTaskRecord task_record; + const bool is_inner_and_domain_index = drop_index_arg->is_inner_ && index_table_schema->is_fts_or_multivalue_index(); bool has_index_task = false; - SMART_VAR(ObTableSchema, new_index_schema) { + typedef common::ObSEArray TableSchemaArray; + SMART_VAR(TableSchemaArray, new_index_schemas) { if (!drop_index_arg->is_inner_ && !index_table_schema->can_read_index() && OB_FAIL(ObDDLTaskRecordOperator::check_has_index_task( trans, origin_table_schema.get_tenant_id(), origin_table_schema.get_table_id(), index_table_schema->get_table_id(), has_index_task))) { LOG_WARN("failed to check ddl conflict", K(ret)); @@ -6571,23 +6578,36 @@ int ObDDLService::alter_table_index(const obrpc::ObAlterTableArg &alter_table_ar LOG_USER_ERROR(OB_NOT_SUPPORTED, "dropping a building or dropping index is"); } else if 
(OB_FAIL(rename_dropping_index_name(origin_table_schema.get_table_id(), origin_table_schema.get_database_id(), + is_inner_and_domain_index, *drop_index_arg, schema_guard, ddl_operator, trans, - new_index_schema))) { + new_index_schemas))) { LOG_WARN("submit drop index arg failed", K(ret)); - } else if (OB_FAIL(index_builder.submit_drop_index_task(trans, origin_table_schema, *index_table_schema, - new_index_schema.get_schema_version(), *drop_index_arg, allocator, task_record))) { - LOG_WARN("failed to submit drop index task", K(ret)); + } else if (OB_UNLIKELY(!index_table_schema->is_fts_or_multivalue_index() && new_index_schemas.count() != 1) + || OB_UNLIKELY(index_table_schema->is_fts_index_aux() && new_index_schemas.count() != 4) + || OB_UNLIKELY(index_table_schema->is_multivalue_index_aux() && new_index_schemas.count() != 3)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, invalid new index schema count", K(ret), "count", + new_index_schemas.count(), "is fts index", index_table_schema->is_fts_index_aux(), + "is multivalue index", index_table_schema->is_multivalue_index_aux(), + K(new_index_schemas)); } else { - ddl_res.task_id_ = task_record.task_id_; - ddl_res.tenant_id_ = new_index_schema.get_tenant_id(); - ddl_res.schema_id_ = new_index_schema.get_table_id(); - if (OB_FAIL(ddl_tasks.push_back(task_record))) { - LOG_WARN("push back ddl task failed", K(ret)); - } else if (OB_FAIL(ddl_res_array.push_back(ddl_res))) { - LOG_WARN("push back ddl res array failed", K(ret)); + const ObTableSchema &new_index_schema = new_index_schemas.at(new_index_schemas.count() - 1); + bool has_exist = false; + if (OB_FAIL(index_builder.submit_drop_index_task(trans, origin_table_schema, new_index_schemas, + *drop_index_arg, allocator, has_exist, task_record))) { + LOG_WARN("failed to submit drop index task", K(ret)); + } else { + ddl_res.task_id_ = task_record.task_id_; + ddl_res.tenant_id_ = new_index_schema.get_tenant_id(); + ddl_res.schema_id_ = 
new_index_schema.get_table_id(); + if (OB_FAIL(ddl_tasks.push_back(task_record))) { + LOG_WARN("push back ddl task failed", K(ret)); + } else if (OB_FAIL(ddl_res_array.push_back(ddl_res))) { + LOG_WARN("push back ddl res array failed", K(ret)); + } } } } @@ -7423,16 +7443,48 @@ int ObDDLService::get_index_schema_by_name( return ret; } +int ObDDLService::get_valid_index_schema_by_id_for_drop_index_( + const uint64_t data_table_id, + const ObDropIndexArg &drop_index_arg, + ObSchemaGetterGuard &schema_guard, + const ObTableSchema *&index_table_schema) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = drop_index_arg.tenant_id_; + // drop_index_arg.index_name_ is specified by user. drop_index_arg.index_name_ may be + // not matched with drop_index_arg.index_table_id_. For example: Drop FTS index need + // to drop all built-in FTS index tables. One index_name correspond to multiple index + // tables. So, index_name may be not matched with index_table_id + const uint64_t table_id = drop_index_arg.index_table_id_; + const ObString index_name = drop_index_arg.index_name_; + index_table_schema = nullptr; + if (OB_UNLIKELY(OB_INVALID_ID == data_table_id || !drop_index_arg.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(data_table_id), K(drop_index_arg)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_id, index_table_schema))) { + LOG_WARN("fail to get table schema", K(ret), K(tenant_id), K(table_id), K(index_table_schema)); + } else if (nullptr == index_table_schema) { + ret = OB_ERR_CANT_DROP_FIELD_OR_KEY; + LOG_USER_ERROR(OB_ERR_CANT_DROP_FIELD_OR_KEY, index_name.length(), index_name.ptr()); + LOG_WARN("get index table schema failed", K(tenant_id), K(data_table_id), K(ret)); + } else if (index_table_schema->is_in_recyclebin()) { + ret = OB_ERR_OPERATION_ON_RECYCLE_OBJECT; + LOG_WARN("index table is in recyclebin", K(ret), K(data_table_id), K(drop_index_arg)); + } + return ret; +} + // To avoid ddl hung when 
drop and add index with same index-name in single stmt, // should rename dropping index firstly, and then push it into ddl scheduler queue. int ObDDLService::rename_dropping_index_name( const uint64_t data_table_id, const uint64_t database_id, + const bool is_inner_and_domain_index, const ObDropIndexArg &drop_index_arg, ObSchemaGetterGuard &schema_guard, ObDDLOperator &ddl_operator, common::ObMySQLTransaction &trans, - share::schema::ObTableSchema &new_index_schema) + common::ObIArray &new_index_schemas) { int ret = OB_SUCCESS; const ObTableSchema *index_table_schema = nullptr; @@ -7440,33 +7492,169 @@ int ObDDLService::rename_dropping_index_name( const int64_t buf_size = number::ObNumber::MAX_PRINTABLE_SIZE; char buf[buf_size] = {0}; ObString index_name = drop_index_arg.index_name_; - if (OB_FAIL(get_index_schema_by_name(data_table_id, database_id, drop_index_arg, - schema_guard, index_table_schema))) { - LOG_WARN("get index schema by name", K(ret), K(data_table_id), K(database_id)); + bool need_rename = true; + // When dropping fts index, one index_name corresponds to multiple index tables. We can not decide + // right index table by index_name. To handle multiple FTS built-in index tables, inner DDL task + // will drop these built-in index tables by drop_index_arg.index_table_id_. + if (!is_inner_and_domain_index) { + if (OB_FAIL(get_index_schema_by_name(data_table_id, database_id, drop_index_arg, + schema_guard, index_table_schema))) { + LOG_WARN("get index schema by name", K(ret), K(data_table_id), K(database_id)); + } + } else if (OB_FAIL(get_valid_index_schema_by_id_for_drop_index_(data_table_id, drop_index_arg, schema_guard, + index_table_schema))) { + LOG_WARN("fail to get valid index schema by id for drop index", K(ret), K(data_table_id), K(drop_index_arg)); + } else if (OB_ISNULL(index_table_schema)) { + // FTS or multi-value index and inner rpc, if its table schema does not exist, just skip rename and return success. 
+ need_rename = false; + } else { + ObArenaAllocator allocator(ObModIds::OB_SCHEMA); + ObString cur_index_name; + if (OB_FAIL(index_table_schema->get_index_name(cur_index_name))) { + LOG_WARN("build_index_table_name failed", K(ret), K(data_table_id), KPC(index_table_schema)); + } else if (0 != index_name.case_compare(cur_index_name)) { + // FTS index and inner rpc, it has been renamed, just skip rename and return success. + need_rename = false; + } + } + if (OB_FAIL(ret) || !need_rename) { + } else if (OB_ISNULL(index_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, index table schema is nullptr", K(ret), KP(index_table_schema), K(data_table_id), + K(drop_index_arg)); + } else if (!drop_index_arg.is_inner_ && index_table_schema->is_fts_index_aux()) { + // This task is the parent task of drop fts index, no need to rename. + if (OB_FAIL(get_dropping_domain_index_invisiable_aux_table_schema(index_table_schema->get_tenant_id(), data_table_id, + index_table_schema->get_table_id(), true, index_table_schema->get_table_name_str(), schema_guard, ddl_operator, + trans, new_index_schemas))) { + LOG_WARN("fail to get dropping fts aux table schema", K(ret), K(data_table_id), K(index_table_schema)); + } else if (OB_FAIL(new_index_schemas.push_back(*index_table_schema))) { + LOG_WARN("fail to push back index schema", K(ret), KPC(index_table_schema)); + } + } else if (!drop_index_arg.is_inner_ && index_table_schema->is_multivalue_index_aux()) { + if (OB_FAIL(get_dropping_domain_index_invisiable_aux_table_schema(index_table_schema->get_tenant_id(), data_table_id, + index_table_schema->get_table_id(), false, index_table_schema->get_table_name_str(), schema_guard, ddl_operator, + trans, new_index_schemas))) { + LOG_WARN("fail to get dropping fts aux table schema", K(ret), K(data_table_id), K(index_table_schema)); + } else if (OB_FAIL(new_index_schemas.push_back(*index_table_schema))) { + LOG_WARN("fail to push back index schema", K(ret), 
KPC(index_table_schema)); + } } else if ((nwrite = snprintf(buf, buf_size, "%s_%lu", "DELETING", ObTimeUtility::current_time())) >= buf_size || nwrite < 0) { ret = common::OB_BUF_NOT_ENOUGH; LOG_WARN("buf is not large enough", K(ret), K(buf_size)); } else { - const ObIndexStatus new_index_status = INDEX_STATUS_UNAVAILABLE; - ObString new_index_name = ObString::make_string(buf); - obrpc::ObRenameIndexArg rename_index_arg; - rename_index_arg.tenant_id_ = index_table_schema->get_tenant_id(); - rename_index_arg.origin_index_name_ = index_name; - rename_index_arg.new_index_name_ = new_index_name; - if (OB_FAIL(ddl_operator.alter_table_rename_index(index_table_schema->get_tenant_id(), - index_table_schema->get_data_table_id(), - index_table_schema->get_database_id(), - rename_index_arg, - &new_index_status, - trans, - new_index_schema))) { - LOG_WARN("rename index failed", K(ret)); + SMART_VAR(ObTableSchema, new_index_schema) { + const ObIndexStatus new_index_status = INDEX_STATUS_UNAVAILABLE; + ObString new_index_name = ObString::make_string(buf); + obrpc::ObRenameIndexArg rename_index_arg; + rename_index_arg.tenant_id_ = index_table_schema->get_tenant_id(); + rename_index_arg.origin_index_name_ = index_name; + rename_index_arg.new_index_name_ = new_index_name; + if (OB_INVALID_ID != drop_index_arg.index_table_id_) { + if (OB_FAIL(ddl_operator.alter_table_rename_index_with_origin_index_name(index_table_schema->get_tenant_id(), + index_table_schema->get_table_id(), + new_index_name, + new_index_status, + trans, + new_index_schema))) { + LOG_WARN("fail to alter table rename index", K(ret)); + } + } else if (OB_FAIL(ddl_operator.alter_table_rename_index(index_table_schema->get_tenant_id(), + index_table_schema->get_data_table_id(), + index_table_schema->get_database_id(), + rename_index_arg, + &new_index_status, + trans, + new_index_schema))) { + LOG_WARN("rename index failed", K(ret)); + } + if (FAILEDx(new_index_schemas.push_back(new_index_schema))) { + LOG_WARN("fail to 
push back new index schemas", K(ret), K(new_index_schema)); + } } } return ret; } +int ObDDLService::get_dropping_domain_index_invisiable_aux_table_schema( + const uint64_t tenant_id, + const uint64_t data_table_id, + const uint64_t index_table_id, + const bool is_fts_index, + const ObString &index_name, + share::schema::ObSchemaGetterGuard &schema_guard, + ObDDLOperator &ddl_operator, + common::ObMySQLTransaction &trans, + common::ObIArray &new_aux_schemas) +{ + int ret = OB_SUCCESS; + const share::schema::ObTableSchema *data_table_schema = nullptr; + ObSEArray indexs; + if (OB_UNLIKELY(OB_INVALID_ID == data_table_id + || OB_INVALID_ID == index_table_id + || OB_INVALID_TENANT_ID == tenant_id + || index_name.empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(data_table_id), K(index_table_id), K(tenant_id), K(index_name)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, data_table_id, data_table_schema))) { + LOG_WARN("fail to get index schema with data table id", K(ret), K(tenant_id), K(data_table_id)); + } else if (OB_ISNULL(data_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, data table schema is nullptr", K(ret), KP(data_table_schema)); + } else { + SMART_VAR(ObTableSchema, new_aux_schema) { + const ObIArray &indexs = data_table_schema->get_simple_index_infos(); + const share::schema::ObTableSchema *doc_word_schema = nullptr; + const share::schema::ObTableSchema *rowkey_doc_schema = nullptr; + const share::schema::ObTableSchema *doc_rowkey_schema = nullptr; + + for (int64_t i = 0; OB_SUCC(ret) && i < indexs.count(); ++i) { + const share::schema::ObAuxTableMetaInfo &info = indexs.at(i); + if (share::schema::is_rowkey_doc_aux(info.index_type_)) { + if (OB_FAIL(schema_guard.get_table_schema(tenant_id, info.table_id_, rowkey_doc_schema))) { + LOG_WARN("fail to get rowkey doc table schema", K(ret), K(tenant_id), K(info)); + } else if (OB_ISNULL(rowkey_doc_schema)) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("unexpeted error, rowkey doc schema is nullptr", K(ret), K(info)); + } else if (OB_FAIL(new_aux_schemas.push_back(*rowkey_doc_schema))) { + LOG_WARN("fail to push doc rowkey table schema", K(ret), KPC(rowkey_doc_schema)); + } + } else if (share::schema::is_doc_rowkey_aux(info.index_type_)) { + if (OB_FAIL(schema_guard.get_table_schema(tenant_id, info.table_id_, doc_rowkey_schema))) { + LOG_WARN("fail to get doc rowkey table schema", K(ret), K(tenant_id), K(info)); + } else if (OB_ISNULL(doc_rowkey_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, doc rowkey schema is nullptr", K(ret), K(info)); + } else if (OB_FAIL(new_aux_schemas.push_back(*doc_rowkey_schema))) { + LOG_WARN("fail to push doc rowkey table schema", K(ret), KPC(doc_rowkey_schema)); + } + } else if (is_fts_index && share::schema::is_fts_doc_word_aux(info.index_type_)){ + if (OB_FAIL(schema_guard.get_table_schema(tenant_id, info.table_id_, doc_word_schema))) { + LOG_WARN("fail to get doc word table schema", K(ret), K(tenant_id), K(info)); + } else if (OB_ISNULL(doc_word_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, doc word schema is nullptr", K(ret), K(info)); + } else { + int nwrite = 0; + const int64_t buf_size = OB_MAX_TABLE_NAME_BUF_LENGTH; + char buf[buf_size] = {0}; + if (OB_FAIL(databuff_printf(buf, buf_size, "%.*s_fts_doc_word", index_name.length(), index_name.ptr()))) { + LOG_WARN("fail to printf fts doc word name str", K(ret), K(index_name)); + } else if (0 == doc_word_schema->get_table_name_str().case_compare(buf)) { + if (OB_FAIL(new_aux_schemas.push_back(*doc_word_schema))) { + LOG_WARN("fail to push doc word table schema", K(ret), KPC(doc_word_schema)); + } + } + } + } + } + } + STORAGE_FTS_LOG(INFO, "get dropping fts aux table name", K(ret), K(tenant_id), K(data_table_id), K(index_table_id)); + } + return ret; +} + int ObDDLService::generate_tmp_idx_schemas( const ObTableSchema &new_table_schema, ObIArray &idx_schemas, @@ -12571,8 
+12759,11 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, || ObIndexArg::REBUILD_INDEX == index_arg->index_action_type_) { ObCreateIndexArg *create_index_arg = static_cast(index_arg); ObTableSchema &index_schema = create_index_arg->index_schema_; - if (INDEX_TYPE_PRIMARY == create_index_arg->index_type_) { - // do nothing + if (INDEX_TYPE_PRIMARY == create_index_arg->index_type_ || + is_fts_index(create_index_arg->index_type_) || + is_multivalue_index(create_index_arg->index_type_)) { + // TODO hanxuan tempory bypass sumbit build fulltext index task + // TODO yunyi tempory bypass sumbit build multi value index task } else { ObArray inc_tablet_ids; ObArray del_tablet_ids; @@ -12907,6 +13098,7 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg, } if (OB_SUCC(ret) && is_double_table_long_running_ddl(ddl_type)) { bool has_index_operation = false; + bool has_fts_index = false; bool is_adding_constraint = false; bool is_column_store = false; uint64_t table_id = alter_table_arg.alter_table_schema_.get_table_id(); @@ -12923,11 +13115,19 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg, table_id, has_index_operation))) { LOG_WARN("check has index operation failed", K(ret)); + } else if (OB_FAIL(check_has_fts_index(schema_guard, + tenant_id, + table_id, + has_fts_index))) { + LOG_WARN("check has fts index failed", K(ret)); } else if (OB_FAIL(check_is_adding_constraint(tenant_id, table_id, is_adding_constraint))) { LOG_WARN("failed to call check_is_adding_constraint", K(ret)); } else if (has_index_operation) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "The DDL cannot be run concurrently with creating index."); + } else if (has_fts_index) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "Run this DDL operation on table with fulltext search index"); } else if (is_adding_constraint) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "The DDL cannot be run 
concurrently with adding constraint."); @@ -12937,6 +13137,40 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg, return ret; } +int ObDDLService::check_has_fts_index( + ObSchemaGetterGuard &schema_guard, + const uint64_t tenant_id, + const uint64_t data_table_id, + bool &fts_exist) +{ + int ret = OB_SUCCESS; + fts_exist = false; + ObRootService *root_service = GCTX.root_service_; + const ObTableSchema *table_schema = nullptr; + if (OB_ISNULL(root_service)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(root_service->get_ddl_service().get_tenant_schema_guard_with_version_in_inner_table(tenant_id, schema_guard))) { + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, data_table_id, table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(tenant_id), K(data_table_id)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error unexpected, table schema must not be nullptr", K(ret), K(data_table_id)); + } else { + const common::ObIArray &index_infos = table_schema->get_simple_index_infos(); + if (index_infos.count() > 0) { + // if there is indexes in new tables, if so, the indexes is already rebuilt in new table + for (int64_t i = 0; OB_SUCC(ret) && i < index_infos.count(); ++i) { + if (share::schema::is_doc_rowkey_aux(index_infos.at(i).index_type_)) { + fts_exist = true; + break; + } + } + } + } + return ret; +} + // check whether there is index operation, including add index and drop index. 
int ObDDLService::check_has_index_operation(ObSchemaGetterGuard &schema_guard, const uint64_t tenant_id, @@ -14152,6 +14386,19 @@ int ObDDLService::check_alter_partitions(const ObTableSchema &orig_table_schema, LOG_WARN("split partition in 4.0 not allowed", K(ret), K(tablegroup_id)); LOG_USER_ERROR(OB_OP_NOT_ALLOW, "split partition in 4.0"); } + bool has_fts_index = false; + const int64_t table_id = orig_table_schema.get_table_id(); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(check_has_fts_index(schema_guard, + tenant_id, + table_id, + has_fts_index))) { + LOG_WARN("failed to check if have fts index", K(ret), K(table_id)); + } else if (has_fts_index) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("alter partition operation on table with fts index not supported", K(ret), K(orig_table_schema)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "alter partition operation on table with fts index"); + } if (OB_FAIL(ret)) { } else if (obrpc::ObAlterTableArg::TRUNCATE_PARTITION == alter_part_type) { @@ -16827,7 +17074,7 @@ int ObDDLService::gen_hidden_index_schema_columns(const ObTableSchema &orig_inde create_index_arg.index_type_ = index_schema.get_index_type(); if (INDEX_TYPE_NORMAL_LOCAL == create_index_arg.index_type_ || INDEX_TYPE_UNIQUE_LOCAL == create_index_arg.index_type_ - || INDEX_TYPE_DOMAIN_CTXCAT == create_index_arg.index_type_) { + || INDEX_TYPE_DOMAIN_CTXCAT_DEPRECATED == create_index_arg.index_type_) { if (OB_FAIL(sql::ObResolverUtils::check_unique_index_cover_partition_column( new_table_schema, create_index_arg))) { LOG_WARN("fail to check unique key cover partition column", K(ret)); @@ -22747,14 +22994,20 @@ int ObDDLService::check_table_exists(const uint64_t tenant_id, } else { is_view = false; } - if (OB_FAIL(guard.get_table_schema(tenant_id, - database_id, - table_item.table_name_, - USER_INDEX == expected_table_type, - tmp_table_schema, - table_item.is_hidden_))) { - LOG_WARN("get_table_schema failed", K(tenant_id), - KT(database_id), K(table_item), K(expected_table_type), 
K(ret)); + if (OB_INVALID_ID == table_item.table_id_) { + if (OB_FAIL(guard.get_table_schema(tenant_id, + database_id, + table_item.table_name_, + USER_INDEX == expected_table_type, + tmp_table_schema, + table_item.is_hidden_))) { + LOG_WARN("get_table_schema failed", K(tenant_id), + KT(database_id), K(table_item), K(expected_table_type), K(ret)); + } + } else if (OB_FAIL(guard.get_table_schema(tenant_id, table_item.table_id_, tmp_table_schema))) { + LOG_WARN("fail to get table schema", K(ret), K(tenant_id), K(table_item)); + } + if (OB_FAIL(ret)) { } else if (NULL == tmp_table_schema) { ret = OB_TABLE_NOT_EXIST; LOG_WARN("not find this table schema:", K(ret), diff --git a/src/rootserver/ob_ddl_service.h b/src/rootserver/ob_ddl_service.h index 8fe7bd3dbf..fb721f6562 100644 --- a/src/rootserver/ob_ddl_service.h +++ b/src/rootserver/ob_ddl_service.h @@ -311,7 +311,7 @@ public: share::schema::ObSchemaGetterGuard &schema_guard, const bool need_check_tablet_cnt, const uint64_t tenant_data_version); - virtual int alter_table_index(const obrpc::ObAlterTableArg &alter_table_arg, + virtual int alter_table_index(obrpc::ObAlterTableArg &alter_table_arg, const share::schema::ObTableSchema &orgin_table_schema, share::schema::ObTableSchema &new_table_schema, share::schema::ObSchemaGetterGuard &schema_guard, @@ -1194,11 +1194,12 @@ int check_table_udt_id_is_exist(share::schema::ObSchemaGetterGuard &schema_guard int rename_dropping_index_name( const uint64_t data_table_id, const uint64_t database_id, + const bool is_inner_and_fts_index, const obrpc::ObDropIndexArg &drop_index_arg, ObSchemaGetterGuard &schema_guard, ObDDLOperator &ddl_operator, ObMySQLTransaction &trans, - share::schema::ObTableSchema &new_index_schema); + common::ObIArray &new_index_schemas); int get_index_schema_by_name( const uint64_t data_table_id, const uint64_t database_id, @@ -1284,6 +1285,11 @@ private: const ObTableSchema &table_schema, uint64_t &tablet_cnt); + int check_has_fts_index( + 
ObSchemaGetterGuard &schema_guard, + const uint64_t tenant_id, + const uint64_t data_table_id, + bool &fts_exist); int check_has_index_operation( ObSchemaGetterGuard &schema_guard, const uint64_t teannt_id, @@ -1334,6 +1340,11 @@ private: int get_sample_table_schema( common::ObIArray &table_schemas, const share::schema::ObSimpleTableSchemaV2 *&sample_table_schema); + int get_valid_index_schema_by_id_for_drop_index_( + const uint64_t data_table_id, + const obrpc::ObDropIndexArg &drop_index_arg, + share::schema::ObSchemaGetterGuard &schema_guard, + const share::schema::ObTableSchema *&index_table_schema); int set_tablegroup_id(share::schema::ObTableSchema &table_schema); template int set_default_tablegroup_id(SCHEMA &schema); @@ -2051,6 +2062,16 @@ private: ObMySQLTransaction &trans); int lock_tables_in_recyclebin(const share::schema::ObDatabaseSchema &database_schema, ObMySQLTransaction &trans); + int get_dropping_domain_index_invisiable_aux_table_schema( + const uint64_t tenant_id, + const uint64_t data_table_id, + const uint64_t index_table_id, + const bool is_fts_index, + const ObString &index_name, + share::schema::ObSchemaGetterGuard &schema_guard, + ObDDLOperator &ddl_operator, + common::ObMySQLTransaction &trans, + common::ObIArray &new_aux_schemas); public: int check_parallel_ddl_conflict( diff --git a/src/rootserver/ob_index_builder.cpp b/src/rootserver/ob_index_builder.cpp index 939fad47f9..9a72e7b01e 100644 --- a/src/rootserver/ob_index_builder.cpp +++ b/src/rootserver/ob_index_builder.cpp @@ -105,6 +105,7 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes const ObTableSchema *table_schema = NULL; ObSchemaGetterGuard schema_guard; bool is_db_in_recyclebin = false; + bool ignore_for_domain_index = false; bool need_rename_index = true; ObTableType drop_table_type = USER_INDEX; uint64_t compat_version = 0; @@ -134,7 +135,9 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes LOG_WARN("failed to 
get data table schema", K(arg), K(ret)); } else if (NULL == table_schema) { ret = OB_TABLE_NOT_EXIST; - LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(arg.database_name_), to_cstring(arg.table_name_)); + if (!(ignore_for_domain_index = ignore_error_code_for_domain_index(ret, arg))) { + LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(arg.database_name_), to_cstring(arg.table_name_)); + } LOG_WARN("table not found", K(arg), K(ret)); } else if (arg.is_in_recyclebin_) { // internal delete index @@ -151,15 +154,17 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes LOG_WARN("check whether foreign key related table executes ddl failed", K(ret)); } if (OB_SUCC(ret)) { + ObString index_table_name; const uint64_t data_table_id = table_schema->get_table_id(); const ObTableSchema *index_table_schema = NULL; if (OB_INVALID_ID != arg.index_table_id_) { LOG_DEBUG("drop index with index_table_id", K(arg.index_table_id_)); if (OB_FAIL(schema_guard.get_table_schema(tenant_id, arg.index_table_id_, index_table_schema))) { LOG_WARN("fail to get index table schema", K(ret), K(tenant_id), K(arg.index_table_id_)); + } else if (OB_ISNULL(index_table_schema)) { + ignore_for_domain_index = ignore_error_code_for_domain_index(OB_TABLE_NOT_EXIST, arg); } } else { - ObString index_table_name; if (is_mlog) { index_table_name = arg.index_name_; } else if (OB_FAIL(ObTableSchema::build_index_table_name( @@ -187,8 +192,10 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes LOG_USER_ERROR(OB_ERR_TABLE_NO_MLOG, to_cstring(arg.database_name_), to_cstring(arg.index_name_)); } else { ret = OB_ERR_CANT_DROP_FIELD_OR_KEY; - LOG_WARN("index table schema should not be null", K(arg.index_name_), K(ret)); - LOG_USER_ERROR(OB_ERR_CANT_DROP_FIELD_OR_KEY, arg.index_name_.length(), arg.index_name_.ptr()); + LOG_WARN("index table schema should not be null", K(arg.index_name_), K(index_table_name), K(ret)); + if (!ignore_for_domain_index) { + 
LOG_USER_ERROR(OB_ERR_CANT_DROP_FIELD_OR_KEY, arg.index_name_.length(), arg.index_name_.ptr()); + } } } else if (OB_FAIL(ddl_service_.check_index_on_foreign_key(index_table_schema, foreign_key_infos, @@ -207,10 +214,22 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes int64_t refreshed_schema_version = 0; ObArenaAllocator allocator(lib::ObLabel("DdlTaskTmp")); ObDDLTaskRecord task_record; + bool has_other_domain_index = false; + const bool is_inner_and_fts_index = arg.is_inner_ && index_table_schema->is_fts_index(); + const bool is_inner_and_multivalue_index = arg.is_inner_ && index_table_schema->is_multivalue_index(); + const bool is_inner_and_fts_or_mulvalue_index = is_inner_and_fts_index || is_inner_and_multivalue_index; bool has_index_task = false; - SMART_VAR(ObTableSchema, new_index_schema) { + typedef common::ObSEArray TableSchemaArray; + SMART_VAR(TableSchemaArray, new_index_schemas) { if (OB_FAIL(schema_guard.get_schema_version(tenant_id, refreshed_schema_version))) { LOG_WARN("failed to get tenant schema version", KR(ret), K(tenant_id)); + } else if ((index_table_schema->is_doc_id_rowkey() || index_table_schema->is_rowkey_doc_id()) + && OB_FAIL(check_has_fts_or_multivalue_index(tenant_id, index_table_schema->get_data_table_id(), schema_guard, + has_other_domain_index))) { + LOG_WARN("fail to check has domain index", K(ret), K(tenant_id), K(index_table_schema->get_index_type()), K(arg), KPC(index_table_schema)); + } else if (has_other_domain_index) { + LOG_INFO("there are some other fulltext or multivalue index, and don't need to drop rowkey doc or doc rowkey", + K(index_table_schema->get_index_type()), KPC(index_table_schema)); } else if (OB_FAIL(trans.start(&ddl_service_.get_sql_proxy(), tenant_id, refreshed_schema_version))) { LOG_WARN("start transaction failed", KR(ret), K(tenant_id), K(refreshed_schema_version)); } else if (!arg.is_inner_ && !index_table_schema->can_read_index() && 
OB_FAIL(ObDDLTaskRecordOperator::check_has_index_task( @@ -223,21 +242,44 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes } else if (need_rename_index && OB_FAIL(ddl_service_.rename_dropping_index_name( table_schema->get_table_id(), table_schema->get_database_id(), + is_inner_and_fts_or_mulvalue_index, arg, schema_guard, ddl_operator, trans, - new_index_schema))) { - LOG_WARN("renmae index name failed", K(ret)); - } else if (!need_rename_index && OB_FAIL(new_index_schema.assign(*index_table_schema))) { + new_index_schemas))) { + LOG_WARN("rename index name failed", K(ret)); + } else if (!need_rename_index && OB_FAIL(new_index_schemas.push_back(*index_table_schema))) { LOG_WARN("failed to assign index table schema to new index schema", KR(ret)); - } else if (OB_FAIL(submit_drop_index_task(trans, *table_schema, new_index_schema, new_index_schema.get_schema_version(), arg, allocator, task_record))) { - LOG_WARN("submit drop index task failed", K(ret)); - } else { - res.tenant_id_ = new_index_schema.get_tenant_id(); - res.index_table_id_ = new_index_schema.get_table_id(); - res.schema_version_ = new_index_schema.get_schema_version(); - res.task_id_ = task_record.task_id_; + } else if (is_inner_and_fts_or_mulvalue_index && 0 == new_index_schemas.count()) { + if (OB_FAIL(new_index_schemas.push_back(*index_table_schema))) { + LOG_WARN("fail to push back index schema", K(ret), KPC(index_table_schema)); + } + } else if (OB_UNLIKELY(!index_table_schema->is_fts_or_multivalue_index() && new_index_schemas.count() != 1) + || OB_UNLIKELY(is_inner_and_fts_or_mulvalue_index && new_index_schemas.count() != 1) + || OB_UNLIKELY(!arg.is_inner_ && index_table_schema->is_fts_index_aux() && new_index_schemas.count() != 4) + || OB_UNLIKELY(!arg.is_inner_ && index_table_schema->is_multivalue_index_aux() && new_index_schemas.count() != 3)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, invalid new index schema count", K(ret), + "is inner", 
arg.is_inner_, + "count", new_index_schemas.count(), + "is fts index", index_table_schema->is_fts_index_aux(), + "is multivalue index", index_table_schema->is_multivalue_index_aux(), + K(new_index_schemas)); + } + if (OB_SUCC(ret) && !has_other_domain_index) { + bool has_exist = false; + const ObTableSchema &new_index_schema = new_index_schemas.at(new_index_schemas.count() - 1); + if (OB_FAIL(submit_drop_index_task(trans, *table_schema, new_index_schemas, arg, allocator, has_exist, task_record))) { + LOG_WARN("submit drop index task failed", K(ret), K(task_record)); + } else if (has_exist) { + res.task_id_ = task_record.task_id_; + } else { + res.tenant_id_ = new_index_schema.get_tenant_id(); + res.index_table_id_ = new_index_schema.get_table_id(); + res.schema_version_ = new_index_schema.get_schema_version(); + res.task_id_ = task_record.task_id_; + } } } if (trans.is_started()) { @@ -247,7 +289,7 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes ret = (OB_SUCC(ret)) ? temp_ret : ret; } } - if (OB_SUCC(ret)) { + if (OB_SUCC(ret) && !has_other_domain_index) { int tmp_ret = OB_SUCCESS; if (OB_FAIL(ddl_service_.publish_schema(tenant_id))) { LOG_WARN("fail to publish schema", K(ret), K(tenant_id)); @@ -261,6 +303,7 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes table_item.database_name_ = arg.database_name_; table_item.table_name_ = index_table_schema->get_table_name(); table_item.is_hidden_ = index_table_schema->is_user_hidden_table(); + table_item.table_id_ = arg.index_table_id_; obrpc::ObDDLRes ddl_res; obrpc::ObDropTableArg drop_table_arg; drop_table_arg.tenant_id_ = tenant_id; @@ -282,7 +325,11 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes } } } - + if (OB_FAIL(ret) && ignore_for_domain_index) { + // ignore error code and return success for fts index, while data table or index table isn't exist. + res.task_id_ = -1; // just skip following steps. 
+ ret = OB_SUCCESS; + } LOG_INFO("finish drop index", K(arg), K(ret)); return ret; } @@ -420,42 +467,154 @@ int ObIndexBuilder::submit_build_index_task( return ret; } +int ObIndexBuilder::recognize_index_schemas( + const common::ObIArray &index_schemas, + int64_t &index_ith, + int64_t &aux_doc_word_ith, + int64_t &aux_rowkey_doc_ith, + int64_t &aux_doc_rowkey_ith) +{ + int ret = OB_SUCCESS; + index_ith = -1; + if (OB_UNLIKELY(1 != index_schemas.count() && 4 != index_schemas.count() && 3 != index_schemas.count())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(index_schemas)); + } else if (index_schemas.count() == 1) { + index_ith = 0; + } else { + aux_doc_word_ith = -1; + aux_rowkey_doc_ith = -1; + aux_doc_rowkey_ith = -1; + for (int64_t i = 0; OB_SUCC(ret) && i < index_schemas.count(); ++i) { + if (index_schemas.at(i).is_rowkey_doc_id()) { + if (OB_UNLIKELY(-1 != aux_rowkey_doc_ith)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, there are multiple aux rowkey doc tables", K(ret), K(index_schemas)); + } else { + aux_rowkey_doc_ith = i; + } + } else if (index_schemas.at(i).is_doc_id_rowkey()) { + if (OB_UNLIKELY(-1 != aux_doc_rowkey_ith)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, there are multiple aux doc rowkey tables", K(ret), K(index_schemas)); + } else { + aux_doc_rowkey_ith = i; + } + } else if (index_schemas.at(i).is_fts_doc_word_aux()) { + if (OB_UNLIKELY(-1 != aux_doc_word_ith)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, there are multiple aux doc word tables", K(ret), K(index_schemas)); + } else { + aux_doc_word_ith = i; + } + } else if (OB_UNLIKELY(-1 != index_ith)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, there are multiple user index tables", K(ret), K(index_schemas)); + } else { + index_ith = i; + } + } + } + return ret; +} + int ObIndexBuilder::submit_drop_index_task(ObMySQLTransaction &trans, const ObTableSchema &data_schema, - const ObTableSchema &index_schema, - 
const int64_t schema_version, + const common::ObIArray &index_schemas, const obrpc::ObDropIndexArg &arg, common::ObIAllocator &allocator, + bool &task_has_exist, ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!index_schema.is_valid())) { + int64_t index_ith = -1; + int64_t aux_doc_word_ith = -1; + int64_t aux_rowkey_doc_ith = -1; + int64_t aux_doc_rowkey_ith = -1; + int64_t aux_multivalue_ith = -1; + if (OB_UNLIKELY(index_schemas.count() != 1 && index_schemas.count() != 4 && index_schemas.count() != 3)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret)); + LOG_WARN("invalid index schema count", K(ret), K(index_schemas)); + } else if (OB_FAIL(recognize_index_schemas(index_schemas, index_ith, aux_doc_word_ith, + aux_rowkey_doc_ith, aux_doc_rowkey_ith))) { + LOG_WARN("fail to recognize index and aux table from schema array", K(ret)); + } else if (OB_ISNULL(GCTX.root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root service is nullptr", K(ret), KP(GCTX.root_service_)); + } else if (OB_UNLIKELY(index_ith < 0 || index_ith >= index_schemas.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, invalid array index", K(ret), K(index_ith)); } else { - int64_t refreshed_schema_version = 0; - const uint64_t tenant_id = index_schema.get_tenant_id(); - ObTableLockOwnerID owner_id; - const ObDDLType ddl_type = (ObIndexArg::DROP_MLOG == arg.index_action_type_) ? 
- ObDDLType::DDL_DROP_MLOG : ObDDLType::DDL_DROP_INDEX; - ObCreateDDLTaskParam param(tenant_id, - ddl_type, - &index_schema, - nullptr, - 0/*object_id*/, - schema_version, - 0/*parallelism*/, - arg.consumer_group_id_, - &allocator, - &arg); - if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, trans, task_record))) { - LOG_WARN("submit create index ddl task failed", K(ret)); - } else if (OB_FAIL(owner_id.convert_from_value(ObLockOwnerType::DEFAULT_OWNER_TYPE, - task_record.task_id_))) { - LOG_WARN("failed to get owner id", K(ret), K(task_record.task_id_)); - } else if (OB_FAIL(ObDDLLock::lock_for_add_drop_index( - data_schema, nullptr/*inc_data_tablet_ids*/, nullptr/*del_data_tablet_ids*/, index_schema, owner_id, trans))) { - LOG_WARN("failed to lock online ddl lock", K(ret)); + const ObTableSchema &index_schema = index_schemas.at(index_ith); + const bool is_drop_fts_task = !arg.is_inner_ && index_schema.is_fts_index_aux(); + const bool is_drop_multivalue_task = !arg.is_inner_ && index_schema.is_multivalue_index_aux(); + const bool is_drop_fts_or_multivalue_task = is_drop_fts_task || is_drop_multivalue_task; + if (OB_UNLIKELY(!index_schema.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(index_schema)); + } else if (OB_UNLIKELY(is_drop_fts_task && (aux_rowkey_doc_ith < 0 || aux_rowkey_doc_ith >= index_schemas.count() + || aux_doc_rowkey_ith < 0 || aux_doc_rowkey_ith >= index_schemas.count() + || aux_doc_word_ith < 0 || aux_doc_word_ith >= index_schemas.count()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, invalid aux table id for fts index", K(ret), K(is_drop_fts_task), + K(aux_rowkey_doc_ith), K(aux_doc_rowkey_ith), K(aux_doc_word_ith), K(index_schemas.count())); + } else if (OB_UNLIKELY(is_drop_multivalue_task && (aux_rowkey_doc_ith < 0 || aux_rowkey_doc_ith >= index_schemas.count() + || aux_doc_rowkey_ith < 0 || aux_doc_rowkey_ith >= index_schemas.count()))) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, invalid aux table id for multivalue index", K(ret), K(is_drop_multivalue_task), + K(aux_rowkey_doc_ith), K(aux_doc_rowkey_ith), K(index_schemas.count())); + } else if (!is_drop_fts_or_multivalue_task) { + // this isn't drop fts task. + const int64_t parent_task_id = arg.task_id_; + ObTableLockOwnerID owner_id; + const ObDDLType ddl_type = (ObIndexArg::DROP_MLOG == arg.index_action_type_) ? + ObDDLType::DDL_DROP_MLOG : ObDDLType::DDL_DROP_INDEX; + ObCreateDDLTaskParam param(index_schema.get_tenant_id(), + ddl_type, + &index_schema, + nullptr, + 0/*object_id*/, + index_schema.get_schema_version(), + 0/*parallelism*/, + arg.consumer_group_id_, + &allocator, + &arg, + parent_task_id); + if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, trans, task_record))) { + if (OB_HASH_EXIST == ret) { + task_has_exist = true; + ret = OB_SUCCESS; + } else { + LOG_WARN("submit create index ddl task failed", K(ret)); + } + } else if (OB_FAIL(owner_id.convert_from_value(ObLockOwnerType::DEFAULT_OWNER_TYPE, + task_record.task_id_))) { + LOG_WARN("failed to get owner id", K(ret), K(task_record.task_id_)); + } else if (OB_FAIL(ObDDLLock::lock_for_add_drop_index(data_schema, nullptr/*inc_data_tablet_ids*/, + nullptr/*del_data_tablet_ids*/, index_schema, owner_id, trans))) { + LOG_WARN("failed to lock online ddl lock", K(ret)); + } + } else { // create dropping fts index parent task. + ObDDLType ddl_type = is_drop_fts_task ? 
ObDDLType::DDL_DROP_FTS_INDEX : ObDDLType::DDL_DROP_MULVALUE_INDEX; + ObCreateDDLTaskParam param(index_schema.get_tenant_id(), + ddl_type, + &index_schema, + nullptr/*dest_table_schema*/, + 0/*object_id*/, + index_schema.get_schema_version(), + 0/*parallelism*/, + arg.consumer_group_id_, + &allocator); + + param.aux_rowkey_doc_schema_ = &(index_schemas.at(aux_rowkey_doc_ith)); + param.aux_doc_rowkey_schema_ = &(index_schemas.at(aux_doc_rowkey_ith)); + if (is_drop_fts_task) { + param.aux_doc_word_schema_ = &(index_schemas.at(aux_doc_word_ith)); + } + + if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, trans, task_record))) { + LOG_WARN("fail to create drop fts index task", K(ret), K(param)); + } } } return ret; @@ -478,10 +637,14 @@ int ObIndexBuilder::do_create_local_index( int64_t refreshed_schema_version = 0; const uint64_t tenant_id = table_schema.get_tenant_id(); uint64_t tenant_data_version = 0; - if (OB_FAIL(schema_guard.get_schema_version(tenant_id, refreshed_schema_version))) { + // TODO hanxuan support fulltext index after table created + if (share::schema::is_fts_index(create_index_arg.index_type_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported", K(ret)); + } else if (OB_FAIL(schema_guard.get_schema_version(tenant_id, refreshed_schema_version))) { LOG_WARN("failed to get tenant schema version", KR(ret), K(tenant_id)); } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { - LOG_WARN("get tenant data version failed", K(ret)); + LOG_WARN("get tenant data version failed", K(ret)); } else if (OB_FAIL(trans.start(&ddl_service_.get_sql_proxy(), tenant_id, refreshed_schema_version))) { LOG_WARN("start transaction failed", KR(ret), K(tenant_id), K(refreshed_schema_version)); } else if (OB_FAIL(new_table_schema.assign(table_schema))) { @@ -627,8 +790,10 @@ int ObIndexBuilder::do_create_index( LOG_WARN("check whether the foreign key related table is executing ddl failed", K(ret)); } else if 
(INDEX_TYPE_NORMAL_LOCAL == arg.index_type_ || INDEX_TYPE_UNIQUE_LOCAL == arg.index_type_ - || INDEX_TYPE_DOMAIN_CTXCAT == arg.index_type_ - || INDEX_TYPE_SPATIAL_LOCAL == arg.index_type_) { + || INDEX_TYPE_DOMAIN_CTXCAT_DEPRECATED == arg.index_type_ + || INDEX_TYPE_SPATIAL_LOCAL == arg.index_type_ + || is_fts_index(arg.index_type_) + || is_multivalue_index(arg.index_type_)) { if (OB_FAIL(do_create_local_index(schema_guard, arg, *table_schema, res))) { LOG_WARN("fail to do create local index", K(ret), K(arg)); } @@ -716,10 +881,33 @@ int ObIndexBuilder::generate_schema( } } + if (OB_FAIL(ret)) { + } else if (share::schema::is_fts_index(arg.index_type_)) { + uint64_t tenant_data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(data_schema.get_tenant_id(), + tenant_data_version))) { + LOG_WARN("failed to get tenant data version", K(ret)); + } else if (tenant_data_version < DATA_VERSION_4_3_1_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.1, fulltext index is not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, fulltext index"); + } + } else if (is_multivalue_index(arg.index_type_)) { + uint64_t tenant_data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(data_schema.get_tenant_id(), + tenant_data_version))) { + LOG_WARN("failed to get tenant data version", K(ret)); + } else if (tenant_data_version < DATA_VERSION_4_3_1_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.1, multivalue index is not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, multivalue index"); + } + } + if (OB_SUCC(ret) && (INDEX_TYPE_NORMAL_LOCAL == arg.index_type_ || INDEX_TYPE_UNIQUE_LOCAL == arg.index_type_ - || INDEX_TYPE_DOMAIN_CTXCAT == arg.index_type_)) { + || INDEX_TYPE_DOMAIN_CTXCAT_DEPRECATED == arg.index_type_)) { if 
(OB_FAIL(sql::ObResolverUtils::check_unique_index_cover_partition_column( data_schema, arg))) { RS_LOG(WARN, "fail to check unique key cover partition column", K(ret)); @@ -775,7 +963,8 @@ int ObIndexBuilder::generate_schema( LOG_USER_ERROR(OB_ERR_WRONG_KEY_COLUMN, sort_item.column_name_.length(), sort_item.column_name_.ptr()); LOG_WARN("index created on udt column is not supported", K(arg.index_type_), K(ret)); } else if (ob_is_json_tc(data_column->get_data_type())) { - if (!is_oracle_mode && data_column->is_func_idx_column()) { + if (data_column->is_multivalue_generated_array_column()) { + } else if (!is_oracle_mode && data_column->is_func_idx_column()) { ret = OB_ERR_FUNCTIONAL_INDEX_ON_JSON_OR_GEOMETRY_FUNCTION; LOG_WARN("Cannot create a functional index on an expression that returns a JSON or GEOMETRY.",K(ret)); } else { @@ -823,13 +1012,7 @@ int ObIndexBuilder::generate_schema( if (OB_SUCC(ret)) { // column information of the global index is filled during the resolve stage - const bool is_index_local_storage = (INDEX_TYPE_NORMAL_LOCAL == arg.index_type_ - || INDEX_TYPE_UNIQUE_LOCAL == arg.index_type_ - || INDEX_TYPE_NORMAL_GLOBAL_LOCAL_STORAGE == arg.index_type_ - || INDEX_TYPE_UNIQUE_GLOBAL_LOCAL_STORAGE == arg.index_type_ - || INDEX_TYPE_DOMAIN_CTXCAT == arg.index_type_ - || INDEX_TYPE_SPATIAL_LOCAL == arg.index_type_ - || INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == arg.index_type_); + const bool is_index_local_storage = share::schema::is_index_local_storage(arg.index_type_); const bool need_generate_index_schema_column = (is_index_local_storage || global_index_without_column_info); schema.set_table_mode(data_schema.get_table_mode_flag()); schema.set_table_state_flag(data_schema.get_table_state_flag()); @@ -1023,7 +1206,9 @@ int ObIndexBuilder::set_basic_infos(const ObCreateIndexArg &arg, schema.set_def_type(data_schema.get_def_type()); if (INDEX_TYPE_NORMAL_LOCAL == arg.index_type_ || INDEX_TYPE_UNIQUE_LOCAL == arg.index_type_ - || INDEX_TYPE_SPATIAL_LOCAL 
== arg.index_type_) { + || INDEX_TYPE_SPATIAL_LOCAL == arg.index_type_ + || is_fts_index(arg.index_type_) + || is_multivalue_index(arg.index_type_)) { schema.set_part_level(data_schema.get_part_level()); } else {} // partition level is filled during resolve stage for global index schema.set_charset_type(data_schema.get_charset_type()); @@ -1121,5 +1306,77 @@ bool ObIndexBuilder::is_final_index_status(const ObIndexStatus index_status) con || is_error_index_status(index_status)); } +int ObIndexBuilder::check_has_fts_or_multivalue_index( + const uint64_t tenant_id, + const uint64_t data_table_id, + share::schema::ObSchemaGetterGuard &schema_guard, + bool &has_fts_or_multivalue_index) +{ + int ret = OB_SUCCESS; + ObSEArray indexs; + has_fts_or_multivalue_index = false; + if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id || OB_INVALID_ID == data_table_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid tenant id or data table id", K(ret), K(tenant_id), K(data_table_id)); + } else if (OB_FAIL(schema_guard.get_index_schemas_with_data_table_id(tenant_id, data_table_id, indexs))) { + LOG_WARN("fail to get index schema with data table id", K(ret), K(tenant_id), K(data_table_id)); + } else { + bool has_other_fts_index = false; + for (int64_t i = 0; OB_SUCC(ret) && !has_fts_or_multivalue_index && i < indexs.count(); ++i) { + const ObSimpleTableSchemaV2 *index_schema = indexs.at(i); + if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, index schema is nullptr", K(ret), KP(index_schema), K(i), K(indexs)); + } else if (!index_schema->is_fts_index() && !index_schema->is_multivalue_index()) { + continue; // The index isn't fulltext index / multivalue index, just skip. 
+ } else if (index_schema->is_fts_index_aux() || + index_schema->is_fts_doc_word_aux() || + index_schema->is_multivalue_index_aux()) { // The index is fulltext index + has_fts_or_multivalue_index = true; + } + } + } + return ret; +} + +bool ObIndexBuilder::ignore_error_code_for_domain_index( + const int ret, + const obrpc::ObDropIndexArg &arg, + const share::schema::ObTableSchema *index_schema/*= nullptr*/) +{ + const bool is_domain_index = nullptr == index_schema ? + true : (index_schema->is_fts_index() || index_schema->is_multivalue_index()); + bool ignore = false; + if (!arg.is_inner_ || !is_domain_index) { + ignore = false; + } else if (OB_TABLE_NOT_EXIST == ret) { + ignore = true; + } + return ignore; +} + +int ObIndexBuilder::set_index_table_column_store_if_need( + share::schema::ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t compat_version = 0; + const uint64_t tenant_id = table_schema.get_tenant_id(); + const uint64_t table_id = table_schema.get_table_id(); + if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(table_schema)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { + LOG_WARN("fail to get min data version", KR(ret), K(tenant_id), K(table_id)); + } else if (compat_version >= DATA_VERSION_4_2_0_0) { + table_schema.set_column_store(true); + if (table_schema.get_column_group_count() == 0) { + if (OB_FAIL(table_schema.add_default_column_group())) { + LOG_WARN("fail to add default column group", KR(ret), K(tenant_id), K(table_id)); + } + } + } + return ret; +} + }//end namespace rootserver }//end namespace oceanbase diff --git a/src/rootserver/ob_index_builder.h b/src/rootserver/ob_index_builder.h index 08bc96ab48..6df6645565 100644 --- a/src/rootserver/ob_index_builder.h +++ b/src/rootserver/ob_index_builder.h @@ -47,6 +47,7 @@ namespace rootserver class ObZoneManager; class ObDDLService; class ObDDLTaskRecord; +struct 
ObCreateDDLTaskParam; class ObIndexBuilder { @@ -81,10 +82,10 @@ public: int submit_drop_index_task( common::ObMySQLTransaction &trans, const share::schema::ObTableSchema &data_schema, - const share::schema::ObTableSchema &index_schema, - const int64_t schema_version, + const common::ObIArray &index_schemas, const obrpc::ObDropIndexArg &arg, common::ObIAllocator &allocator, + bool &task_has_exist, ObDDLTaskRecord &task_record); int submit_build_index_task(common::ObMySQLTransaction &trans, const obrpc::ObCreateIndexArg &arg, @@ -98,20 +99,12 @@ public: common::ObIAllocator &allocator, ObDDLTaskRecord &task_record); private: - typedef common::ObArray > OrderFTColumns; - class FulltextColumnOrder - { - public: - FulltextColumnOrder() {} - ~FulltextColumnOrder() {} - - bool operator()(const std::pair &left, - const std::pair &right) const - { - return left.first < right.first; - } - }; - + int recognize_index_schemas( + const common::ObIArray &index_schemas, + int64_t &index_ith, + int64_t &aux_doc_word_ith, + int64_t &aux_rowkey_doc_ith, + int64_t &aux_doc_rowkey_ith); int set_basic_infos(const obrpc::ObCreateIndexArg &arg, const share::schema::ObTableSchema &data_schema, share::schema::ObTableSchema &schema); @@ -123,7 +116,16 @@ private: share::schema::ObTableSchema &schema); bool is_final_index_status(const share::schema::ObIndexStatus index_status) const; - + int check_has_fts_or_multivalue_index( + const uint64_t tenant_id, + const uint64_t data_table_id, + share::schema::ObSchemaGetterGuard &schema_guard, + bool &has_fts_or_multivalue_index); + bool ignore_error_code_for_domain_index( + const int ret, + const obrpc::ObDropIndexArg &arg, + const share::schema::ObTableSchema *index_schema = nullptr); + int set_index_table_column_store_if_need(share::schema::ObTableSchema &table_schema); int create_index_column_group(const obrpc::ObCreateIndexArg &arg, share::schema::ObTableSchema &index_table_schema); diff --git a/src/rootserver/ob_root_service.cpp 
b/src/rootserver/ob_root_service.cpp index c5d7f7272b..0ee022ab4e 100755 --- a/src/rootserver/ob_root_service.cpp +++ b/src/rootserver/ob_root_service.cpp @@ -3431,6 +3431,14 @@ int ObRootService::create_table(const ObCreateTableArg &arg, ObCreateTableRes &r index_arg.index_type_ = INDEX_TYPE_UNIQUE_GLOBAL_LOCAL_STORAGE; } else if (INDEX_TYPE_SPATIAL_GLOBAL == index_arg.index_type_) { index_arg.index_type_ = INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE; + } else if (is_global_fts_index(index_arg.index_type_)) { + if (index_arg.index_type_ == INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL) { + index_arg.index_type_ = INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL_LOCAL_STORAGE; + } else if (index_arg.index_type_ == INDEX_TYPE_FTS_INDEX_GLOBAL) { + index_arg.index_type_ = INDEX_TYPE_FTS_INDEX_GLOBAL_LOCAL_STORAGE; + } else if (index_arg.index_type_ == INDEX_TYPE_FTS_DOC_WORD_GLOBAL) { + index_arg.index_type_ = INDEX_TYPE_FTS_DOC_WORD_GLOBAL_LOCAL_STORAGE; + } } } // the global index has generated column schema during resolve, RS no need to generate index schema, @@ -4620,6 +4628,11 @@ int ObRootService::create_index(const ObCreateIndexArg &arg, obrpc::ObAlterTable } else if (!arg.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(arg), K(ret)); + } else if (is_fts_index(arg.index_type_) || is_multivalue_index(arg.index_type_)) { + // TODO hanxuan support create fulltext index + // todo yunyi not dynamic create multivlaue index + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported", K(ret)); } else { ObIndexBuilder index_builder(ddl_service_); if (OB_FAIL(ddl_service_.get_tenant_schema_guard_with_version_in_inner_table(arg.tenant_id_, schema_guard))) { diff --git a/src/rootserver/parallel_ddl/ob_create_table_helper.cpp b/src/rootserver/parallel_ddl/ob_create_table_helper.cpp index 564df7010c..5eb18adcd8 100644 --- a/src/rootserver/parallel_ddl/ob_create_table_helper.cpp +++ b/src/rootserver/parallel_ddl/ob_create_table_helper.cpp @@ -1196,6 +1196,14 @@ int 
ObCreateTableHelper::generate_aux_table_schemas_() index_arg.index_type_ = INDEX_TYPE_UNIQUE_GLOBAL_LOCAL_STORAGE; } else if (INDEX_TYPE_SPATIAL_GLOBAL == index_arg.index_type_) { index_arg.index_type_ = INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE; + } else if (is_global_fts_index(index_arg.index_type_)) { + if (index_arg.index_type_ == INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL) { + index_arg.index_type_ = INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL_LOCAL_STORAGE; + } else if (index_arg.index_type_ == INDEX_TYPE_FTS_INDEX_GLOBAL) { + index_arg.index_type_ = INDEX_TYPE_FTS_INDEX_GLOBAL_LOCAL_STORAGE; + } else if (index_arg.index_type_ == INDEX_TYPE_FTS_DOC_WORD_GLOBAL) { + index_arg.index_type_ = INDEX_TYPE_FTS_DOC_WORD_GLOBAL_LOCAL_STORAGE; + } } } // the global index has generated column schema during resolve, RS no need to generate index schema, diff --git a/src/share/CMakeLists.txt b/src/share/CMakeLists.txt index d4e8b2d9f8..af15431e7d 100644 --- a/src/share/CMakeLists.txt +++ b/src/share/CMakeLists.txt @@ -107,6 +107,7 @@ ob_set_subtarget(ob_share common ob_gts_info.cpp ob_i_tablet_scan.cpp ob_index_builder_util.cpp + ob_fts_index_builder_util.cpp ob_inner_config_root_addr.cpp ob_io_device_helper.cpp ob_kv_parser.cpp @@ -130,6 +131,7 @@ ob_set_subtarget(ob_share common ob_primary_zone_util.cpp ob_priv_common.cpp ob_proposal_id.cpp + ob_plugin_helper.cpp ob_replica_info.cpp ob_resource_limit.cpp ob_root_addr_agent.cpp @@ -530,6 +532,15 @@ if (OB_ERRSIM) ) endif() +ob_set_subtarget(ob_share text_analysis + text_analysis/ob_text_analyzer.cpp + text_analysis/ob_token_stream.cpp +) + +ob_set_subtarget(ob_share throttle + throttle/ob_throttle_common.cpp +) + ob_set_subtarget(ob_share wr wr/ob_wr_service.cpp wr/ob_wr_task.cpp diff --git a/src/share/compaction/ob_table_ckm_items.cpp b/src/share/compaction/ob_table_ckm_items.cpp index a40363ed29..3f2413628a 100644 --- a/src/share/compaction/ob_table_ckm_items.cpp +++ b/src/share/compaction/ob_table_ckm_items.cpp @@ -144,6 +144,7 @@ 
ObTableCkmItems::VALIDATE_CKM_FUNC ObTableCkmItems::validate_ckm_func[FUNC_CNT] ObTableCkmItems::ObTableCkmItems(const uint64_t tenant_id) : is_inited_(false), + is_fts_index_(false), tenant_id_(tenant_id), table_id_(0), row_count_(0), @@ -179,7 +180,8 @@ int ObTableCkmItems::build( LOG_WARN("failed to assgin tablet replica ckm array", KR(ret), K(input_ckm_items)); } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, table_id, table_schema_))) { LOG_WARN("fail to get table schema", KR(ret), K_(tenant_id), K(table_id)); - } else if (!simple_schema.is_index_table() && OB_FAIL(sort_col_id_array_.build(tenant_id_, *table_schema_))) { + } else if ((!simple_schema.is_index_table() || simple_schema.is_fts_or_multivalue_index()) + && OB_FAIL(sort_col_id_array_.build(tenant_id_, *table_schema_))) { LOG_WARN("failed to build column id array for data table", KR(ret), KPC_(table_schema)); } else { table_id_ = simple_schema.get_table_id(); @@ -219,7 +221,8 @@ int ObTableCkmItems::build( compaction_scn, tablet_pairs_, ckm_items_))) { LOG_WARN("failed to get table column checksum items", KR(ret)); - } else if (!table_schema_->is_index_table() && OB_FAIL(sort_col_id_array_.build(tenant_id_, *table_schema_))) { + } else if ((!table_schema_->is_index_table() || table_schema_->is_fts_or_multivalue_index()) + && OB_FAIL(sort_col_id_array_.build(tenant_id_, *table_schema_))) { LOG_WARN("failed to build column id array for data table", KR(ret), KPC(table_schema_)); } else { table_id_ = table_id; @@ -324,8 +327,9 @@ int ObTableCkmItems::validate_column_ckm_sum( int64_t index_row_cnt = 0; const schema::ObTableSchema *data_table_schema = data_ckm.table_schema_; const schema::ObTableSchema *index_table_schema = index_ckm.table_schema_; - if (OB_UNLIKELY(nullptr == data_table_schema || nullptr == index_table_schema - || data_table_schema->get_table_id() != index_table_schema->get_data_table_id())) { + if (OB_UNLIKELY(!index_ckm.is_fts_index_ + && (nullptr == data_table_schema || 
nullptr == index_table_schema + || data_table_schema->get_table_id() != index_table_schema->get_data_table_id()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("data table and index table should not validate column checksum", KR(ret), KPC(data_table_schema), KPC(index_table_schema)); @@ -338,6 +342,7 @@ int ObTableCkmItems::validate_column_ckm_sum( LOG_ERROR("sum row count in data & global index is not equal", KR(ret), K(data_row_cnt), K(index_row_cnt)); } else if (OB_FAIL(compare_ckm_by_column_ids( data_ckm, + index_ckm, *data_table_schema, *index_table_schema, data_ckm.ckm_sum_array_, @@ -387,8 +392,9 @@ int ObTableCkmItems::validate_tablet_column_ckm( ObColumnChecksumErrorInfo ckm_error_info; const schema::ObTableSchema *data_table_schema = data_ckm.table_schema_; const schema::ObTableSchema *index_table_schema = index_ckm.table_schema_; - if (OB_UNLIKELY(nullptr == data_table_schema || nullptr == index_table_schema - || data_table_schema->get_table_id() != index_table_schema->get_data_table_id())) { + if (OB_UNLIKELY(!index_ckm.is_fts_index_ + && (nullptr == data_table_schema || nullptr == index_table_schema + || data_table_schema->get_table_id() != index_table_schema->get_data_table_id()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("data table and index table should not validate column checksum", KR(ret), KPC(data_table_schema), KPC(index_table_schema)); @@ -430,6 +436,7 @@ int ObTableCkmItems::validate_tablet_column_ckm( "index_row_cnt", index_replica_ckm.row_count_); } else if (OB_FAIL(compare_ckm_by_column_ids( data_ckm, + index_ckm, *data_table_schema, *index_table_schema, data_replica_ckm.column_meta_.column_checksums_, @@ -470,6 +477,7 @@ int ObTableCkmItems::validate_tablet_column_ckm( int ObTableCkmItems::compare_ckm_by_column_ids( ObTableCkmItems &data_ckm, + ObTableCkmItems &index_ckm, const schema::ObTableSchema &data_table_schema, const schema::ObTableSchema &index_table_schema, const ObIArray &data_replica_col_ckm_array, @@ -497,7 +505,8 @@ int 
ObTableCkmItems::compare_ckm_by_column_ids( if (OB_ISNULL(index_column_schema = index_table_schema.get_column_schema(column_id))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("index column schema is unexpected null", KR(ret)); - } else if (index_column_schema->is_shadow_column()) { + } else if (index_ckm.is_fts_index_ || index_column_schema->is_shadow_column()) { + // some column in fts index is not exist in data table // shadow column only exists in index table LOG_TRACE("column do not need to compare checksum", K(column_id), KPC(index_column_schema), K(index_column_schema->is_shadow_column())); @@ -508,9 +517,12 @@ int ObTableCkmItems::compare_ckm_by_column_ids( } else if (!data_column_schema->is_column_stored_in_sstable()) { // virtual column/ROWID fake column only tag in data table LOG_TRACE("column do not need to compare checksum", KPC(data_column_schema), K(data_column_schema->is_column_stored_in_sstable())); + } else if (OB_UNLIKELY(!data_ckm.sort_col_id_array_.is_inited())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("sort col id is unexpected invalid", KR(ret), K(data_ckm)); } else if (OB_FAIL(ObSortColumnIdArray::get_array_idx_by_column_id(data_ckm.sort_col_id_array_, column_id, data_array_idx))) { LOG_WARN("failed to get array idx from data ckm", KR(ret), K(idx), K(column_id)); - } else if (OB_UNLIKELY(data_array_idx >= data_replica_col_ckm_array.count())) { + } else if (OB_UNLIKELY(data_array_idx < 0 || data_array_idx >= data_replica_col_ckm_array.count())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("array idx is invalid", KR(ret), K(data_array_idx), K(data_ckm)); } else if (data_replica_col_ckm_array.at(data_array_idx) != index_replica_col_ckm_array.at(idx)) { @@ -537,6 +549,7 @@ void ObTableCkmItems::clear() void ObTableCkmItems::reset() { is_inited_ = false; + is_fts_index_ = false; table_id_ = 0; row_count_ = 0; table_schema_ = NULL; diff --git a/src/share/compaction/ob_table_ckm_items.h b/src/share/compaction/ob_table_ckm_items.h index ca1111aaec..c6d606e98e 100644 
--- a/src/share/compaction/ob_table_ckm_items.h +++ b/src/share/compaction/ob_table_ckm_items.h @@ -73,9 +73,10 @@ public: {} ~ObSortColumnIdArray() { reset(); } int build(const uint64_t tenant_id, const share::schema::ObTableSchema &table_schema); + bool is_inited() const { return is_inited_; } static int get_array_idx_by_column_id(ObSortColumnIdArray& sort_array, const int64_t column_id, int64_t &array_idx) { - return sort_array.get_func_(sort_array, column_id, array_idx); + return NULL == sort_array.get_func_ ? -1 : sort_array.get_func_(sort_array, column_id, array_idx); } void reset(); TO_STRING_KV(K_(is_inited), K_(build_map_flag), K_(array), "map_size", map_.size()); @@ -100,9 +101,10 @@ private: struct ObTableCkmItems { public: - ObTableCkmItems(const uint64_t tenant_id); + ObTableCkmItems(const uint64_t tenant_id = MTL_ID()); ~ObTableCkmItems(); bool is_inited() const { return is_inited_; } + void set_is_fts_index(const bool is_fts_index) { is_fts_index_ = is_fts_index; } void clear(); void reset(); int64_t get_table_id() const { return table_id_; } @@ -148,6 +150,7 @@ private: ObTableCkmItems &index_ckm); static int compare_ckm_by_column_ids( ObTableCkmItems &data_ckm, + ObTableCkmItems &index_ckm, const share::schema::ObTableSchema &data_table_schema, const share::schema::ObTableSchema &index_table_schema, const ObIArray &data_replica_ckm_array, @@ -160,6 +163,7 @@ private: static const int64_t DEFAULT_COLUMN_CNT = 64; static const int64_t DEFAULT_TABLET_CNT = 16; bool is_inited_; + bool is_fts_index_; uint64_t tenant_id_; uint64_t table_id_; int64_t row_count_; diff --git a/src/share/inner_table/ob_inner_table_schema.20001_20050.cpp b/src/share/inner_table/ob_inner_table_schema.20001_20050.cpp index 153fbf63d2..64cce99417 100644 --- a/src/share/inner_table/ob_inner_table_schema.20001_20050.cpp +++ b/src/share/inner_table/ob_inner_table_schema.20001_20050.cpp @@ -310,7 +310,7 @@ int ObInnerTableSchema::statistics_schema(ObTableSchema &table_schema) 
table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST('def' AS CHAR(512)) AS TABLE_CATALOG, V.TABLE_SCHEMA collate utf8mb4_name_case AS TABLE_SCHEMA, V.TABLE_NAME collate utf8mb4_name_case AS TABLE_NAME, CAST(V.NON_UNIQUE AS SIGNED) AS NON_UNIQUE, V.INDEX_SCHEMA collate utf8mb4_name_case AS INDEX_SCHEMA, V.INDEX_NAME collate utf8mb4_name_case AS INDEX_NAME, CAST(V.SEQ_IN_INDEX AS UNSIGNED) AS SEQ_IN_INDEX, V.COLUMN_NAME AS COLUMN_NAME, CAST('A' AS CHAR(1)) AS COLLATION, CAST(NULL AS SIGNED) AS CARDINALITY, CAST(V.SUB_PART AS SIGNED) AS SUB_PART, CAST(NULL AS CHAR(10)) AS PACKED, CAST(V.NULLABLE AS CHAR(3)) AS NULLABLE, CAST(V.INDEX_TYPE AS CHAR(16)) AS INDEX_TYPE, CAST(V.COMMENT AS CHAR(16)) AS COMMENT, CAST(V.INDEX_COMMENT AS CHAR(1024)) AS INDEX_COMMENT, CAST(V.IS_VISIBLE AS CHAR(3)) AS IS_VISIBLE, V.EXPRESSION AS EXPRESSION FROM (SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, CASE WHEN i.index_type IN (2,4,8) THEN 0 ELSE 1 END AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, substr(i.table_name, 7 + instr(substr(i.table_name, 7), '_')) AS INDEX_NAME, c.index_position AS SEQ_IN_INDEX, CASE WHEN d_col.column_name IS NOT NULL THEN d_col.column_name ELSE c.column_name END AS COLUMN_NAME, CASE WHEN d_col.column_name IS NOT NULL THEN c.data_length ELSE NULL END AS SUB_PART, CASE WHEN c.nullable = 1 THEN 'YES' ELSE '' END AS NULLABLE, CASE WHEN i.index_using_type = 0 THEN 'BTREE' ELSE (CASE WHEN i.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END)END AS INDEX_TYPE, CASE i.index_status WHEN 2 THEN 'VALID' WHEN 3 THEN 'CHECKING' WHEN 4 THEN 'INELEGIBLE' WHEN 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS COMMENT, i.comment AS INDEX_COMMENT, CASE WHEN (i.index_attributes_set & 1) THEN 'NO' ELSE 'YES' END AS IS_VISIBLE, d_col2.cur_default_value_v2 AS EXPRESSION FROM oceanbase.__all_table i JOIN oceanbase.__all_table t ON 
i.data_table_id=t.table_id AND i.tenant_id = t.tenant_id AND i.database_id = t.database_id AND i.table_type = 5 AND i.table_mode >> 12 & 15 in (0,1) AND t.table_type in (0,3) JOIN oceanbase.__all_column c ON i.table_id=c.table_id AND i.tenant_id = c.tenant_id AND c.index_position > 0 JOIN oceanbase.__all_database db ON i.tenant_id = db.tenant_id AND i.database_id = db.database_id AND db.in_recyclebin = 0 AND db.database_name != '__recyclebin' LEFT JOIN oceanbase.__all_column d_col ON i.data_table_id = d_col.table_id AND i.tenant_id = d_col.tenant_id AND (case when (c.is_hidden = 1 and substr(c.column_name, 1, 8) = '__substr') then substr(c.column_name, 8 + instr(substr(c.column_name, 8), '_')) else 0 end) = d_col.column_id LEFT JOIN oceanbase.__all_column d_col2 ON i.data_table_id = d_col2.table_id AND i.tenant_id = d_col2.tenant_id AND c.column_id = d_col2.column_id AND d_col2.cur_default_value_v2 is not null AND d_col2.is_hidden = 1 AND (d_col2.column_flags & (0x1 << 0) = 1 or d_col2.column_flags & (0x1 << 1) = 1) AND substr(d_col2.column_name, 1, 6) = 'SYS_NC' UNION ALL SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, 0 AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, 'PRIMARY' AS INDEX_NAME, c.rowkey_position AS SEQ_IN_INDEX, c.column_name AS COLUMN_NAME, NULL AS SUB_PART, '' AS NULLABLE, CASE WHEN t.index_using_type = 0 THEN 'BTREE' ELSE ( CASE WHEN t.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END) END AS INDEX_TYPE, 'VALID' AS COMMENT, t.comment AS INDEX_COMMENT, 'YES' AS IS_VISIBLE, NULL AS EXPRESSION FROM oceanbase.__all_table t JOIN oceanbase.__all_column c ON t.table_id=c.table_id AND t.tenant_id = c.tenant_id AND c.rowkey_position > 0 AND c.is_hidden = 0 AND t.table_type in (0,3) JOIN oceanbase.__all_database db ON t.tenant_id = db.tenant_id AND t.database_id = db.database_id AND db.in_recyclebin = 0 AND db.database_name != '__recyclebin' UNION ALL SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, CASE WHEN 
i.index_type IN (2,4,8) THEN 0 ELSE 1 END AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, substr(i.table_name, 7 + instr(substr(i.table_name, 7), '_')) AS INDEX_NAME, c.index_position AS SEQ_IN_INDEX, CASE WHEN d_col.column_name IS NOT NULL THEN d_col.column_name ELSE c.column_name END AS COLUMN_NAME, CASE WHEN d_col.column_name IS NOT NULL THEN c.data_length ELSE NULL END AS SUB_PART, CASE WHEN c.nullable = 1 THEN 'YES' ELSE '' END AS NULLABLE, CASE WHEN i.index_using_type = 0 THEN 'BTREE' ELSE (CASE WHEN i.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END)END AS INDEX_TYPE, CASE i.index_status WHEN 2 THEN 'VALID' WHEN 3 THEN 'CHECKING' WHEN 4 THEN 'INELEGIBLE' WHEN 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS COMMENT, i.comment AS INDEX_COMMENT, CASE WHEN (i.index_attributes_set & 1) THEN 'NO' ELSE 'YES' END AS IS_VISIBLE, d_col2.cur_default_value_v2 AS EXPRESSION FROM oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE i JOIN oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE t ON i.data_table_id=t.table_id AND i.tenant_id = t.tenant_id AND i.database_id = t.database_id AND i.table_type = 5 AND t.table_type in (0,3) AND t.tenant_id = EFFECTIVE_TENANT_ID() JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE c ON i.table_id=c.table_id AND i.tenant_id = c.tenant_id AND c.index_position > 0 JOIN oceanbase.__all_database db ON i.database_id = db.database_id LEFT JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE d_col ON i.data_table_id = d_col.table_id AND i.tenant_id = d_col.tenant_id AND (case when (c.is_hidden = 1 and substr(c.column_name, 1, 8) = '__substr') then substr(c.column_name, 8 + instr(substr(c.column_name, 8), '_')) else 0 end) = d_col.column_id LEFT JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE d_col2 ON i.data_table_id = d_col2.table_id AND i.tenant_id = d_col2.tenant_id AND c.column_id = d_col2.column_id AND d_col2.cur_default_value_v2 is not null AND d_col2.is_hidden = 1 AND (d_col2.column_flags & (0x1 << 0) = 1 or d_col2.column_flags & (0x1 << 1) = 1) AND substr(d_col2.column_name, 1, 
6) = 'SYS_NC' UNION ALL SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, 0 AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, 'PRIMARY' AS INDEX_NAME, c.rowkey_position AS SEQ_IN_INDEX, c.column_name AS COLUMN_NAME, NULL AS SUB_PART, '' AS NULLABLE, CASE WHEN t.index_using_type = 0 THEN 'BTREE' ELSE ( CASE WHEN t.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END) END AS INDEX_TYPE, 'VALID' AS COMMENT, t.comment AS INDEX_COMMENT, 'YES' AS IS_VISIBLE, NULL AS EXPRESSION FROM oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE t JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE c ON t.table_id=c.table_id AND t.tenant_id = c.tenant_id AND t.tenant_id = EFFECTIVE_TENANT_ID() AND c.rowkey_position > 0 AND c.is_hidden = 0 AND t.table_type in (0,3) JOIN oceanbase.__all_database db ON t.database_id = db.database_id)V )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST('def' AS CHAR(512)) AS TABLE_CATALOG, V.TABLE_SCHEMA collate utf8mb4_name_case AS TABLE_SCHEMA, V.TABLE_NAME collate utf8mb4_name_case AS TABLE_NAME, CAST(V.NON_UNIQUE AS SIGNED) AS NON_UNIQUE, V.INDEX_SCHEMA collate utf8mb4_name_case AS INDEX_SCHEMA, V.INDEX_NAME collate utf8mb4_name_case AS INDEX_NAME, CAST(V.SEQ_IN_INDEX AS UNSIGNED) AS SEQ_IN_INDEX, V.COLUMN_NAME AS COLUMN_NAME, CAST('A' AS CHAR(1)) AS COLLATION, CAST(NULL AS SIGNED) AS CARDINALITY, CAST(V.SUB_PART AS SIGNED) AS SUB_PART, CAST(NULL AS CHAR(10)) AS PACKED, CAST(V.NULLABLE AS CHAR(3)) AS NULLABLE, CAST(V.INDEX_TYPE AS CHAR(16)) AS INDEX_TYPE, CAST(V.COMMENT AS CHAR(16)) AS COMMENT, CAST(V.INDEX_COMMENT AS CHAR(1024)) AS INDEX_COMMENT, CAST(V.IS_VISIBLE AS CHAR(3)) AS IS_VISIBLE, V.EXPRESSION AS EXPRESSION FROM (SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, CASE WHEN i.index_type IN (2,4,8) THEN 0 ELSE 1 END AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, substr(i.table_name, 7 + instr(substr(i.table_name, 7), '_')) AS INDEX_NAME, c.index_position AS SEQ_IN_INDEX, CASE WHEN d_col.column_name 
IS NOT NULL THEN d_col.column_name ELSE c.column_name END AS COLUMN_NAME, CASE WHEN d_col.column_name IS NOT NULL THEN c.data_length ELSE NULL END AS SUB_PART, CASE WHEN c.nullable = 1 THEN 'YES' ELSE '' END AS NULLABLE, CASE WHEN i.index_type in (15, 18, 21) THEN 'FULLTEXT' WHEN i.index_using_type = 0 THEN 'BTREE' WHEN i.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END AS INDEX_TYPE, CASE i.index_status WHEN 2 THEN 'VALID' WHEN 3 THEN 'CHECKING' WHEN 4 THEN 'INELEGIBLE' WHEN 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS COMMENT, i.comment AS INDEX_COMMENT, CASE WHEN (i.index_attributes_set & 1) THEN 'NO' ELSE 'YES' END AS IS_VISIBLE, d_col2.cur_default_value_v2 AS EXPRESSION FROM oceanbase.__all_table i JOIN oceanbase.__all_table t ON i.data_table_id=t.table_id AND i.tenant_id = t.tenant_id AND i.database_id = t.database_id AND i.table_type = 5 AND i.index_type NOT IN (13, 14, 16, 17, 19, 20, 22) AND i.table_mode >> 12 & 15 in (0,1) AND t.table_type in (0,3) JOIN oceanbase.__all_column c ON i.table_id=c.table_id AND i.tenant_id = c.tenant_id AND c.index_position > 0 JOIN oceanbase.__all_database db ON i.tenant_id = db.tenant_id AND i.database_id = db.database_id AND db.in_recyclebin = 0 AND db.database_name != '__recyclebin' LEFT JOIN oceanbase.__all_column d_col ON i.data_table_id = d_col.table_id AND i.tenant_id = d_col.tenant_id AND (case when (c.is_hidden = 1 and substr(c.column_name, 1, 8) = '__substr') then substr(c.column_name, 8 + instr(substr(c.column_name, 8), '_')) else 0 end) = d_col.column_id LEFT JOIN oceanbase.__all_column d_col2 ON i.data_table_id = d_col2.table_id AND i.tenant_id = d_col2.tenant_id AND c.column_id = d_col2.column_id AND d_col2.cur_default_value_v2 is not null AND d_col2.is_hidden = 1 AND (d_col2.column_flags & (0x1 << 0) = 1 or d_col2.column_flags & (0x1 << 1) = 1) AND substr(d_col2.column_name, 1, 6) = 'SYS_NC' UNION ALL SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, 0 AS NON_UNIQUE, db.database_name AS 
INDEX_SCHEMA, 'PRIMARY' AS INDEX_NAME, c.rowkey_position AS SEQ_IN_INDEX, c.column_name AS COLUMN_NAME, NULL AS SUB_PART, '' AS NULLABLE, CASE WHEN t.index_using_type = 0 THEN 'BTREE' ELSE ( CASE WHEN t.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END) END AS INDEX_TYPE, 'VALID' AS COMMENT, t.comment AS INDEX_COMMENT, 'YES' AS IS_VISIBLE, NULL AS EXPRESSION FROM oceanbase.__all_table t JOIN oceanbase.__all_column c ON t.table_id=c.table_id AND t.tenant_id = c.tenant_id AND c.rowkey_position > 0 AND c.is_hidden = 0 AND t.table_type in (0,3) JOIN oceanbase.__all_database db ON t.tenant_id = db.tenant_id AND t.database_id = db.database_id AND db.in_recyclebin = 0 AND db.database_name != '__recyclebin' UNION ALL SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, CASE WHEN i.index_type IN (2,4,8) THEN 0 ELSE 1 END AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, substr(i.table_name, 7 + instr(substr(i.table_name, 7), '_')) AS INDEX_NAME, c.index_position AS SEQ_IN_INDEX, CASE WHEN d_col.column_name IS NOT NULL THEN d_col.column_name ELSE c.column_name END AS COLUMN_NAME, CASE WHEN d_col.column_name IS NOT NULL THEN c.data_length ELSE NULL END AS SUB_PART, CASE WHEN c.nullable = 1 THEN 'YES' ELSE '' END AS NULLABLE, CASE WHEN i.index_type in (15, 18, 21) THEN 'FULLTEXT' WHEN i.index_using_type = 0 THEN 'BTREE' WHEN i.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END AS INDEX_TYPE, CASE i.index_status WHEN 2 THEN 'VALID' WHEN 3 THEN 'CHECKING' WHEN 4 THEN 'INELEGIBLE' WHEN 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS COMMENT, i.comment AS INDEX_COMMENT, CASE WHEN (i.index_attributes_set & 1) THEN 'NO' ELSE 'YES' END AS IS_VISIBLE, d_col2.cur_default_value_v2 AS EXPRESSION FROM oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE i JOIN oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE t ON i.data_table_id=t.table_id AND i.tenant_id = t.tenant_id AND i.database_id = t.database_id AND i.table_type = 5 AND i.index_type NOT IN (13, 14, 16, 17, 19, 20, 22) AND t.table_type in (0,3) AND 
t.tenant_id = EFFECTIVE_TENANT_ID() JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE c ON i.table_id=c.table_id AND i.tenant_id = c.tenant_id AND c.index_position > 0 JOIN oceanbase.__all_database db ON i.database_id = db.database_id LEFT JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE d_col ON i.data_table_id = d_col.table_id AND i.tenant_id = d_col.tenant_id AND (case when (c.is_hidden = 1 and substr(c.column_name, 1, 8) = '__substr') then substr(c.column_name, 8 + instr(substr(c.column_name, 8), '_')) else 0 end) = d_col.column_id LEFT JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE d_col2 ON i.data_table_id = d_col2.table_id AND i.tenant_id = d_col2.tenant_id AND c.column_id = d_col2.column_id AND d_col2.cur_default_value_v2 is not null AND d_col2.is_hidden = 1 AND (d_col2.column_flags & (0x1 << 0) = 1 or d_col2.column_flags & (0x1 << 1) = 1) AND substr(d_col2.column_name, 1, 6) = 'SYS_NC' UNION ALL SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, 0 AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, 'PRIMARY' AS INDEX_NAME, c.rowkey_position AS SEQ_IN_INDEX, c.column_name AS COLUMN_NAME, NULL AS SUB_PART, '' AS NULLABLE, CASE WHEN t.index_using_type = 0 THEN 'BTREE' ELSE ( CASE WHEN t.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END) END AS INDEX_TYPE, 'VALID' AS COMMENT, t.comment AS INDEX_COMMENT, 'YES' AS IS_VISIBLE, NULL AS EXPRESSION FROM oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE t JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE c ON t.table_id=c.table_id AND t.tenant_id = c.tenant_id AND t.tenant_id = EFFECTIVE_TENANT_ID() AND c.rowkey_position > 0 AND c.is_hidden = 0 AND t.table_type in (0,3) JOIN oceanbase.__all_database db ON t.database_id = db.database_id)V )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp b/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp index 06c1418bed..cebd0e1986 100644 --- 
a/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp +++ b/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp @@ -2110,7 +2110,7 @@ int ObInnerTableSchema::cdb_indexes_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(C.TENANT_ID AS SIGNED) AS CON_ID, CAST(INDEX_OWNER AS CHAR(128)) AS OWNER, CAST(INDEX_NAME AS CHAR(128)) AS INDEX_NAME, CAST(INDEX_TYPE_NAME AS CHAR(27)) AS INDEX_TYPE, CAST(TABLE_OWNER AS CHAR(128)) AS TABLE_OWNER, CAST(TABLE_NAME AS CHAR(128)) AS TABLE_NAME, CAST('TABLE' AS CHAR(5)) AS TABLE_TYPE, CAST(UNIQUENESS AS CHAR(9)) AS UNIQUENESS, CAST(COMPRESSION AS CHAR(13)) AS COMPRESSION, CAST(NULL AS NUMBER) AS PREFIX_LENGTH, CAST(TABLESPACE_NAME AS CHAR(30)) AS TABLESPACE_NAME, CAST(NULL AS NUMBER) AS INI_TRANS, CAST(NULL AS NUMBER) AS MAX_TRANS, CAST(NULL AS NUMBER) AS INITIAL_EXTENT, CAST(NULL AS NUMBER) AS NEXT_EXTENT, CAST(NULL AS NUMBER) AS MIN_EXTENTS, CAST(NULL AS NUMBER) AS MAX_EXTENTS, CAST(NULL AS NUMBER) AS PCT_INCREASE, CAST(NULL AS NUMBER) AS PCT_THRESHOLD, CAST(NULL AS NUMBER) AS INCLUDE_COLUMN, CAST(NULL AS NUMBER) AS FREELISTS, CAST(NULL AS NUMBER) AS FREELIST_GROUPS, CAST(NULL AS NUMBER) AS PCT_FREE, CAST(NULL AS CHAR(3)) AS LOGGING, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STATUS AS CHAR(8)) AS STATUS, CAST(NULL AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(NULL AS DATE) AS LAST_ANALYZED, CAST(DOP_DEGREE AS CHAR(40)) AS DEGREE, CAST(NULL AS CHAR(40)) AS INSTANCES, CAST(CASE WHEN A_PART_LEVEL = 0 THEN 'NO' ELSE 'YES' END AS CHAR(3)) AS PARTITIONED, CAST(NULL AS CHAR(1)) AS TEMPORARY, CAST(NULL AS CHAR(1)) AS 
"GENERATED", CAST(NULL AS CHAR(1)) AS SECONDARY, CAST(NULL AS CHAR(7)) AS BUFFER_POOL, CAST(NULL AS CHAR(7)) AS FLASH_CACHE, CAST(NULL AS CHAR(7)) AS CELL_FLASH_CACHE, CAST(NULL AS CHAR(3)) AS USER_STATS, CAST(NULL AS CHAR(15)) AS DURATION, CAST(NULL AS NUMBER) AS PCT_DIRECT_ACCESS, CAST(NULL AS CHAR(128)) AS ITYP_OWNER, CAST(NULL AS CHAR(128)) AS ITYP_NAME, CAST(NULL AS CHAR(1000)) AS PARAMETERS, CAST(NULL AS CHAR(3)) AS GLOBAL_STATS, CAST(NULL AS CHAR(12)) AS DOMIDX_STATUS, CAST(NULL AS CHAR(6)) AS DOMIDX_OPSTATUS, CAST(FUNCIDX_STATUS AS CHAR(8)) AS FUNCIDX_STATUS, CAST('NO' AS CHAR(3)) AS JOIN_INDEX, CAST(NULL AS CHAR(3)) AS IOT_REDUNDANT_PKEY_ELIM, CAST(DROPPED AS CHAR(3)) AS DROPPED, CAST(VISIBILITY AS CHAR(9)) AS VISIBILITY, CAST(NULL AS CHAR(14)) AS DOMIDX_MANAGEMENT, CAST(NULL AS CHAR(3)) AS SEGMENT_CREATED, CAST(NULL AS CHAR(3)) AS ORPHANED_ENTRIES, CAST(NULL AS CHAR(7)) AS INDEXING, CAST(NULL AS CHAR(3)) AS AUTO FROM (SELECT A.TENANT_ID AS TENANT_ID, DATABASE_NAME AS INDEX_OWNER, CASE WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME != '__recyclebin') THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME = '__recyclebin') THEN TABLE_NAME WHEN (TABLE_TYPE = 3 AND CONS_TAB.CONSTRAINT_NAME IS NULL) THEN CONCAT('t_pk_obpk_', A.TABLE_ID) ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM OCEANBASE.__ALL_VIRTUAL_COLUMN T_COL_INDEX, OCEANBASE.__ALL_VIRTUAL_COLUMN T_COL_BASE WHERE T_COL_BASE.TABLE_ID = A.DATA_TABLE_ID AND T_COL_BASE.COLUMN_NAME = T_COL_INDEX.COLUMN_NAME AND T_COL_INDEX.TABLE_ID = A.TABLE_ID AND T_COL_BASE.TENANT_ID = A.TENANT_ID AND T_COL_INDEX.TENANT_ID = A.TENANT_ID AND (T_COL_BASE.COLUMN_FLAGS & 3) > 0 AND T_COL_INDEX.INDEX_POSITION != 0 ) THEN 'FUNCTION-BASED NORMAL' ELSE 'NORMAL' END AS INDEX_TYPE_NAME, DATABASE_NAME AS TABLE_OWNER, CASE WHEN (TABLE_TYPE = 3) THEN A.TABLE_ID ELSE A.DATA_TABLE_ID END AS TABLE_ID, A.TABLE_ID AS INDEX_ID, CASE WHEN 
TABLE_TYPE = 3 THEN 'UNIQUE' WHEN A.INDEX_TYPE IN (2, 4, 8) THEN 'UNIQUE' ELSE 'NONUNIQUE' END AS UNIQUENESS, CASE WHEN A.COMPRESS_FUNC_NAME = NULL THEN 'DISABLED' ELSE 'ENABLED' END AS COMPRESSION, CASE WHEN TABLE_TYPE = 3 THEN 'VALID' WHEN A.INDEX_STATUS = 1 THEN 'UNAVAILABLE' WHEN A.INDEX_STATUS = 2 THEN 'VALID' WHEN A.INDEX_STATUS = 3 THEN 'CHECKING' WHEN A.INDEX_STATUS = 4 THEN 'INELEGIBLE' WHEN A.INDEX_STATUS = 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS STATUS, A.INDEX_TYPE AS A_INDEX_TYPE, A.PART_LEVEL AS A_PART_LEVEL, A.TABLE_TYPE AS A_TABLE_TYPE, CASE WHEN 0 = (SELECT COUNT(1) FROM OCEANBASE.__ALL_VIRTUAL_COLUMN WHERE TABLE_ID = A.TABLE_ID AND IS_HIDDEN = 0) THEN 'ENABLED' ELSE 'NULL' END AS FUNCIDX_STATUS, CASE WHEN B.IN_RECYCLEBIN = 1 THEN 'YES' ELSE 'NO' END AS DROPPED, CASE WHEN (A.INDEX_ATTRIBUTES_SET & 1) = 0 THEN 'VISIBLE' ELSE 'INVISIBLE' END AS VISIBILITY, A.TABLESPACE_ID, A.DOP AS DOP_DEGREE FROM OCEANBASE.__ALL_VIRTUAL_TABLE A JOIN OCEANBASE.__ALL_VIRTUAL_DATABASE B ON A.DATABASE_ID = B.DATABASE_ID AND A.TENANT_ID = B.TENANT_ID AND B.DATABASE_NAME != '__recyclebin' AND A.TABLE_MODE >> 12 & 15 in (0,1) LEFT JOIN OCEANBASE.__ALL_VIRTUAL_CONSTRAINT CONS_TAB ON CONS_TAB.TABLE_ID = A.TABLE_ID AND CONS_TAB.TENANT_ID = A.TENANT_ID AND CONS_TAB.CONSTRAINT_TYPE = 1 WHERE (A.TABLE_TYPE = 3 AND A.TABLE_MODE & 66048 = 0) OR (A.TABLE_TYPE = 5) ) C JOIN OCEANBASE.__ALL_VIRTUAL_TABLE D ON C.TABLE_ID = D.TABLE_ID AND C.TENANT_ID = D.TENANT_ID LEFT JOIN OCEANBASE.__ALL_VIRTUAL_TENANT_TABLESPACE TP ON C.TABLESPACE_ID = TP.TABLESPACE_ID AND TP.TENANT_ID = C.TENANT_ID )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(C.TENANT_ID AS SIGNED) AS CON_ID, CAST(INDEX_OWNER AS CHAR(128)) AS OWNER, CAST(INDEX_NAME AS CHAR(128)) AS INDEX_NAME, CAST(INDEX_TYPE_NAME AS CHAR(27)) AS INDEX_TYPE, CAST(TABLE_OWNER AS CHAR(128)) AS TABLE_OWNER, CAST(TABLE_NAME AS CHAR(128)) AS TABLE_NAME, CAST('TABLE' AS CHAR(5)) AS TABLE_TYPE, CAST(UNIQUENESS AS CHAR(9)) AS 
UNIQUENESS, CAST(COMPRESSION AS CHAR(13)) AS COMPRESSION, CAST(NULL AS NUMBER) AS PREFIX_LENGTH, CAST(TABLESPACE_NAME AS CHAR(30)) AS TABLESPACE_NAME, CAST(NULL AS NUMBER) AS INI_TRANS, CAST(NULL AS NUMBER) AS MAX_TRANS, CAST(NULL AS NUMBER) AS INITIAL_EXTENT, CAST(NULL AS NUMBER) AS NEXT_EXTENT, CAST(NULL AS NUMBER) AS MIN_EXTENTS, CAST(NULL AS NUMBER) AS MAX_EXTENTS, CAST(NULL AS NUMBER) AS PCT_INCREASE, CAST(NULL AS NUMBER) AS PCT_THRESHOLD, CAST(NULL AS NUMBER) AS INCLUDE_COLUMN, CAST(NULL AS NUMBER) AS FREELISTS, CAST(NULL AS NUMBER) AS FREELIST_GROUPS, CAST(NULL AS NUMBER) AS PCT_FREE, CAST(NULL AS CHAR(3)) AS LOGGING, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STATUS AS CHAR(8)) AS STATUS, CAST(NULL AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(NULL AS DATE) AS LAST_ANALYZED, CAST(DOP_DEGREE AS CHAR(40)) AS DEGREE, CAST(NULL AS CHAR(40)) AS INSTANCES, CAST(CASE WHEN A_PART_LEVEL = 0 THEN 'NO' ELSE 'YES' END AS CHAR(3)) AS PARTITIONED, CAST(NULL AS CHAR(1)) AS TEMPORARY, CAST(NULL AS CHAR(1)) AS "GENERATED", CAST(NULL AS CHAR(1)) AS SECONDARY, CAST(NULL AS CHAR(7)) AS BUFFER_POOL, CAST(NULL AS CHAR(7)) AS FLASH_CACHE, CAST(NULL AS CHAR(7)) AS CELL_FLASH_CACHE, CAST(NULL AS CHAR(3)) AS USER_STATS, CAST(NULL AS CHAR(15)) AS DURATION, CAST(NULL AS NUMBER) AS PCT_DIRECT_ACCESS, CAST(NULL AS CHAR(128)) AS ITYP_OWNER, CAST(NULL AS CHAR(128)) AS ITYP_NAME, CAST(NULL AS CHAR(1000)) AS PARAMETERS, CAST(NULL AS CHAR(3)) AS GLOBAL_STATS, CAST(NULL AS CHAR(12)) AS DOMIDX_STATUS, CAST(NULL AS CHAR(6)) AS DOMIDX_OPSTATUS, CAST(FUNCIDX_STATUS AS CHAR(8)) AS FUNCIDX_STATUS, CAST('NO' AS CHAR(3)) AS JOIN_INDEX, CAST(NULL AS CHAR(3)) AS IOT_REDUNDANT_PKEY_ELIM, CAST(DROPPED AS CHAR(3)) AS DROPPED, CAST(VISIBILITY AS CHAR(9)) AS VISIBILITY, CAST(NULL 
AS CHAR(14)) AS DOMIDX_MANAGEMENT, CAST(NULL AS CHAR(3)) AS SEGMENT_CREATED, CAST(NULL AS CHAR(3)) AS ORPHANED_ENTRIES, CAST(NULL AS CHAR(7)) AS INDEXING, CAST(NULL AS CHAR(3)) AS AUTO FROM (SELECT A.TENANT_ID AS TENANT_ID, DATABASE_NAME AS INDEX_OWNER, CASE WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME != '__recyclebin') THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME = '__recyclebin') THEN TABLE_NAME WHEN (TABLE_TYPE = 3 AND CONS_TAB.CONSTRAINT_NAME IS NULL) THEN CONCAT('t_pk_obpk_', A.TABLE_ID) ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE WHEN A.TABLE_TYPE = 5 AND A.INDEX_TYPE IN (15, 18, 21) THEN 'DOMAIN' WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM OCEANBASE.__ALL_VIRTUAL_COLUMN T_COL_INDEX, OCEANBASE.__ALL_VIRTUAL_COLUMN T_COL_BASE WHERE T_COL_BASE.TABLE_ID = A.DATA_TABLE_ID AND T_COL_BASE.COLUMN_NAME = T_COL_INDEX.COLUMN_NAME AND T_COL_INDEX.TABLE_ID = A.TABLE_ID AND T_COL_BASE.TENANT_ID = A.TENANT_ID AND T_COL_INDEX.TENANT_ID = A.TENANT_ID AND (T_COL_BASE.COLUMN_FLAGS & 3) > 0 AND T_COL_INDEX.INDEX_POSITION != 0 ) THEN 'FUNCTION-BASED NORMAL' ELSE 'NORMAL' END AS INDEX_TYPE_NAME, DATABASE_NAME AS TABLE_OWNER, CASE WHEN (TABLE_TYPE = 3) THEN A.TABLE_ID ELSE A.DATA_TABLE_ID END AS TABLE_ID, A.TABLE_ID AS INDEX_ID, CASE WHEN TABLE_TYPE = 3 THEN 'UNIQUE' WHEN A.INDEX_TYPE IN (2, 4, 8) THEN 'UNIQUE' ELSE 'NONUNIQUE' END AS UNIQUENESS, CASE WHEN A.COMPRESS_FUNC_NAME = NULL THEN 'DISABLED' ELSE 'ENABLED' END AS COMPRESSION, CASE WHEN TABLE_TYPE = 3 THEN 'VALID' WHEN A.INDEX_STATUS = 1 THEN 'UNAVAILABLE' WHEN A.INDEX_STATUS = 2 THEN 'VALID' WHEN A.INDEX_STATUS = 3 THEN 'CHECKING' WHEN A.INDEX_STATUS = 4 THEN 'INELEGIBLE' WHEN A.INDEX_STATUS = 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS STATUS, A.INDEX_TYPE AS A_INDEX_TYPE, A.PART_LEVEL AS A_PART_LEVEL, A.TABLE_TYPE AS A_TABLE_TYPE, CASE WHEN 0 = (SELECT COUNT(1) FROM OCEANBASE.__ALL_VIRTUAL_COLUMN WHERE TABLE_ID = A.TABLE_ID AND IS_HIDDEN = 0) THEN 
'ENABLED' ELSE 'NULL' END AS FUNCIDX_STATUS, CASE WHEN B.IN_RECYCLEBIN = 1 THEN 'YES' ELSE 'NO' END AS DROPPED, CASE WHEN (A.INDEX_ATTRIBUTES_SET & 1) = 0 THEN 'VISIBLE' ELSE 'INVISIBLE' END AS VISIBILITY, A.TABLESPACE_ID, A.DOP AS DOP_DEGREE FROM OCEANBASE.__ALL_VIRTUAL_TABLE A JOIN OCEANBASE.__ALL_VIRTUAL_DATABASE B ON A.DATABASE_ID = B.DATABASE_ID AND A.TENANT_ID = B.TENANT_ID AND B.DATABASE_NAME != '__recyclebin' AND A.TABLE_MODE >> 12 & 15 in (0,1) LEFT JOIN OCEANBASE.__ALL_VIRTUAL_CONSTRAINT CONS_TAB ON CONS_TAB.TABLE_ID = A.TABLE_ID AND CONS_TAB.TENANT_ID = A.TENANT_ID AND CONS_TAB.CONSTRAINT_TYPE = 1 WHERE (A.TABLE_TYPE = 3 AND A.TABLE_MODE & 66048 = 0) OR (A.TABLE_TYPE = 5 AND A.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22)) ) C JOIN OCEANBASE.__ALL_VIRTUAL_TABLE D ON C.TABLE_ID = D.TABLE_ID AND C.TENANT_ID = D.TENANT_ID LEFT JOIN OCEANBASE.__ALL_VIRTUAL_TENANT_TABLESPACE TP ON C.TABLESPACE_ID = TP.TABLESPACE_ID AND TP.TENANT_ID = C.TENANT_ID )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -2410,7 +2410,7 @@ int ObInnerTableSchema::cdb_part_key_columns_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(T.TABLE_NAME AS CHAR(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND (C.PARTITION_KEY_POSITION & 255) > 0 AND T.TABLE_TYPE IN (3, 6, 8, 9) AND T.TABLE_MODE >> 12 & 15 in (0,1) UNION SELECT 
CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND (C.PARTITION_KEY_POSITION & 255) > 0 UNION SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10) AND (C.PARTITION_KEY_POSITION & 255) > 0 )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(T.TABLE_NAME AS CHAR(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, 
OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND (C.PARTITION_KEY_POSITION & 255) > 0 AND T.TABLE_TYPE IN (3, 6, 8, 9) AND T.TABLE_MODE >> 12 & 15 in (0,1) UNION SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (17,19,20,22) AND (C.PARTITION_KEY_POSITION & 255) > 0 UNION SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND (C.PARTITION_KEY_POSITION & 255) > 0 )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -2460,7 +2460,7 @@ int 
ObInnerTableSchema::cdb_subpart_key_columns_schema(ObTableSchema &table_sche table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(T.TABLE_NAME AS CHAR(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND (C.PARTITION_KEY_POSITION & 65280) > 0 AND T.TABLE_TYPE IN (3, 6, 8, 9) AND T.TABLE_MODE >> 12 & 15 in (0,1) UNION SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND (C.PARTITION_KEY_POSITION & 65280) > 0 UNION SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS 
OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10) AND (C.PARTITION_KEY_POSITION & 65280) > 0 )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(T.TABLE_NAME AS CHAR(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND (C.PARTITION_KEY_POSITION & 65280) > 0 AND T.TABLE_TYPE IN (3, 6, 8, 9) AND T.TABLE_MODE >> 12 & 15 in (0,1) UNION SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE 
NOT IN (17,19,20,22) AND (C.PARTITION_KEY_POSITION & 65280) > 0 UNION SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C, OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND (C.PARTITION_KEY_POSITION & 65280) > 0 )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.21201_21250.cpp b/src/share/inner_table/ob_inner_table_schema.21201_21250.cpp index 9f815fef7d..54897d73bc 100644 --- a/src/share/inner_table/ob_inner_table_schema.21201_21250.cpp +++ b/src/share/inner_table/ob_inner_table_schema.21201_21250.cpp @@ -60,7 +60,7 @@ int ObInnerTableSchema::cdb_part_indexes_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT /*+NO_USE_NL(I_T PKC)*/ CAST(I_T.TENANT_ID AS NUMBER) AS CON_ID, CAST(I_T.OWNER AS CHAR(128)) AS OWNER, CAST(I_T.INDEX_NAME AS CHAR(128)) AS INDEX_NAME, CAST(I_T.TABLE_NAME AS CHAR(128)) AS TABLE_NAME, CAST(CASE I_T.PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'HASH' ELSE 'KEY' END) WHEN 2 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'HASH' ELSE 'KEY' END) WHEN 3 THEN 'RANGE' WHEN 4 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'RANGE' ELSE 'RANGE 
COLUMNS' END) WHEN 5 THEN 'LIST' WHEN 6 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'LIST' ELSE 'LIST COLUMNS' END) WHEN 7 THEN 'RANGE' END AS CHAR(13)) AS PARTITIONING_TYPE, CAST(CASE WHEN I_T.PART_LEVEL < 2 THEN 'NONE' ELSE (CASE I_T.SUB_PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'HASH' ELSE 'KEY' END) WHEN 2 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'HASH' ELSE 'KEY' END) WHEN 3 THEN 'RANGE' WHEN 4 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'RANGE' ELSE 'RANGE COLUMNS' END) WHEN 5 THEN 'LIST' WHEN 6 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'LIST' ELSE 'LIST COLUMNS' END) WHEN 7 THEN 'RANGE' END) END AS CHAR(13)) AS SUBPARTITIONING_TYPE, CAST(I_T.PART_NUM AS SIGNED) AS PARTITION_COUNT, CAST(CASE WHEN (I_T.PART_LEVEL < 2 OR I_T.SUB_PART_TEMPLATE_FLAGS = 0) THEN 0 ELSE I_T.SUB_PART_NUM END AS SIGNED) AS DEF_SUBPARTITION_COUNT, CAST(PKC.PARTITIONING_KEY_COUNT AS SIGNED) AS PARTITIONING_KEY_COUNT, CAST(PKC.SUBPARTITIONING_KEY_COUNT AS SIGNED) AS SUBPARTITIONING_KEY_COUNT, CAST(CASE I_T.IS_LOCAL WHEN 1 THEN 'LOCAL' ELSE 'GLOBAL' END AS CHAR(6)) AS LOCALITY, CAST(CASE WHEN I_T.IS_LOCAL = 0 THEN 'PREFIXED' WHEN (I_T.IS_LOCAL = 1 AND LOCAL_PARTITIONED_PREFIX_INDEX.IS_PREFIXED = 1) THEN 'PREFIXED' ELSE 'NON_PREFIXED' END AS CHAR(12)) AS ALIGNMENT, CAST(TP.TABLESPACE_NAME AS CHAR(30)) AS DEF_TABLESPACE_NAME, CAST(0 AS SIGNED) AS DEF_PCT_FREE, CAST(0 AS SIGNED) AS DEF_INI_TRANS, CAST(0 AS SIGNED) AS DEF_MAX_TRANS, CAST(NULL AS CHAR(40)) AS DEF_INITIAL_EXTENT, CAST(NULL AS CHAR(40)) AS DEF_NEXT_EXTENT, CAST(NULL AS CHAR(40)) AS DEF_MIN_EXTENTS, CAST(NULL AS CHAR(40)) AS DEF_MAX_EXTENTS, CAST(NULL AS CHAR(40)) AS DEF_MAX_SIZE, CAST(NULL AS CHAR(40)) AS DEF_PCT_INCREASE, CAST(0 AS SIGNED) AS DEF_FREELISTS, CAST(0 AS SIGNED) AS DEF_FREELIST_GROUPS, CAST(NULL AS CHAR(7)) AS DEF_LOGGING, CAST(NULL AS CHAR(7)) AS DEF_BUFFER_POOL, CAST(NULL AS CHAR(7)) AS DEF_FLASH_CACHE, CAST(NULL AS CHAR(7)) AS DEF_CELL_FLASH_CACHE, CAST(NULL AS CHAR(1000)) 
AS DEF_PARAMETERS, CAST('NO' AS CHAR(1000)) AS "INTERVAL", CAST('NO' AS CHAR(3)) AS AUTOLIST, CAST(NULL AS CHAR(1000)) AS INTERVAL_SUBPARTITION, CAST(NULL AS CHAR(1000)) AS AUTOLIST_SUBPARTITION FROM (SELECT D.TENANT_ID, D.DATABASE_NAME AS OWNER, I.TABLE_ID AS INDEX_ID, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN I.TABLE_NAME ELSE SUBSTR(I.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(I.TABLE_NAME, 7))) END AS CHAR(128)) AS INDEX_NAME, I.PART_LEVEL, I.PART_FUNC_TYPE, I.PART_NUM, I.SUB_PART_FUNC_TYPE, T.TABLE_NAME AS TABLE_NAME, T.SUB_PART_NUM, T.SUB_PART_TEMPLATE_FLAGS, T.TABLESPACE_ID, (CASE I.INDEX_TYPE WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM OCEANBASE.__ALL_VIRTUAL_TABLE I JOIN OCEANBASE.__ALL_VIRTUAL_TABLE T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID JOIN OCEANBASE.__ALL_VIRTUAL_DATABASE D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID WHERE I.TABLE_TYPE = 5 AND I.PART_LEVEL != 0 AND T.TABLE_MODE >> 12 & 15 in (0,1) ) I_T JOIN OCEANBASE.__ALL_TENANT T ON I_T.TENANT_ID = T.TENANT_ID LEFT JOIN ( SELECT I.TENANT_ID, I.TABLE_ID AS INDEX_ID, 1 AS IS_PREFIXED FROM OCEANBASE.__ALL_VIRTUAL_TABLE I WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE IN (1, 2, 10) AND I.PART_LEVEL != 0 AND NOT EXISTS (SELECT /*+NO_USE_NL(PART_COLUMNS INDEX_COLUMNS)*/ * FROM (SELECT * FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C WHERE C.TABLE_ID = I.DATA_TABLE_ID AND C.TENANT_ID = I.TENANT_ID AND C.PARTITION_KEY_POSITION != 0 ) PART_COLUMNS LEFT JOIN (SELECT * FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C WHERE C.TABLE_ID = I.TABLE_ID AND C.TENANT_ID = I.TENANT_ID AND C.INDEX_POSITION != 0 ) INDEX_COLUMNS ON PART_COLUMNS.COLUMN_ID = INDEX_COLUMNS.COLUMN_ID WHERE ((PART_COLUMNS.PARTITION_KEY_POSITION & 255) != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL OR (PART_COLUMNS.PARTITION_KEY_POSITION & 255) != 
INDEX_COLUMNS.INDEX_POSITION) ) OR ((PART_COLUMNS.PARTITION_KEY_POSITION & 65280)/256 != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL) ) ) ) LOCAL_PARTITIONED_PREFIX_INDEX ON I_T.TENANT_ID = LOCAL_PARTITIONED_PREFIX_INDEX.TENANT_ID AND I_T.INDEX_ID = LOCAL_PARTITIONED_PREFIX_INDEX.INDEX_ID JOIN (SELECT TENANT_ID, TABLE_ID, SUM(CASE WHEN (PARTITION_KEY_POSITION & 255) != 0 THEN 1 ELSE 0 END) AS PARTITIONING_KEY_COUNT, SUM(CASE WHEN (PARTITION_KEY_POSITION & 65280)/256 != 0 THEN 1 ELSE 0 END) AS SUBPARTITIONING_KEY_COUNT FROM OCEANBASE.__ALL_VIRTUAL_COLUMN GROUP BY TENANT_ID, TABLE_ID) PKC ON I_T.TENANT_ID = PKC.TENANT_ID AND I_T.JOIN_TABLE_ID = PKC.TABLE_ID LEFT JOIN OCEANBASE.__ALL_VIRTUAL_TENANT_TABLESPACE TP ON I_T.TENANT_ID = TP.TENANT_ID AND I_T.TABLESPACE_ID = TP.TABLESPACE_ID )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT /*+NO_USE_NL(I_T PKC)*/ CAST(I_T.TENANT_ID AS NUMBER) AS CON_ID, CAST(I_T.OWNER AS CHAR(128)) AS OWNER, CAST(I_T.INDEX_NAME AS CHAR(128)) AS INDEX_NAME, CAST(I_T.TABLE_NAME AS CHAR(128)) AS TABLE_NAME, CAST(CASE I_T.PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'HASH' ELSE 'KEY' END) WHEN 2 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'HASH' ELSE 'KEY' END) WHEN 3 THEN 'RANGE' WHEN 4 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'RANGE' ELSE 'RANGE COLUMNS' END) WHEN 5 THEN 'LIST' WHEN 6 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'LIST' ELSE 'LIST COLUMNS' END) WHEN 7 THEN 'RANGE' END AS CHAR(13)) AS PARTITIONING_TYPE, CAST(CASE WHEN I_T.PART_LEVEL < 2 THEN 'NONE' ELSE (CASE I_T.SUB_PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'HASH' ELSE 'KEY' END) WHEN 2 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'HASH' ELSE 'KEY' END) WHEN 3 THEN 'RANGE' WHEN 4 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'RANGE' ELSE 'RANGE COLUMNS' END) WHEN 5 THEN 'LIST' WHEN 6 THEN (CASE COMPATIBILITY_MODE WHEN 1 THEN 'LIST' ELSE 'LIST COLUMNS' END) WHEN 7 THEN 'RANGE' END) END AS 
CHAR(13)) AS SUBPARTITIONING_TYPE, CAST(I_T.PART_NUM AS SIGNED) AS PARTITION_COUNT, CAST(CASE WHEN (I_T.PART_LEVEL < 2 OR I_T.SUB_PART_TEMPLATE_FLAGS = 0) THEN 0 ELSE I_T.SUB_PART_NUM END AS SIGNED) AS DEF_SUBPARTITION_COUNT, CAST(PKC.PARTITIONING_KEY_COUNT AS SIGNED) AS PARTITIONING_KEY_COUNT, CAST(PKC.SUBPARTITIONING_KEY_COUNT AS SIGNED) AS SUBPARTITIONING_KEY_COUNT, CAST(CASE I_T.IS_LOCAL WHEN 1 THEN 'LOCAL' ELSE 'GLOBAL' END AS CHAR(6)) AS LOCALITY, CAST(CASE WHEN I_T.IS_LOCAL = 0 THEN 'PREFIXED' WHEN (I_T.IS_LOCAL = 1 AND LOCAL_PARTITIONED_PREFIX_INDEX.IS_PREFIXED = 1) THEN 'PREFIXED' ELSE 'NON_PREFIXED' END AS CHAR(12)) AS ALIGNMENT, CAST(TP.TABLESPACE_NAME AS CHAR(30)) AS DEF_TABLESPACE_NAME, CAST(0 AS SIGNED) AS DEF_PCT_FREE, CAST(0 AS SIGNED) AS DEF_INI_TRANS, CAST(0 AS SIGNED) AS DEF_MAX_TRANS, CAST(NULL AS CHAR(40)) AS DEF_INITIAL_EXTENT, CAST(NULL AS CHAR(40)) AS DEF_NEXT_EXTENT, CAST(NULL AS CHAR(40)) AS DEF_MIN_EXTENTS, CAST(NULL AS CHAR(40)) AS DEF_MAX_EXTENTS, CAST(NULL AS CHAR(40)) AS DEF_MAX_SIZE, CAST(NULL AS CHAR(40)) AS DEF_PCT_INCREASE, CAST(0 AS SIGNED) AS DEF_FREELISTS, CAST(0 AS SIGNED) AS DEF_FREELIST_GROUPS, CAST(NULL AS CHAR(7)) AS DEF_LOGGING, CAST(NULL AS CHAR(7)) AS DEF_BUFFER_POOL, CAST(NULL AS CHAR(7)) AS DEF_FLASH_CACHE, CAST(NULL AS CHAR(7)) AS DEF_CELL_FLASH_CACHE, CAST(NULL AS CHAR(1000)) AS DEF_PARAMETERS, CAST('NO' AS CHAR(1000)) AS "INTERVAL", CAST('NO' AS CHAR(3)) AS AUTOLIST, CAST(NULL AS CHAR(1000)) AS INTERVAL_SUBPARTITION, CAST(NULL AS CHAR(1000)) AS AUTOLIST_SUBPARTITION FROM (SELECT D.TENANT_ID, D.DATABASE_NAME AS OWNER, I.TABLE_ID AS INDEX_ID, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN I.TABLE_NAME ELSE SUBSTR(I.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(I.TABLE_NAME, 7))) END AS CHAR(128)) AS INDEX_NAME, I.PART_LEVEL, I.PART_FUNC_TYPE, I.PART_NUM, I.SUB_PART_FUNC_TYPE, T.TABLE_NAME AS TABLE_NAME, T.SUB_PART_NUM, T.SUB_PART_TEMPLATE_FLAGS, T.TABLESPACE_ID, (CASE I.INDEX_TYPE WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 
THEN 1 WHEN 15 THEN 1 WHEN 23 THEN 1 WHEN 24 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID WHEN 15 THEN T.TABLE_ID WHEN 23 THEN T.TABLE_ID WHEN 24 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM OCEANBASE.__ALL_VIRTUAL_TABLE I JOIN OCEANBASE.__ALL_VIRTUAL_TABLE T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID JOIN OCEANBASE.__ALL_VIRTUAL_DATABASE D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND I.PART_LEVEL != 0 AND T.TABLE_MODE >> 12 & 15 in (0,1) ) I_T JOIN OCEANBASE.__ALL_TENANT T ON I_T.TENANT_ID = T.TENANT_ID LEFT JOIN ( SELECT I.TENANT_ID, I.TABLE_ID AS INDEX_ID, 1 AS IS_PREFIXED FROM OCEANBASE.__ALL_VIRTUAL_TABLE I WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE IN (1, 2, 10, 15, 23, 24) AND I.PART_LEVEL != 0 AND NOT EXISTS (SELECT /*+NO_USE_NL(PART_COLUMNS INDEX_COLUMNS)*/ * FROM (SELECT * FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C WHERE C.TABLE_ID = I.DATA_TABLE_ID AND C.TENANT_ID = I.TENANT_ID AND C.PARTITION_KEY_POSITION != 0 ) PART_COLUMNS LEFT JOIN (SELECT * FROM OCEANBASE.__ALL_VIRTUAL_COLUMN C WHERE C.TABLE_ID = I.TABLE_ID AND C.TENANT_ID = I.TENANT_ID AND C.INDEX_POSITION != 0 ) INDEX_COLUMNS ON PART_COLUMNS.COLUMN_ID = INDEX_COLUMNS.COLUMN_ID WHERE ((PART_COLUMNS.PARTITION_KEY_POSITION & 255) != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL OR (PART_COLUMNS.PARTITION_KEY_POSITION & 255) != INDEX_COLUMNS.INDEX_POSITION) ) OR ((PART_COLUMNS.PARTITION_KEY_POSITION & 65280)/256 != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL) ) ) ) LOCAL_PARTITIONED_PREFIX_INDEX ON I_T.TENANT_ID = LOCAL_PARTITIONED_PREFIX_INDEX.TENANT_ID AND I_T.INDEX_ID = LOCAL_PARTITIONED_PREFIX_INDEX.INDEX_ID JOIN (SELECT TENANT_ID, TABLE_ID, SUM(CASE WHEN (PARTITION_KEY_POSITION & 255) != 0 THEN 1 ELSE 0 END) AS PARTITIONING_KEY_COUNT, SUM(CASE WHEN (PARTITION_KEY_POSITION & 65280)/256 != 0 THEN 1 ELSE 0 
END) AS SUBPARTITIONING_KEY_COUNT FROM OCEANBASE.__ALL_VIRTUAL_COLUMN GROUP BY TENANT_ID, TABLE_ID) PKC ON I_T.TENANT_ID = PKC.TENANT_ID AND I_T.JOIN_TABLE_ID = PKC.TABLE_ID LEFT JOIN OCEANBASE.__ALL_VIRTUAL_TENANT_TABLESPACE TP ON I_T.TENANT_ID = TP.TENANT_ID AND I_T.TABLESPACE_ID = TP.TABLESPACE_ID )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -360,7 +360,7 @@ int ObInnerTableSchema::dba_part_key_columns_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(T.TABLE_NAME AS CHAR(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND (C.PARTITION_KEY_POSITION & 255) > 0 AND T.TABLE_TYPE IN (3, 6) AND T.TABLE_MODE >> 12 & 15 in (0,1) AND C.TENANT_ID = 0 UNION SELECT CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND (C.PARTITION_KEY_POSITION & 255) > 0 AND C.TENANT_ID = 0 UNION SELECT 
CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10) AND (C.PARTITION_KEY_POSITION & 255) > 0 AND C.TENANT_ID = 0 )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(T.TABLE_NAME AS CHAR(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND (C.PARTITION_KEY_POSITION & 255) > 0 AND T.TABLE_TYPE IN (3, 6) AND T.TABLE_MODE >> 12 & 15 in (0,1) AND C.TENANT_ID = 0 UNION SELECT CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID 
AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (17,19,20,22) AND (C.PARTITION_KEY_POSITION & 255) > 0 AND C.TENANT_ID = 0 UNION SELECT CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 255) AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND (C.PARTITION_KEY_POSITION & 255) > 0 AND C.TENANT_ID = 0 )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -410,7 +410,7 @@ int ObInnerTableSchema::dba_subpart_key_columns_schema(ObTableSchema &table_sche table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(T.TABLE_NAME AS CHAR(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND (C.PARTITION_KEY_POSITION & 65280) > 0 AND T.TABLE_TYPE IN (3, 6) AND T.TABLE_MODE >> 12 & 15 in (0,1) AND C.TENANT_ID = 0 UNION SELECT CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN 
D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND (C.PARTITION_KEY_POSITION & 65280) > 0 AND C.TENANT_ID = 0 UNION SELECT CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10) AND (C.PARTITION_KEY_POSITION & 65280) > 0 AND C.TENANT_ID = 0 )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(T.TABLE_NAME AS CHAR(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND 
(C.PARTITION_KEY_POSITION & 65280) > 0 AND T.TABLE_TYPE IN (3, 6) AND T.TABLE_MODE >> 12 & 15 in (0,1) AND C.TENANT_ID = 0 UNION SELECT CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (17,19,20,22) AND (C.PARTITION_KEY_POSITION & 65280) > 0 AND C.TENANT_ID = 0 UNION SELECT CAST(D.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(T.TABLE_NAME, 7))) END AS CHAR(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS CHAR(4000)) AS COLUMN_NAME, CAST((C.PARTITION_KEY_POSITION & 65280)/256 AS SIGNED) AS COLUMN_POSITION, CAST(NULL AS SIGNED) AS COLLATED_COLUMN_ID FROM OCEANBASE.__ALL_COLUMN C, OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_DATABASE D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND (C.PARTITION_KEY_POSITION & 65280) > 0 AND C.TENANT_ID = 0 )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -610,7 +610,7 @@ int ObInnerTableSchema::dba_part_indexes_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT 
CAST(I_T.OWNER AS CHAR(128)) AS OWNER, CAST(I_T.INDEX_NAME AS CHAR(128)) AS INDEX_NAME, CAST(I_T.TABLE_NAME AS CHAR(128)) AS TABLE_NAME, CAST(CASE I_T.PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'KEY' WHEN 2 THEN 'KEY' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE COLUMNS' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST COLUMNS' WHEN 7 THEN 'RANGE' END AS CHAR(13)) AS PARTITIONING_TYPE, CAST(CASE WHEN I_T.PART_LEVEL < 2 THEN 'NONE' ELSE (CASE I_T.SUB_PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'KEY' WHEN 2 THEN 'KEY' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE COLUMNS' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST COLUMNS' WHEN 7 THEN 'RANGE' END) END AS CHAR(13)) AS SUBPARTITIONING_TYPE, CAST(I_T.PART_NUM AS SIGNED) AS PARTITION_COUNT, CAST(CASE WHEN (I_T.PART_LEVEL < 2 OR I_T.SUB_PART_TEMPLATE_FLAGS = 0) THEN 0 ELSE I_T.SUB_PART_NUM END AS SIGNED) AS DEF_SUBPARTITION_COUNT, CAST(PKC.PARTITIONING_KEY_COUNT AS SIGNED) AS PARTITIONING_KEY_COUNT, CAST(PKC.SUBPARTITIONING_KEY_COUNT AS SIGNED) AS SUBPARTITIONING_KEY_COUNT, CAST(CASE I_T.IS_LOCAL WHEN 1 THEN 'LOCAL' ELSE 'GLOBAL' END AS CHAR(6)) AS LOCALITY, CAST(CASE WHEN I_T.IS_LOCAL = 0 THEN 'PREFIXED' WHEN (I_T.IS_LOCAL = 1 AND LOCAL_PARTITIONED_PREFIX_INDEX.IS_PREFIXED = 1) THEN 'PREFIXED' ELSE 'NON_PREFIXED' END AS CHAR(12)) AS ALIGNMENT, CAST(TP.TABLESPACE_NAME AS CHAR(30)) AS DEF_TABLESPACE_NAME, CAST(0 AS SIGNED) AS DEF_PCT_FREE, CAST(0 AS SIGNED) AS DEF_INI_TRANS, CAST(0 AS SIGNED) AS DEF_MAX_TRANS, CAST(NULL AS CHAR(40)) AS DEF_INITIAL_EXTENT, CAST(NULL AS CHAR(40)) AS DEF_NEXT_EXTENT, CAST(NULL AS CHAR(40)) AS DEF_MIN_EXTENTS, CAST(NULL AS CHAR(40)) AS DEF_MAX_EXTENTS, CAST(NULL AS CHAR(40)) AS DEF_MAX_SIZE, CAST(NULL AS CHAR(40)) AS DEF_PCT_INCREASE, CAST(0 AS SIGNED) AS DEF_FREELISTS, CAST(0 AS SIGNED) AS DEF_FREELIST_GROUPS, CAST(NULL AS CHAR(7)) AS DEF_LOGGING, CAST(NULL AS CHAR(7)) AS DEF_BUFFER_POOL, CAST(NULL AS CHAR(7)) AS DEF_FLASH_CACHE, CAST(NULL AS CHAR(7)) AS DEF_CELL_FLASH_CACHE, CAST(NULL AS CHAR(1000)) AS 
DEF_PARAMETERS, CAST('NO' AS CHAR(1000)) AS "INTERVAL", CAST('NO' AS CHAR(3)) AS AUTOLIST, CAST(NULL AS CHAR(1000)) AS INTERVAL_SUBPARTITION, CAST(NULL AS CHAR(1000)) AS AUTOLIST_SUBPARTITION FROM (SELECT D.TENANT_ID, D.DATABASE_NAME AS OWNER, I.TABLE_ID AS INDEX_ID, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN I.TABLE_NAME ELSE SUBSTR(I.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(I.TABLE_NAME, 7))) END AS CHAR(128)) AS INDEX_NAME, I.PART_LEVEL, I.PART_FUNC_TYPE, I.PART_NUM, I.SUB_PART_FUNC_TYPE, T.TABLE_NAME AS TABLE_NAME, T.SUB_PART_NUM, T.SUB_PART_TEMPLATE_FLAGS, T.TABLESPACE_ID, (CASE I.INDEX_TYPE WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM OCEANBASE.__ALL_TABLE I JOIN OCEANBASE.__ALL_TABLE T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID JOIN OCEANBASE.__ALL_DATABASE D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID WHERE I.TABLE_TYPE = 5 AND I.PART_LEVEL != 0 AND I.TABLE_MODE >> 12 & 15 in (0,1) ) I_T JOIN (SELECT TENANT_ID, TABLE_ID, SUM(CASE WHEN (PARTITION_KEY_POSITION & 255) != 0 THEN 1 ELSE 0 END) AS PARTITIONING_KEY_COUNT, SUM(CASE WHEN (PARTITION_KEY_POSITION & 65280)/256 != 0 THEN 1 ELSE 0 END) AS SUBPARTITIONING_KEY_COUNT FROM OCEANBASE.__ALL_COLUMN GROUP BY TENANT_ID, TABLE_ID) PKC ON I_T.TENANT_ID = PKC.TENANT_ID AND I_T.JOIN_TABLE_ID = PKC.TABLE_ID LEFT JOIN ( SELECT I.TENANT_ID, I.TABLE_ID AS INDEX_ID, 1 AS IS_PREFIXED FROM OCEANBASE.__ALL_TABLE I WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE IN (1, 2, 10) AND I.PART_LEVEL != 0 AND I.TENANT_ID = 0 AND NOT EXISTS (SELECT * FROM (SELECT * FROM OCEANBASE.__ALL_COLUMN C WHERE C.TABLE_ID = I.DATA_TABLE_ID AND C.PARTITION_KEY_POSITION != 0 AND C.TENANT_ID = 0 ) PART_COLUMNS LEFT JOIN (SELECT * FROM OCEANBASE.__ALL_COLUMN C WHERE C.TABLE_ID = I.TABLE_ID AND C.TENANT_ID = 0 AND C.INDEX_POSITION != 0 ) INDEX_COLUMNS ON 
PART_COLUMNS.COLUMN_ID = INDEX_COLUMNS.COLUMN_ID WHERE ((PART_COLUMNS.PARTITION_KEY_POSITION & 255) != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL OR (PART_COLUMNS.PARTITION_KEY_POSITION & 255) != INDEX_COLUMNS.INDEX_POSITION) ) OR ((PART_COLUMNS.PARTITION_KEY_POSITION & 65280)/256 != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL) ) ) ) LOCAL_PARTITIONED_PREFIX_INDEX ON I_T.TENANT_ID = LOCAL_PARTITIONED_PREFIX_INDEX.TENANT_ID AND I_T.INDEX_ID = LOCAL_PARTITIONED_PREFIX_INDEX.INDEX_ID LEFT JOIN OCEANBASE.__ALL_TENANT_TABLESPACE TP ON I_T.TENANT_ID = TP.TENANT_ID AND I_T.TABLESPACE_ID = TP.TABLESPACE_ID WHERE I_T.TENANT_ID = 0 )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(I_T.OWNER AS CHAR(128)) AS OWNER, CAST(I_T.INDEX_NAME AS CHAR(128)) AS INDEX_NAME, CAST(I_T.TABLE_NAME AS CHAR(128)) AS TABLE_NAME, CAST(CASE I_T.PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'KEY' WHEN 2 THEN 'KEY' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE COLUMNS' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST COLUMNS' WHEN 7 THEN 'RANGE' END AS CHAR(13)) AS PARTITIONING_TYPE, CAST(CASE WHEN I_T.PART_LEVEL < 2 THEN 'NONE' ELSE (CASE I_T.SUB_PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'KEY' WHEN 2 THEN 'KEY' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE COLUMNS' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST COLUMNS' WHEN 7 THEN 'RANGE' END) END AS CHAR(13)) AS SUBPARTITIONING_TYPE, CAST(I_T.PART_NUM AS SIGNED) AS PARTITION_COUNT, CAST(CASE WHEN (I_T.PART_LEVEL < 2 OR I_T.SUB_PART_TEMPLATE_FLAGS = 0) THEN 0 ELSE I_T.SUB_PART_NUM END AS SIGNED) AS DEF_SUBPARTITION_COUNT, CAST(PKC.PARTITIONING_KEY_COUNT AS SIGNED) AS PARTITIONING_KEY_COUNT, CAST(PKC.SUBPARTITIONING_KEY_COUNT AS SIGNED) AS SUBPARTITIONING_KEY_COUNT, CAST(CASE I_T.IS_LOCAL WHEN 1 THEN 'LOCAL' ELSE 'GLOBAL' END AS CHAR(6)) AS LOCALITY, CAST(CASE WHEN I_T.IS_LOCAL = 0 THEN 'PREFIXED' WHEN (I_T.IS_LOCAL = 1 AND LOCAL_PARTITIONED_PREFIX_INDEX.IS_PREFIXED = 1) THEN 'PREFIXED' ELSE 'NON_PREFIXED' END AS CHAR(12)) AS ALIGNMENT, 
CAST(TP.TABLESPACE_NAME AS CHAR(30)) AS DEF_TABLESPACE_NAME, CAST(0 AS SIGNED) AS DEF_PCT_FREE, CAST(0 AS SIGNED) AS DEF_INI_TRANS, CAST(0 AS SIGNED) AS DEF_MAX_TRANS, CAST(NULL AS CHAR(40)) AS DEF_INITIAL_EXTENT, CAST(NULL AS CHAR(40)) AS DEF_NEXT_EXTENT, CAST(NULL AS CHAR(40)) AS DEF_MIN_EXTENTS, CAST(NULL AS CHAR(40)) AS DEF_MAX_EXTENTS, CAST(NULL AS CHAR(40)) AS DEF_MAX_SIZE, CAST(NULL AS CHAR(40)) AS DEF_PCT_INCREASE, CAST(0 AS SIGNED) AS DEF_FREELISTS, CAST(0 AS SIGNED) AS DEF_FREELIST_GROUPS, CAST(NULL AS CHAR(7)) AS DEF_LOGGING, CAST(NULL AS CHAR(7)) AS DEF_BUFFER_POOL, CAST(NULL AS CHAR(7)) AS DEF_FLASH_CACHE, CAST(NULL AS CHAR(7)) AS DEF_CELL_FLASH_CACHE, CAST(NULL AS CHAR(1000)) AS DEF_PARAMETERS, CAST('NO' AS CHAR(1000)) AS "INTERVAL", CAST('NO' AS CHAR(3)) AS AUTOLIST, CAST(NULL AS CHAR(1000)) AS INTERVAL_SUBPARTITION, CAST(NULL AS CHAR(1000)) AS AUTOLIST_SUBPARTITION FROM (SELECT D.TENANT_ID, D.DATABASE_NAME AS OWNER, I.TABLE_ID AS INDEX_ID, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN I.TABLE_NAME ELSE SUBSTR(I.TABLE_NAME, 7 + POSITION('_' IN SUBSTR(I.TABLE_NAME, 7))) END AS CHAR(128)) AS INDEX_NAME, I.PART_LEVEL, I.PART_FUNC_TYPE, I.PART_NUM, I.SUB_PART_FUNC_TYPE, T.TABLE_NAME AS TABLE_NAME, T.SUB_PART_NUM, T.SUB_PART_TEMPLATE_FLAGS, T.TABLESPACE_ID, (CASE I.INDEX_TYPE WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 WHEN 15 THEN 1 WHEN 23 THEN 1 WHEN 24 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID WHEN 15 THEN T.TABLE_ID WHEN 23 THEN T.TABLE_ID WHEN 24 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM OCEANBASE.__ALL_TABLE I JOIN OCEANBASE.__ALL_TABLE T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID JOIN OCEANBASE.__ALL_DATABASE D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND I.PART_LEVEL != 0 AND I.TABLE_MODE >> 12 & 15 in (0,1) ) I_T JOIN (SELECT 
TENANT_ID, TABLE_ID, SUM(CASE WHEN (PARTITION_KEY_POSITION & 255) != 0 THEN 1 ELSE 0 END) AS PARTITIONING_KEY_COUNT, SUM(CASE WHEN (PARTITION_KEY_POSITION & 65280)/256 != 0 THEN 1 ELSE 0 END) AS SUBPARTITIONING_KEY_COUNT FROM OCEANBASE.__ALL_COLUMN GROUP BY TENANT_ID, TABLE_ID) PKC ON I_T.TENANT_ID = PKC.TENANT_ID AND I_T.JOIN_TABLE_ID = PKC.TABLE_ID LEFT JOIN ( SELECT I.TENANT_ID, I.TABLE_ID AS INDEX_ID, 1 AS IS_PREFIXED FROM OCEANBASE.__ALL_TABLE I WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE IN (1, 2, 10, 15, 23, 24) AND I.PART_LEVEL != 0 AND I.TENANT_ID = 0 AND NOT EXISTS (SELECT * FROM (SELECT * FROM OCEANBASE.__ALL_COLUMN C WHERE C.TABLE_ID = I.DATA_TABLE_ID AND C.PARTITION_KEY_POSITION != 0 AND C.TENANT_ID = 0 ) PART_COLUMNS LEFT JOIN (SELECT * FROM OCEANBASE.__ALL_COLUMN C WHERE C.TABLE_ID = I.TABLE_ID AND C.TENANT_ID = 0 AND C.INDEX_POSITION != 0 ) INDEX_COLUMNS ON PART_COLUMNS.COLUMN_ID = INDEX_COLUMNS.COLUMN_ID WHERE ((PART_COLUMNS.PARTITION_KEY_POSITION & 255) != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL OR (PART_COLUMNS.PARTITION_KEY_POSITION & 255) != INDEX_COLUMNS.INDEX_POSITION) ) OR ((PART_COLUMNS.PARTITION_KEY_POSITION & 65280)/256 != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL) ) ) ) LOCAL_PARTITIONED_PREFIX_INDEX ON I_T.TENANT_ID = LOCAL_PARTITIONED_PREFIX_INDEX.TENANT_ID AND I_T.INDEX_ID = LOCAL_PARTITIONED_PREFIX_INDEX.INDEX_ID LEFT JOIN OCEANBASE.__ALL_TENANT_TABLESPACE TP ON I_T.TENANT_ID = TP.TENANT_ID AND I_T.TABLESPACE_ID = TP.TABLESPACE_ID WHERE I_T.TENANT_ID = 0 )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.21251_21300.cpp b/src/share/inner_table/ob_inner_table_schema.21251_21300.cpp index 9e28659010..265b8cfb3e 100644 --- a/src/share/inner_table/ob_inner_table_schema.21251_21300.cpp +++ b/src/share/inner_table/ob_inner_table_schema.21251_21300.cpp @@ -510,7 +510,7 @@ int ObInnerTableSchema::dba_ind_statistics_schema(ObTableSchema &table_schema) 
table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(DB.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(V.INDEX_NAME AS CHAR(128)) AS INDEX_NAME, CAST(DB.DATABASE_NAME AS CHAR(128)) AS TABLE_OWNER, CAST(T.TABLE_NAME AS CHAR(128)) AS TABLE_NAME, CAST(V.PARTITION_NAME AS CHAR(128)) AS PARTITION_NAME, CAST(V.PARTITION_POSITION AS NUMBER) AS PARTITION_POSITION, CAST(V.SUBPARTITION_NAME AS CHAR(128)) AS SUBPARTITION_NAME, CAST(V.SUBPARTITION_POSITION AS NUMBER) AS SUBPARTITION_POSITION, CAST(V.OBJECT_TYPE AS CHAR(12)) AS OBJECT_TYPE, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STAT.ROW_CNT AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS AVG_CACHED_BLOCKS, CAST(NULL AS NUMBER) AS AVG_CACHE_HIT_RATIO, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(STAT.LAST_ANALYZED AS DATETIME(6)) AS LAST_ANALYZED, CAST((CASE STAT.GLOBAL_STATS WHEN 0 THEN 'NO' WHEN 1 THEN 'YES' ELSE NULL END) AS CHAR(3)) AS GLOBAL_STATS, CAST((CASE STAT.USER_STATS WHEN 0 THEN 'NO' WHEN 1 THEN 'YES' ELSE NULL END) AS CHAR(3)) AS USER_STATS, CAST((CASE WHEN STAT.STATTYPE_LOCKED & 15 IS NULL THEN NULL ELSE (CASE STAT.STATTYPE_LOCKED & 15 WHEN 0 THEN NULL WHEN 1 THEN 'DATA' WHEN 2 THEN 'CACHE' ELSE 'ALL' END) END) AS CHAR(5)) AS STATTYPE_LOCKED, CAST((CASE STAT.STALE_STATS WHEN 0 THEN 'NO' WHEN 1 THEN 'YES' ELSE NULL END) AS CHAR(3)) AS STALE_STATS, CAST(NULL AS CHAR(7)) AS SCOPE FROM ( (SELECT CAST(0 AS SIGNED) AS TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, -2 AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS 
OBJECT_TYPE FROM OCEANBASE.__ALL_VIRTUAL_CORE_ALL_TABLE T WHERE T.TABLE_TYPE = 5 AND T.TENANT_ID = EFFECTIVE_TENANT_ID() UNION ALL SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, CASE WHEN PART_LEVEL = 0 THEN -2 ELSE -1 END AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM oceanbase.__all_table T WHERE T.TABLE_TYPE = 5 AND T.TABLE_MODE >> 12 & 15 in (0,1)) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, P.PART_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, NULL, P.PART_IDX + 1, NULL, 'PARTITION' FROM oceanbase.__all_table T JOIN oceanbase.__all_part P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID WHERE T.TABLE_TYPE = 5 UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, SP.SUB_PART_ID AS PARTITION_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, SP.SUB_PART_NAME, P.PART_IDX + 1, SP.SUB_PART_IDX + 1, 'SUBPARTITION' FROM oceanbase.__all_table T JOIN oceanbase.__all_part P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID JOIN oceanbase.__all_sub_part SP ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID WHERE T.TABLE_TYPE = 5 ) V JOIN oceanbase.__all_table T ON T.TABLE_ID = V.DATA_TABLE_ID AND T.TENANT_ID = V.TENANT_ID AND T.DATABASE_ID = V.DATABASE_ID JOIN oceanbase.__all_database DB ON DB.TENANT_ID = V.TENANT_ID AND DB.DATABASE_ID = V.DATABASE_ID AND V.TENANT_ID = 0 LEFT JOIN oceanbase.__all_table_stat STAT ON V.TENANT_ID = STAT.TENANT_ID AND V.TABLE_ID = STAT.TABLE_ID AND (V.PARTITION_ID = STAT.PARTITION_ID OR V.PARTITION_ID = -2) AND STAT.INDEX_TYPE = 1 )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(DB.DATABASE_NAME AS CHAR(128)) AS OWNER, CAST(V.INDEX_NAME AS 
CHAR(128)) AS INDEX_NAME, CAST(DB.DATABASE_NAME AS CHAR(128)) AS TABLE_OWNER, CAST(T.TABLE_NAME AS CHAR(128)) AS TABLE_NAME, CAST(V.PARTITION_NAME AS CHAR(128)) AS PARTITION_NAME, CAST(V.PARTITION_POSITION AS NUMBER) AS PARTITION_POSITION, CAST(V.SUBPARTITION_NAME AS CHAR(128)) AS SUBPARTITION_NAME, CAST(V.SUBPARTITION_POSITION AS NUMBER) AS SUBPARTITION_POSITION, CAST(V.OBJECT_TYPE AS CHAR(12)) AS OBJECT_TYPE, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STAT.ROW_CNT AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS AVG_CACHED_BLOCKS, CAST(NULL AS NUMBER) AS AVG_CACHE_HIT_RATIO, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(STAT.LAST_ANALYZED AS DATETIME(6)) AS LAST_ANALYZED, CAST((CASE STAT.GLOBAL_STATS WHEN 0 THEN 'NO' WHEN 1 THEN 'YES' ELSE NULL END) AS CHAR(3)) AS GLOBAL_STATS, CAST((CASE STAT.USER_STATS WHEN 0 THEN 'NO' WHEN 1 THEN 'YES' ELSE NULL END) AS CHAR(3)) AS USER_STATS, CAST((CASE WHEN STAT.STATTYPE_LOCKED & 15 IS NULL THEN NULL ELSE (CASE STAT.STATTYPE_LOCKED & 15 WHEN 0 THEN NULL WHEN 1 THEN 'DATA' WHEN 2 THEN 'CACHE' ELSE 'ALL' END) END) AS CHAR(5)) AS STATTYPE_LOCKED, CAST((CASE STAT.STALE_STATS WHEN 0 THEN 'NO' WHEN 1 THEN 'YES' ELSE NULL END) AS CHAR(3)) AS STALE_STATS, CAST(NULL AS CHAR(7)) AS SCOPE FROM ( (SELECT CAST(0 AS SIGNED) AS TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, -2 AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM OCEANBASE.__ALL_VIRTUAL_CORE_ALL_TABLE T WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND T.TENANT_ID = EFFECTIVE_TENANT_ID() UNION ALL SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, CASE WHEN 
PART_LEVEL = 0 THEN -2 ELSE -1 END AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM oceanbase.__all_table T WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND T.TABLE_MODE >> 12 & 15 in (0,1)) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, P.PART_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, NULL, P.PART_IDX + 1, NULL, 'PARTITION' FROM oceanbase.__all_table T JOIN oceanbase.__all_part P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, SP.SUB_PART_ID AS PARTITION_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, SP.SUB_PART_NAME, P.PART_IDX + 1, SP.SUB_PART_IDX + 1, 'SUBPARTITION' FROM oceanbase.__all_table T JOIN oceanbase.__all_part P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID JOIN oceanbase.__all_sub_part SP ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) ) V JOIN oceanbase.__all_table T ON T.TABLE_ID = V.DATA_TABLE_ID AND T.TENANT_ID = V.TENANT_ID AND T.DATABASE_ID = V.DATABASE_ID JOIN oceanbase.__all_database DB ON DB.TENANT_ID = V.TENANT_ID AND DB.DATABASE_ID = V.DATABASE_ID AND V.TENANT_ID = 0 LEFT JOIN oceanbase.__all_table_stat STAT ON V.TENANT_ID = STAT.TENANT_ID AND V.TABLE_ID = STAT.TABLE_ID AND (V.PARTITION_ID = STAT.PARTITION_ID OR V.PARTITION_ID = -2) AND STAT.INDEX_TYPE = 1 )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.25001_25050.cpp 
b/src/share/inner_table/ob_inner_table_schema.25001_25050.cpp index 10f8562281..6e737401a8 100644 --- a/src/share/inner_table/ob_inner_table_schema.25001_25050.cpp +++ b/src/share/inner_table/ob_inner_table_schema.25001_25050.cpp @@ -1810,7 +1810,7 @@ int ObInnerTableSchema::dba_indexes_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(INDEX_OWNER AS VARCHAR2(128)) AS OWNER, CAST(INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(INDEX_TYPE_NAME AS VARCHAR2(27)) AS INDEX_TYPE, CAST(TABLE_OWNER AS VARCHAR2(128)) AS TABLE_OWNER, CAST(TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST('TABLE' AS CHAR(5)) AS TABLE_TYPE, CAST(UNIQUENESS AS VARCHAR2(9)) AS UNIQUENESS, CAST(COMPRESSION AS VARCHAR2(13)) AS COMPRESSION, CAST(NULL AS NUMBER) AS PREFIX_LENGTH, CAST(TABLESPACE_NAME AS VARCHAR2(30)) AS TABLESPACE_NAME, CAST(NULL AS NUMBER) AS INI_TRANS, CAST(NULL AS NUMBER) AS MAX_TRANS, CAST(NULL AS NUMBER) AS INITIAL_EXTENT, CAST(NULL AS NUMBER) AS NEXT_EXTENT, CAST(NULL AS NUMBER) AS MIN_EXTENTS, CAST(NULL AS NUMBER) AS MAX_EXTENTS, CAST(NULL AS NUMBER) AS PCT_INCREASE, CAST(NULL AS NUMBER) AS PCT_THRESHOLD, CAST(NULL AS NUMBER) AS INCLUDE_COLUMN, CAST(NULL AS NUMBER) AS FREELISTS, CAST(NULL AS NUMBER) AS FREELIST_GROUPS, CAST(NULL AS NUMBER) AS PCT_FREE, CAST(NULL AS VARCHAR2(3)) AS LOGGING, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STATUS AS VARCHAR2(8)) AS STATUS, CAST(NULL AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(NULL AS DATE) AS LAST_ANALYZED, CAST(DOP_DEGREE AS VARCHAR2(40)) AS DEGREE, CAST(NULL AS VARCHAR2(40)) AS INSTANCES, CAST(CASE WHEN A_PART_LEVEL = 0 THEN 'NO' ELSE 
'YES' END AS VARCHAR2(3)) AS PARTITIONED, CAST(NULL AS VARCHAR2(1)) AS TEMPORARY, CAST(NULL AS VARCHAR2(1)) AS "GENERATED", CAST(NULL AS VARCHAR2(1)) AS SECONDARY, CAST(NULL AS VARCHAR2(7)) AS BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(3)) AS USER_STATS, CAST(NULL AS VARCHAR2(15)) AS DURATION, CAST(NULL AS NUMBER) AS PCT_DIRECT_ACCESS, CAST(NULL AS VARCHAR2(128)) AS ITYP_OWNER, CAST(NULL AS VARCHAR2(128)) AS ITYP_NAME, CAST(NULL AS VARCHAR2(1000)) AS PARAMETERS, CAST(NULL AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(NULL AS VARCHAR2(12)) AS DOMIDX_STATUS, CAST(NULL AS VARCHAR2(6)) AS DOMIDX_OPSTATUS, CAST(FUNCIDX_STATUS AS VARCHAR2(8)) AS FUNCIDX_STATUS, CAST('NO' AS VARCHAR2(3)) AS JOIN_INDEX, CAST(NULL AS VARCHAR2(3)) AS IOT_REDUNDANT_PKEY_ELIM, CAST(DROPPED AS VARCHAR2(3)) AS DROPPED, CAST(VISIBILITY AS VARCHAR2(9)) AS VISIBILITY, CAST(NULL AS VARCHAR2(14)) AS DOMIDX_MANAGEMENT, CAST(NULL AS VARCHAR2(3)) AS SEGMENT_CREATED, CAST(NULL AS VARCHAR2(3)) AS ORPHANED_ENTRIES, CAST(NULL AS VARCHAR2(7)) AS INDEXING, CAST(NULL AS VARCHAR2(3)) AS AUTO FROM (SELECT A.TENANT_ID, DATABASE_NAME AS INDEX_OWNER, CASE WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME != '__recyclebin') THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME = '__recyclebin') THEN TABLE_NAME ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_INDEX, SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_BASE WHERE T_COL_BASE.TABLE_ID = A.DATA_TABLE_ID AND T_COL_BASE.COLUMN_NAME = T_COL_INDEX.COLUMN_NAME AND T_COL_INDEX.TABLE_ID = A.TABLE_ID AND T_COL_BASE.TENANT_ID = A.TENANT_ID AND T_COL_INDEX.TENANT_ID = A.TENANT_ID AND BITAND(T_COL_BASE.COLUMN_FLAGS,3) > 0 AND T_COL_INDEX.INDEX_POSITION != 0 ) THEN 'FUNCTION-BASED NORMAL' ELSE 'NORMAL' END AS INDEX_TYPE_NAME, DATABASE_NAME AS TABLE_OWNER, CASE WHEN 
(TABLE_TYPE IN (3, 15)) THEN A.TABLE_ID ELSE A.DATA_TABLE_ID END AS TABLE_ID, A.TABLE_ID AS INDEX_ID, CASE WHEN TABLE_TYPE = 3 THEN 'UNIQUE' WHEN A.INDEX_TYPE IN (2, 4, 8) THEN 'UNIQUE' ELSE 'NONUNIQUE' END AS UNIQUENESS, CASE WHEN A.COMPRESS_FUNC_NAME = NULL THEN 'DISABLED' ELSE 'ENABLED' END AS COMPRESSION, CASE WHEN TABLE_TYPE IN (3, 15) THEN 'VALID' WHEN A.INDEX_STATUS = 2 THEN 'VALID' WHEN A.INDEX_STATUS = 3 THEN 'CHECKING' WHEN A.INDEX_STATUS = 4 THEN 'INELEGIBLE' WHEN A.INDEX_STATUS = 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS STATUS, A.INDEX_TYPE AS A_INDEX_TYPE, A.PART_LEVEL AS A_PART_LEVEL, A.TABLE_TYPE AS A_TABLE_TYPE, CASE WHEN 0 = (SELECT COUNT(1) FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT WHERE TABLE_ID = A.TABLE_ID AND IS_HIDDEN = 0 AND TENANT_ID = EFFECTIVE_TENANT_ID()) THEN 'ENABLED' ELSE 'NULL' END AS FUNCIDX_STATUS, CASE WHEN B.IN_RECYCLEBIN = 1 THEN 'YES' ELSE 'NO' END AS DROPPED, CASE WHEN BITAND(A.INDEX_ATTRIBUTES_SET, 1) = 0 THEN 'VISIBLE' ELSE 'INVISIBLE' END AS VISIBILITY, A.TABLESPACE_ID, A.DOP AS DOP_DEGREE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT A JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT B ON A.DATABASE_ID = B.DATABASE_ID AND TABLE_TYPE IN (5, 3, 15) AND A.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((A.TABLE_MODE / 4096), 15) IN (0,1) AND B.TENANT_ID = EFFECTIVE_TENANT_ID() AND B.DATABASE_NAME != '__recyclebin' LEFT JOIN SYS.ALL_VIRTUAL_CONSTRAINT_REAL_AGENT CONS_TAB ON (CONS_TAB.TABLE_ID = A.TABLE_ID) AND CONS_TAB.TENANT_ID = EFFECTIVE_TENANT_ID() WHERE NOT(TABLE_TYPE = 3 AND CONSTRAINT_NAME IS NULL) AND (CONS_TAB.CONSTRAINT_TYPE IS NULL OR CONS_TAB.CONSTRAINT_TYPE = 1) ) C JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT D ON C.TABLE_ID = D.TABLE_ID AND C.TENANT_ID = D.TENANT_ID AND D.TABLE_TYPE != 12 AND D.TABLE_TYPE != 13 AND bitand((D.TABLE_MODE / 4096), 15) IN (0,1) LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON C.TABLESPACE_ID = TP.TABLESPACE_ID AND TP.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { + if 
(OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(INDEX_OWNER AS VARCHAR2(128)) AS OWNER, CAST(INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(INDEX_TYPE_NAME AS VARCHAR2(27)) AS INDEX_TYPE, CAST(TABLE_OWNER AS VARCHAR2(128)) AS TABLE_OWNER, CAST(TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST('TABLE' AS CHAR(5)) AS TABLE_TYPE, CAST(UNIQUENESS AS VARCHAR2(9)) AS UNIQUENESS, CAST(COMPRESSION AS VARCHAR2(13)) AS COMPRESSION, CAST(NULL AS NUMBER) AS PREFIX_LENGTH, CAST(TABLESPACE_NAME AS VARCHAR2(30)) AS TABLESPACE_NAME, CAST(NULL AS NUMBER) AS INI_TRANS, CAST(NULL AS NUMBER) AS MAX_TRANS, CAST(NULL AS NUMBER) AS INITIAL_EXTENT, CAST(NULL AS NUMBER) AS NEXT_EXTENT, CAST(NULL AS NUMBER) AS MIN_EXTENTS, CAST(NULL AS NUMBER) AS MAX_EXTENTS, CAST(NULL AS NUMBER) AS PCT_INCREASE, CAST(NULL AS NUMBER) AS PCT_THRESHOLD, CAST(NULL AS NUMBER) AS INCLUDE_COLUMN, CAST(NULL AS NUMBER) AS FREELISTS, CAST(NULL AS NUMBER) AS FREELIST_GROUPS, CAST(NULL AS NUMBER) AS PCT_FREE, CAST(NULL AS VARCHAR2(3)) AS LOGGING, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STATUS AS VARCHAR2(8)) AS STATUS, CAST(NULL AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(NULL AS DATE) AS LAST_ANALYZED, CAST(DOP_DEGREE AS VARCHAR2(40)) AS DEGREE, CAST(NULL AS VARCHAR2(40)) AS INSTANCES, CAST(CASE WHEN A_PART_LEVEL = 0 THEN 'NO' ELSE 'YES' END AS VARCHAR2(3)) AS PARTITIONED, CAST(NULL AS VARCHAR2(1)) AS TEMPORARY, CAST(NULL AS VARCHAR2(1)) AS "GENERATED", CAST(NULL AS VARCHAR2(1)) AS SECONDARY, CAST(NULL AS VARCHAR2(7)) AS BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(3)) AS USER_STATS, CAST(NULL AS VARCHAR2(15)) AS DURATION, CAST(NULL AS NUMBER) AS PCT_DIRECT_ACCESS, CAST(NULL AS VARCHAR2(128)) AS 
ITYP_OWNER, CAST(NULL AS VARCHAR2(128)) AS ITYP_NAME, CAST(NULL AS VARCHAR2(1000)) AS PARAMETERS, CAST(NULL AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(NULL AS VARCHAR2(12)) AS DOMIDX_STATUS, CAST(NULL AS VARCHAR2(6)) AS DOMIDX_OPSTATUS, CAST(FUNCIDX_STATUS AS VARCHAR2(8)) AS FUNCIDX_STATUS, CAST('NO' AS VARCHAR2(3)) AS JOIN_INDEX, CAST(NULL AS VARCHAR2(3)) AS IOT_REDUNDANT_PKEY_ELIM, CAST(DROPPED AS VARCHAR2(3)) AS DROPPED, CAST(VISIBILITY AS VARCHAR2(9)) AS VISIBILITY, CAST(NULL AS VARCHAR2(14)) AS DOMIDX_MANAGEMENT, CAST(NULL AS VARCHAR2(3)) AS SEGMENT_CREATED, CAST(NULL AS VARCHAR2(3)) AS ORPHANED_ENTRIES, CAST(NULL AS VARCHAR2(7)) AS INDEXING, CAST(NULL AS VARCHAR2(3)) AS AUTO FROM (SELECT A.TENANT_ID, DATABASE_NAME AS INDEX_OWNER, CASE WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME != '__recyclebin') THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME = '__recyclebin') THEN TABLE_NAME ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE WHEN A.TABLE_TYPE = 5 AND A.INDEX_TYPE IN (15, 18, 21) THEN 'DOMAIN' WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_INDEX, SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_BASE WHERE T_COL_BASE.TABLE_ID = A.DATA_TABLE_ID AND T_COL_BASE.COLUMN_NAME = T_COL_INDEX.COLUMN_NAME AND T_COL_INDEX.TABLE_ID = A.TABLE_ID AND T_COL_BASE.TENANT_ID = A.TENANT_ID AND T_COL_INDEX.TENANT_ID = A.TENANT_ID AND BITAND(T_COL_BASE.COLUMN_FLAGS,3) > 0 AND T_COL_INDEX.INDEX_POSITION != 0 ) THEN 'FUNCTION-BASED NORMAL' ELSE 'NORMAL' END AS INDEX_TYPE_NAME, DATABASE_NAME AS TABLE_OWNER, CASE WHEN (TABLE_TYPE IN (3, 15)) THEN A.TABLE_ID ELSE A.DATA_TABLE_ID END AS TABLE_ID, A.TABLE_ID AS INDEX_ID, CASE WHEN TABLE_TYPE = 3 THEN 'UNIQUE' WHEN A.INDEX_TYPE IN (2, 4, 8) THEN 'UNIQUE' ELSE 'NONUNIQUE' END AS UNIQUENESS, CASE WHEN A.COMPRESS_FUNC_NAME = NULL THEN 'DISABLED' ELSE 'ENABLED' END AS COMPRESSION, CASE WHEN TABLE_TYPE IN (3, 15) THEN 'VALID' WHEN A.INDEX_STATUS = 2 THEN 
'VALID' WHEN A.INDEX_STATUS = 3 THEN 'CHECKING' WHEN A.INDEX_STATUS = 4 THEN 'INELEGIBLE' WHEN A.INDEX_STATUS = 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS STATUS, A.INDEX_TYPE AS A_INDEX_TYPE, A.PART_LEVEL AS A_PART_LEVEL, A.TABLE_TYPE AS A_TABLE_TYPE, CASE WHEN 0 = (SELECT COUNT(1) FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT WHERE TABLE_ID = A.TABLE_ID AND IS_HIDDEN = 0 AND TENANT_ID = EFFECTIVE_TENANT_ID()) THEN 'ENABLED' ELSE 'NULL' END AS FUNCIDX_STATUS, CASE WHEN B.IN_RECYCLEBIN = 1 THEN 'YES' ELSE 'NO' END AS DROPPED, CASE WHEN BITAND(A.INDEX_ATTRIBUTES_SET, 1) = 0 THEN 'VISIBLE' ELSE 'INVISIBLE' END AS VISIBILITY, A.TABLESPACE_ID, A.DOP AS DOP_DEGREE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT A JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT B ON A.DATABASE_ID = B.DATABASE_ID AND TABLE_TYPE IN (5, 3, 15) AND A.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND A.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((A.TABLE_MODE / 4096), 15) IN (0,1) AND B.TENANT_ID = EFFECTIVE_TENANT_ID() AND B.DATABASE_NAME != '__recyclebin' LEFT JOIN SYS.ALL_VIRTUAL_CONSTRAINT_REAL_AGENT CONS_TAB ON (CONS_TAB.TABLE_ID = A.TABLE_ID) AND CONS_TAB.TENANT_ID = EFFECTIVE_TENANT_ID() WHERE NOT(TABLE_TYPE = 3 AND CONSTRAINT_NAME IS NULL) AND (CONS_TAB.CONSTRAINT_TYPE IS NULL OR CONS_TAB.CONSTRAINT_TYPE = 1) ) C JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT D ON C.TABLE_ID = D.TABLE_ID AND C.TENANT_ID = D.TENANT_ID AND D.TABLE_TYPE != 12 AND D.TABLE_TYPE != 13 AND bitand((D.TABLE_MODE / 4096), 15) IN (0,1) LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON C.TABLESPACE_ID = TP.TABLESPACE_ID AND TP.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -1860,7 +1860,7 @@ int ObInnerTableSchema::all_indexes_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(INDEX_OWNER AS VARCHAR2(128)) AS OWNER, 
CAST(INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(INDEX_TYPE_NAME AS VARCHAR2(27)) AS INDEX_TYPE, CAST(TABLE_OWNER AS VARCHAR2(128)) AS TABLE_OWNER, CAST(TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST('TABLE' AS CHAR(5)) AS TABLE_TYPE, CAST(UNIQUENESS AS VARCHAR2(9)) AS UNIQUENESS, CAST(COMPRESSION AS VARCHAR2(13)) AS COMPRESSION, CAST(NULL AS NUMBER) AS PREFIX_LENGTH, CAST(TABLESPACE_NAME AS VARCHAR2(30)) AS TABLESPACE_NAME, CAST(NULL AS NUMBER) AS INI_TRANS, CAST(NULL AS NUMBER) AS MAX_TRANS, CAST(NULL AS NUMBER) AS INITIAL_EXTENT, CAST(NULL AS NUMBER) AS NEXT_EXTENT, CAST(NULL AS NUMBER) AS MIN_EXTENTS, CAST(NULL AS NUMBER) AS MAX_EXTENTS, CAST(NULL AS NUMBER) AS PCT_INCREASE, CAST(NULL AS NUMBER) AS PCT_THRESHOLD, CAST(NULL AS NUMBER) AS INCLUDE_COLUMN, CAST(NULL AS NUMBER) AS FREELISTS, CAST(NULL AS NUMBER) AS FREELIST_GROUPS, CAST(NULL AS NUMBER) AS PCT_FREE, CAST(NULL AS VARCHAR2(3)) AS LOGGING, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STATUS AS VARCHAR2(8)) AS STATUS, CAST(NULL AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(NULL AS DATE) AS LAST_ANALYZED, CAST(DOP_DEGREE AS VARCHAR2(40)) AS DEGREE, CAST(NULL AS VARCHAR2(40)) AS INSTANCES, CAST(CASE WHEN A_PART_LEVEL = 0 THEN 'NO' ELSE 'YES' END AS VARCHAR2(3)) AS PARTITIONED, CAST(NULL AS VARCHAR2(1)) AS TEMPORARY, CAST(NULL AS VARCHAR2(1)) AS "GENERATED", CAST(NULL AS VARCHAR2(1)) AS SECONDARY, CAST(NULL AS VARCHAR2(7)) AS BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(3)) AS USER_STATS, CAST(NULL AS VARCHAR2(15)) AS DURATION, CAST(NULL AS NUMBER) AS PCT_DIRECT_ACCESS, CAST(NULL AS VARCHAR2(128)) AS ITYP_OWNER, CAST(NULL AS VARCHAR2(128)) AS ITYP_NAME, CAST(NULL AS VARCHAR2(1000)) AS PARAMETERS, 
CAST(NULL AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(NULL AS VARCHAR2(12)) AS DOMIDX_STATUS, CAST(NULL AS VARCHAR2(6)) AS DOMIDX_OPSTATUS, CAST(FUNCIDX_STATUS AS VARCHAR2(8)) AS FUNCIDX_STATUS, CAST('NO' AS VARCHAR2(3)) AS JOIN_INDEX, CAST(NULL AS VARCHAR2(3)) AS IOT_REDUNDANT_PKEY_ELIM, CAST(DROPPED AS VARCHAR2(3)) AS DROPPED, CAST(VISIBILITY AS VARCHAR2(9)) AS VISIBILITY, CAST(NULL AS VARCHAR2(14)) AS DOMIDX_MANAGEMENT, CAST(NULL AS VARCHAR2(3)) AS SEGMENT_CREATED, CAST(NULL AS VARCHAR2(3)) AS ORPHANED_ENTRIES, CAST(NULL AS VARCHAR2(7)) AS INDEXING, CAST(NULL AS VARCHAR2(3)) AS AUTO FROM (SELECT A.TENANT_ID, DATABASE_NAME AS INDEX_OWNER, CASE WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME != '__recyclebin') THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME = '__recyclebin') THEN TABLE_NAME ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_INDEX, SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_BASE WHERE T_COL_BASE.TABLE_ID = A.DATA_TABLE_ID AND T_COL_BASE.COLUMN_NAME = T_COL_INDEX.COLUMN_NAME AND T_COL_INDEX.TABLE_ID = A.TABLE_ID AND T_COL_BASE.TENANT_ID = A.TENANT_ID AND T_COL_INDEX.TENANT_ID = A.TENANT_ID AND BITAND(T_COL_BASE.COLUMN_FLAGS,3) > 0 AND T_COL_INDEX.INDEX_POSITION != 0 ) THEN 'FUNCTION-BASED NORMAL' ELSE 'NORMAL' END AS INDEX_TYPE_NAME, DATABASE_NAME AS TABLE_OWNER, CASE WHEN (TABLE_TYPE IN (3, 15)) THEN A.TABLE_ID ELSE A.DATA_TABLE_ID END AS TABLE_ID, A.TABLE_ID AS INDEX_ID, CASE WHEN TABLE_TYPE IN (3, 15) THEN 'UNIQUE' WHEN A.INDEX_TYPE IN (2, 4, 8) THEN 'UNIQUE' ELSE 'NONUNIQUE' END AS UNIQUENESS, CASE WHEN A.COMPRESS_FUNC_NAME = NULL THEN 'DISABLED' ELSE 'ENABLED' END AS COMPRESSION, CASE WHEN TABLE_TYPE IN (3, 15) THEN 'VALID' WHEN A.INDEX_STATUS = 2 THEN 'VALID' WHEN A.INDEX_STATUS = 3 THEN 'CHECKING' WHEN A.INDEX_STATUS = 4 THEN 'INELEGIBLE' WHEN A.INDEX_STATUS = 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS STATUS, 
A.INDEX_TYPE AS A_INDEX_TYPE, A.PART_LEVEL AS A_PART_LEVEL, A.TABLE_TYPE AS A_TABLE_TYPE, CASE WHEN 0 = (SELECT COUNT(1) FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT WHERE TABLE_ID = A.TABLE_ID AND IS_HIDDEN = 0 AND TENANT_ID = EFFECTIVE_TENANT_ID()) THEN 'ENABLED' ELSE 'NULL' END AS FUNCIDX_STATUS, CASE WHEN B.IN_RECYCLEBIN = 1 THEN 'YES' ELSE 'NO' END AS DROPPED, CASE WHEN BITAND(A.INDEX_ATTRIBUTES_SET, 1) = 0 THEN 'VISIBLE' ELSE 'INVISIBLE' END AS VISIBILITY, A.TABLESPACE_ID, A.DOP AS DOP_DEGREE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT A JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT B ON A.DATABASE_ID = B.DATABASE_ID AND TABLE_TYPE IN (5, 3, 15) AND A.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((A.TABLE_MODE / 4096), 15) IN (0,1) AND B.TENANT_ID = EFFECTIVE_TENANT_ID() AND B.DATABASE_NAME != '__recyclebin' AND (A.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, DECODE(TABLE_TYPE, 3, A.TABLE_ID, 5, DATA_TABLE_ID), A.DATABASE_ID) = 1) LEFT JOIN SYS.ALL_VIRTUAL_CONSTRAINT_REAL_AGENT CONS_TAB ON (CONS_TAB.TABLE_ID = A.TABLE_ID) AND CONS_TAB.TENANT_ID = EFFECTIVE_TENANT_ID() WHERE NOT(TABLE_TYPE = 3 AND CONSTRAINT_NAME IS NULL) AND (CONS_TAB.CONSTRAINT_TYPE IS NULL OR CONS_TAB.CONSTRAINT_TYPE = 1) ) C JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT D ON C.TABLE_ID = D.TABLE_ID AND C.TENANT_ID = D.TENANT_ID AND D.TABLE_TYPE != 12 AND D.TABLE_TYPE != 13 AND bitand((D.TABLE_MODE / 4096), 15) IN (0,1) LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON C.TABLESPACE_ID = TP.TABLESPACE_ID AND TP.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(INDEX_OWNER AS VARCHAR2(128)) AS OWNER, CAST(INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(INDEX_TYPE_NAME AS VARCHAR2(27)) AS INDEX_TYPE, CAST(TABLE_OWNER AS VARCHAR2(128)) AS TABLE_OWNER, CAST(TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST('TABLE' AS CHAR(5)) AS TABLE_TYPE, CAST(UNIQUENESS AS VARCHAR2(9)) AS UNIQUENESS, CAST(COMPRESSION AS VARCHAR2(13)) AS COMPRESSION, 
CAST(NULL AS NUMBER) AS PREFIX_LENGTH, CAST(TABLESPACE_NAME AS VARCHAR2(30)) AS TABLESPACE_NAME, CAST(NULL AS NUMBER) AS INI_TRANS, CAST(NULL AS NUMBER) AS MAX_TRANS, CAST(NULL AS NUMBER) AS INITIAL_EXTENT, CAST(NULL AS NUMBER) AS NEXT_EXTENT, CAST(NULL AS NUMBER) AS MIN_EXTENTS, CAST(NULL AS NUMBER) AS MAX_EXTENTS, CAST(NULL AS NUMBER) AS PCT_INCREASE, CAST(NULL AS NUMBER) AS PCT_THRESHOLD, CAST(NULL AS NUMBER) AS INCLUDE_COLUMN, CAST(NULL AS NUMBER) AS FREELISTS, CAST(NULL AS NUMBER) AS FREELIST_GROUPS, CAST(NULL AS NUMBER) AS PCT_FREE, CAST(NULL AS VARCHAR2(3)) AS LOGGING, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STATUS AS VARCHAR2(8)) AS STATUS, CAST(NULL AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(NULL AS DATE) AS LAST_ANALYZED, CAST(DOP_DEGREE AS VARCHAR2(40)) AS DEGREE, CAST(NULL AS VARCHAR2(40)) AS INSTANCES, CAST(CASE WHEN A_PART_LEVEL = 0 THEN 'NO' ELSE 'YES' END AS VARCHAR2(3)) AS PARTITIONED, CAST(NULL AS VARCHAR2(1)) AS TEMPORARY, CAST(NULL AS VARCHAR2(1)) AS "GENERATED", CAST(NULL AS VARCHAR2(1)) AS SECONDARY, CAST(NULL AS VARCHAR2(7)) AS BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(3)) AS USER_STATS, CAST(NULL AS VARCHAR2(15)) AS DURATION, CAST(NULL AS NUMBER) AS PCT_DIRECT_ACCESS, CAST(NULL AS VARCHAR2(128)) AS ITYP_OWNER, CAST(NULL AS VARCHAR2(128)) AS ITYP_NAME, CAST(NULL AS VARCHAR2(1000)) AS PARAMETERS, CAST(NULL AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(NULL AS VARCHAR2(12)) AS DOMIDX_STATUS, CAST(NULL AS VARCHAR2(6)) AS DOMIDX_OPSTATUS, CAST(FUNCIDX_STATUS AS VARCHAR2(8)) AS FUNCIDX_STATUS, CAST('NO' AS VARCHAR2(3)) AS JOIN_INDEX, CAST(NULL AS VARCHAR2(3)) AS IOT_REDUNDANT_PKEY_ELIM, CAST(DROPPED AS VARCHAR2(3)) AS DROPPED, CAST(VISIBILITY 
AS VARCHAR2(9)) AS VISIBILITY, CAST(NULL AS VARCHAR2(14)) AS DOMIDX_MANAGEMENT, CAST(NULL AS VARCHAR2(3)) AS SEGMENT_CREATED, CAST(NULL AS VARCHAR2(3)) AS ORPHANED_ENTRIES, CAST(NULL AS VARCHAR2(7)) AS INDEXING, CAST(NULL AS VARCHAR2(3)) AS AUTO FROM (SELECT A.TENANT_ID, DATABASE_NAME AS INDEX_OWNER, CASE WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME != '__recyclebin') THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME = '__recyclebin') THEN TABLE_NAME ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE WHEN A.TABLE_TYPE = 5 AND A.INDEX_TYPE IN (15, 18, 21) THEN 'DOMAIN' WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_INDEX, SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_BASE WHERE T_COL_BASE.TABLE_ID = A.DATA_TABLE_ID AND T_COL_BASE.COLUMN_NAME = T_COL_INDEX.COLUMN_NAME AND T_COL_INDEX.TABLE_ID = A.TABLE_ID AND T_COL_BASE.TENANT_ID = A.TENANT_ID AND T_COL_INDEX.TENANT_ID = A.TENANT_ID AND BITAND(T_COL_BASE.COLUMN_FLAGS,3) > 0 AND T_COL_INDEX.INDEX_POSITION != 0 ) THEN 'FUNCTION-BASED NORMAL' ELSE 'NORMAL' END AS INDEX_TYPE_NAME, DATABASE_NAME AS TABLE_OWNER, CASE WHEN (TABLE_TYPE IN (3, 15)) THEN A.TABLE_ID ELSE A.DATA_TABLE_ID END AS TABLE_ID, A.TABLE_ID AS INDEX_ID, CASE WHEN TABLE_TYPE IN (3, 15) THEN 'UNIQUE' WHEN A.INDEX_TYPE IN (2, 4, 8) THEN 'UNIQUE' ELSE 'NONUNIQUE' END AS UNIQUENESS, CASE WHEN A.COMPRESS_FUNC_NAME = NULL THEN 'DISABLED' ELSE 'ENABLED' END AS COMPRESSION, CASE WHEN TABLE_TYPE IN (3, 15) THEN 'VALID' WHEN A.INDEX_STATUS = 2 THEN 'VALID' WHEN A.INDEX_STATUS = 3 THEN 'CHECKING' WHEN A.INDEX_STATUS = 4 THEN 'INELEGIBLE' WHEN A.INDEX_STATUS = 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS STATUS, A.INDEX_TYPE AS A_INDEX_TYPE, A.PART_LEVEL AS A_PART_LEVEL, A.TABLE_TYPE AS A_TABLE_TYPE, CASE WHEN 0 = (SELECT COUNT(1) FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT WHERE TABLE_ID = A.TABLE_ID AND IS_HIDDEN = 0 AND TENANT_ID = EFFECTIVE_TENANT_ID()) THEN 'ENABLED' ELSE 'NULL' END 
AS FUNCIDX_STATUS, CASE WHEN B.IN_RECYCLEBIN = 1 THEN 'YES' ELSE 'NO' END AS DROPPED, CASE WHEN BITAND(A.INDEX_ATTRIBUTES_SET, 1) = 0 THEN 'VISIBLE' ELSE 'INVISIBLE' END AS VISIBILITY, A.TABLESPACE_ID, A.DOP AS DOP_DEGREE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT A JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT B ON A.DATABASE_ID = B.DATABASE_ID AND TABLE_TYPE IN (5, 3, 15) AND A.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND A.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((A.TABLE_MODE / 4096), 15) IN (0,1) AND B.TENANT_ID = EFFECTIVE_TENANT_ID() AND B.DATABASE_NAME != '__recyclebin' AND (A.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, DECODE(TABLE_TYPE, 3, A.TABLE_ID, 5, DATA_TABLE_ID), A.DATABASE_ID) = 1) LEFT JOIN SYS.ALL_VIRTUAL_CONSTRAINT_REAL_AGENT CONS_TAB ON (CONS_TAB.TABLE_ID = A.TABLE_ID) AND CONS_TAB.TENANT_ID = EFFECTIVE_TENANT_ID() WHERE NOT(TABLE_TYPE = 3 AND CONSTRAINT_NAME IS NULL) AND (CONS_TAB.CONSTRAINT_TYPE IS NULL OR CONS_TAB.CONSTRAINT_TYPE = 1) ) C JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT D ON C.TABLE_ID = D.TABLE_ID AND C.TENANT_ID = D.TENANT_ID AND D.TABLE_TYPE != 12 AND D.TABLE_TYPE != 13 AND bitand((D.TABLE_MODE / 4096), 15) IN (0,1) LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON C.TABLESPACE_ID = TP.TABLESPACE_ID AND TP.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -1910,7 +1910,7 @@ int ObInnerTableSchema::user_indexes_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(INDEX_TYPE_NAME AS VARCHAR2(27)) AS INDEX_TYPE, CAST(TABLE_OWNER AS VARCHAR2(128)) AS TABLE_OWNER, CAST(TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST('TABLE' AS CHAR(5)) AS TABLE_TYPE, CAST(UNIQUENESS AS VARCHAR2(9)) AS UNIQUENESS, CAST(COMPRESSION AS VARCHAR2(13)) AS COMPRESSION, 
CAST(NULL AS NUMBER) AS PREFIX_LENGTH, CAST(TABLESPACE_NAME AS VARCHAR2(30)) AS TABLESPACE_NAME, CAST(NULL AS NUMBER) AS INI_TRANS, CAST(NULL AS NUMBER) AS MAX_TRANS, CAST(NULL AS NUMBER) AS INITIAL_EXTENT, CAST(NULL AS NUMBER) AS NEXT_EXTENT, CAST(NULL AS NUMBER) AS MIN_EXTENTS, CAST(NULL AS NUMBER) AS MAX_EXTENTS, CAST(NULL AS NUMBER) AS PCT_INCREASE, CAST(NULL AS NUMBER) AS PCT_THRESHOLD, CAST(NULL AS NUMBER) AS INCLUDE_COLUMN, CAST(NULL AS NUMBER) AS FREELISTS, CAST(NULL AS NUMBER) AS FREELIST_GROUPS, CAST(NULL AS NUMBER) AS PCT_FREE, CAST(NULL AS VARCHAR2(3)) AS LOGGING, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STATUS AS VARCHAR2(8)) AS STATUS, CAST(NULL AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(NULL AS DATE) AS LAST_ANALYZED, CAST(DOP_DEGREE AS VARCHAR2(40)) AS DEGREE, CAST(NULL AS VARCHAR2(40)) AS INSTANCES, CAST(CASE WHEN A_PART_LEVEL = 0 THEN 'NO' ELSE 'YES' END AS VARCHAR2(3)) AS PARTITIONED, CAST(NULL AS VARCHAR2(1)) AS TEMPORARY, CAST(NULL AS VARCHAR2(1)) AS "GENERATED", CAST(NULL AS VARCHAR2(1)) AS SECONDARY, CAST(NULL AS VARCHAR2(7)) AS BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(3)) AS USER_STATS, CAST(NULL AS VARCHAR2(15)) AS DURATION, CAST(NULL AS NUMBER) AS PCT_DIRECT_ACCESS, CAST(NULL AS VARCHAR2(128)) AS ITYP_OWNER, CAST(NULL AS VARCHAR2(128)) AS ITYP_NAME, CAST(NULL AS VARCHAR2(1000)) AS PARAMETERS, CAST(NULL AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(NULL AS VARCHAR2(12)) AS DOMIDX_STATUS, CAST(NULL AS VARCHAR2(6)) AS DOMIDX_OPSTATUS, CAST(FUNCIDX_STATUS AS VARCHAR2(8)) AS FUNCIDX_STATUS, CAST('NO' AS VARCHAR2(3)) AS JOIN_INDEX, CAST(NULL AS VARCHAR2(3)) AS IOT_REDUNDANT_PKEY_ELIM, CAST(DROPPED AS VARCHAR2(3)) AS DROPPED, CAST(VISIBILITY 
AS VARCHAR2(9)) AS VISIBILITY, CAST(NULL AS VARCHAR2(14)) AS DOMIDX_MANAGEMENT, CAST(NULL AS VARCHAR2(3)) AS SEGMENT_CREATED, CAST(NULL AS VARCHAR2(3)) AS ORPHANED_ENTRIES, CAST(NULL AS VARCHAR2(7)) AS INDEXING, CAST(NULL AS VARCHAR2(3)) AS AUTO FROM (SELECT A.TENANT_ID, DATABASE_NAME AS INDEX_OWNER, CASE WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME != '__recyclebin') THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME = '__recyclebin') THEN TABLE_NAME ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_INDEX, SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_BASE WHERE T_COL_BASE.TABLE_ID = A.DATA_TABLE_ID AND T_COL_BASE.COLUMN_NAME = T_COL_INDEX.COLUMN_NAME AND T_COL_INDEX.TABLE_ID = A.TABLE_ID AND T_COL_BASE.TENANT_ID = A.TENANT_ID AND T_COL_INDEX.TENANT_ID = A.TENANT_ID AND BITAND(T_COL_BASE.COLUMN_FLAGS,3) > 0 AND T_COL_INDEX.INDEX_POSITION != 0 ) THEN 'FUNCTION-BASED NORMAL' ELSE 'NORMAL' END AS INDEX_TYPE_NAME, DATABASE_NAME AS TABLE_OWNER, CASE WHEN (TABLE_TYPE IN (3, 15)) THEN A.TABLE_ID ELSE A.DATA_TABLE_ID END AS TABLE_ID, A.TABLE_ID AS INDEX_ID, CASE WHEN TABLE_TYPE IN (3, 15) THEN 'UNIQUE' WHEN A.INDEX_TYPE IN (2, 4, 8) THEN 'UNIQUE' ELSE 'NONUNIQUE' END AS UNIQUENESS, CASE WHEN A.COMPRESS_FUNC_NAME = NULL THEN 'DISABLED' ELSE 'ENABLED' END AS COMPRESSION, CASE WHEN TABLE_TYPE IN (3, 15) THEN 'VALID' WHEN A.INDEX_STATUS = 2 THEN 'VALID' WHEN A.INDEX_STATUS = 3 THEN 'CHECKING' WHEN A.INDEX_STATUS = 4 THEN 'INELEGIBLE' WHEN A.INDEX_STATUS = 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS STATUS, A.INDEX_TYPE AS A_INDEX_TYPE, A.PART_LEVEL AS A_PART_LEVEL, A.TABLE_TYPE AS A_TABLE_TYPE, CASE WHEN 0 = (SELECT COUNT(1) FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT WHERE TABLE_ID = A.TABLE_ID AND IS_HIDDEN = 0 AND TENANT_ID = EFFECTIVE_TENANT_ID()) THEN 'ENABLED' ELSE 'NULL' END AS FUNCIDX_STATUS, CASE WHEN B.IN_RECYCLEBIN = 1 THEN 'YES' ELSE 'NO' 
END AS DROPPED, CASE WHEN BITAND(A.INDEX_ATTRIBUTES_SET, 1) = 0 THEN 'VISIBLE' ELSE 'INVISIBLE' END AS VISIBILITY, A.TABLESPACE_ID, A.DOP AS DOP_DEGREE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT A JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT B ON A.DATABASE_ID = B.DATABASE_ID AND TABLE_TYPE IN (5, 3, 15) AND A.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((A.TABLE_MODE / 4096), 15) IN (0,1) AND B.TENANT_ID = EFFECTIVE_TENANT_ID() AND A.DATABASE_ID = USERENV('SCHEMAID') AND B.DATABASE_NAME != '__recyclebin' LEFT JOIN SYS.ALL_VIRTUAL_CONSTRAINT_REAL_AGENT CONS_TAB ON (CONS_TAB.TABLE_ID = A.TABLE_ID) AND CONS_TAB.TENANT_ID = EFFECTIVE_TENANT_ID() WHERE NOT(TABLE_TYPE = 3 AND CONSTRAINT_NAME IS NULL) AND (CONS_TAB.CONSTRAINT_TYPE IS NULL OR CONS_TAB.CONSTRAINT_TYPE = 1) ) C JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT D ON C.TABLE_ID = D.TABLE_ID AND C.TENANT_ID = D.TENANT_ID AND D.TABLE_TYPE != 12 AND D.TABLE_TYPE != 13 AND bitand((D.TABLE_MODE / 4096), 15) IN (0,1) LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON C.TABLESPACE_ID = TP.TABLESPACE_ID AND TP.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(INDEX_TYPE_NAME AS VARCHAR2(27)) AS INDEX_TYPE, CAST(TABLE_OWNER AS VARCHAR2(128)) AS TABLE_OWNER, CAST(TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST('TABLE' AS CHAR(5)) AS TABLE_TYPE, CAST(UNIQUENESS AS VARCHAR2(9)) AS UNIQUENESS, CAST(COMPRESSION AS VARCHAR2(13)) AS COMPRESSION, CAST(NULL AS NUMBER) AS PREFIX_LENGTH, CAST(TABLESPACE_NAME AS VARCHAR2(30)) AS TABLESPACE_NAME, CAST(NULL AS NUMBER) AS INI_TRANS, CAST(NULL AS NUMBER) AS MAX_TRANS, CAST(NULL AS NUMBER) AS INITIAL_EXTENT, CAST(NULL AS NUMBER) AS NEXT_EXTENT, CAST(NULL AS NUMBER) AS MIN_EXTENTS, CAST(NULL AS NUMBER) AS MAX_EXTENTS, CAST(NULL AS NUMBER) AS PCT_INCREASE, CAST(NULL AS NUMBER) AS PCT_THRESHOLD, CAST(NULL AS NUMBER) AS INCLUDE_COLUMN, CAST(NULL AS NUMBER) AS FREELISTS, CAST(NULL AS NUMBER) 
AS FREELIST_GROUPS, CAST(NULL AS NUMBER) AS PCT_FREE, CAST(NULL AS VARCHAR2(3)) AS LOGGING, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STATUS AS VARCHAR2(8)) AS STATUS, CAST(NULL AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(NULL AS DATE) AS LAST_ANALYZED, CAST(DOP_DEGREE AS VARCHAR2(40)) AS DEGREE, CAST(NULL AS VARCHAR2(40)) AS INSTANCES, CAST(CASE WHEN A_PART_LEVEL = 0 THEN 'NO' ELSE 'YES' END AS VARCHAR2(3)) AS PARTITIONED, CAST(NULL AS VARCHAR2(1)) AS TEMPORARY, CAST(NULL AS VARCHAR2(1)) AS "GENERATED", CAST(NULL AS VARCHAR2(1)) AS SECONDARY, CAST(NULL AS VARCHAR2(7)) AS BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(3)) AS USER_STATS, CAST(NULL AS VARCHAR2(15)) AS DURATION, CAST(NULL AS NUMBER) AS PCT_DIRECT_ACCESS, CAST(NULL AS VARCHAR2(128)) AS ITYP_OWNER, CAST(NULL AS VARCHAR2(128)) AS ITYP_NAME, CAST(NULL AS VARCHAR2(1000)) AS PARAMETERS, CAST(NULL AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(NULL AS VARCHAR2(12)) AS DOMIDX_STATUS, CAST(NULL AS VARCHAR2(6)) AS DOMIDX_OPSTATUS, CAST(FUNCIDX_STATUS AS VARCHAR2(8)) AS FUNCIDX_STATUS, CAST('NO' AS VARCHAR2(3)) AS JOIN_INDEX, CAST(NULL AS VARCHAR2(3)) AS IOT_REDUNDANT_PKEY_ELIM, CAST(DROPPED AS VARCHAR2(3)) AS DROPPED, CAST(VISIBILITY AS VARCHAR2(9)) AS VISIBILITY, CAST(NULL AS VARCHAR2(14)) AS DOMIDX_MANAGEMENT, CAST(NULL AS VARCHAR2(3)) AS SEGMENT_CREATED, CAST(NULL AS VARCHAR2(3)) AS ORPHANED_ENTRIES, CAST(NULL AS VARCHAR2(7)) AS INDEXING, CAST(NULL AS VARCHAR2(3)) AS AUTO FROM (SELECT A.TENANT_ID, DATABASE_NAME AS INDEX_OWNER, CASE WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME != '__recyclebin') THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) WHEN (TABLE_TYPE = 5 AND B.DATABASE_NAME = '__recyclebin') 
THEN TABLE_NAME ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE WHEN A.TABLE_TYPE = 5 AND A.INDEX_TYPE IN (15, 18, 21) THEN 'DOMAIN' WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_INDEX, SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_BASE WHERE T_COL_BASE.TABLE_ID = A.DATA_TABLE_ID AND T_COL_BASE.COLUMN_NAME = T_COL_INDEX.COLUMN_NAME AND T_COL_INDEX.TABLE_ID = A.TABLE_ID AND T_COL_BASE.TENANT_ID = A.TENANT_ID AND T_COL_INDEX.TENANT_ID = A.TENANT_ID AND BITAND(T_COL_BASE.COLUMN_FLAGS,3) > 0 AND T_COL_INDEX.INDEX_POSITION != 0 ) THEN 'FUNCTION-BASED NORMAL' ELSE 'NORMAL' END AS INDEX_TYPE_NAME, DATABASE_NAME AS TABLE_OWNER, CASE WHEN (TABLE_TYPE IN (3, 15)) THEN A.TABLE_ID ELSE A.DATA_TABLE_ID END AS TABLE_ID, A.TABLE_ID AS INDEX_ID, CASE WHEN TABLE_TYPE IN (3, 15) THEN 'UNIQUE' WHEN A.INDEX_TYPE IN (2, 4, 8) THEN 'UNIQUE' ELSE 'NONUNIQUE' END AS UNIQUENESS, CASE WHEN A.COMPRESS_FUNC_NAME = NULL THEN 'DISABLED' ELSE 'ENABLED' END AS COMPRESSION, CASE WHEN TABLE_TYPE IN (3, 15) THEN 'VALID' WHEN A.INDEX_STATUS = 2 THEN 'VALID' WHEN A.INDEX_STATUS = 3 THEN 'CHECKING' WHEN A.INDEX_STATUS = 4 THEN 'INELEGIBLE' WHEN A.INDEX_STATUS = 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS STATUS, A.INDEX_TYPE AS A_INDEX_TYPE, A.PART_LEVEL AS A_PART_LEVEL, A.TABLE_TYPE AS A_TABLE_TYPE, CASE WHEN 0 = (SELECT COUNT(1) FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT WHERE TABLE_ID = A.TABLE_ID AND IS_HIDDEN = 0 AND TENANT_ID = EFFECTIVE_TENANT_ID()) THEN 'ENABLED' ELSE 'NULL' END AS FUNCIDX_STATUS, CASE WHEN B.IN_RECYCLEBIN = 1 THEN 'YES' ELSE 'NO' END AS DROPPED, CASE WHEN BITAND(A.INDEX_ATTRIBUTES_SET, 1) = 0 THEN 'VISIBLE' ELSE 'INVISIBLE' END AS VISIBILITY, A.TABLESPACE_ID, A.DOP AS DOP_DEGREE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT A JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT B ON A.DATABASE_ID = B.DATABASE_ID AND TABLE_TYPE IN (5, 3, 15) AND A.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND A.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((A.TABLE_MODE / 
4096), 15) IN (0,1) AND B.TENANT_ID = EFFECTIVE_TENANT_ID() AND A.DATABASE_ID = USERENV('SCHEMAID') AND B.DATABASE_NAME != '__recyclebin' LEFT JOIN SYS.ALL_VIRTUAL_CONSTRAINT_REAL_AGENT CONS_TAB ON (CONS_TAB.TABLE_ID = A.TABLE_ID) AND CONS_TAB.TENANT_ID = EFFECTIVE_TENANT_ID() WHERE NOT(TABLE_TYPE = 3 AND CONSTRAINT_NAME IS NULL) AND (CONS_TAB.CONSTRAINT_TYPE IS NULL OR CONS_TAB.CONSTRAINT_TYPE = 1) ) C JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT D ON C.TABLE_ID = D.TABLE_ID AND C.TENANT_ID = D.TENANT_ID AND D.TABLE_TYPE != 12 AND D.TABLE_TYPE != 13 AND bitand((D.TABLE_MODE / 4096), 15) IN (0,1) LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON C.TABLESPACE_ID = TP.TABLESPACE_ID AND TP.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.25051_25100.cpp b/src/share/inner_table/ob_inner_table_schema.25051_25100.cpp index 1e9fc2dc77..10b6c8f78e 100644 --- a/src/share/inner_table/ob_inner_table_schema.25051_25100.cpp +++ b/src/share/inner_table/ob_inner_table_schema.25051_25100.cpp @@ -660,7 +660,7 @@ int ObInnerTableSchema::dba_part_key_columns_ora_schema(ObTableSchema &table_sch table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND 
BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = 
EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (17,19,20,22) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME 
ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -710,7 +710,7 @@ int ObInnerTableSchema::all_part_key_columns_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND (T.DATABASE_ID = 
USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.TABLE_ID, T.DATABASE_ID) = 1) UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.DATA_TABLE_ID, T.DATABASE_ID) = 1) UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() 
AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.DATA_TABLE_ID, T.DATABASE_ID) = 1) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.TABLE_ID, T.DATABASE_ID) = 1) UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (17,19,20,22) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = 
EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.DATA_TABLE_ID, T.DATABASE_ID) = 1) UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.DATA_TABLE_ID, T.DATABASE_ID) = 1) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -760,7 +760,7 @@ int ObInnerTableSchema::user_part_key_columns_schema(ObTableSchema &table_schema table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = 
D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = USERENV('SCHEMAID') UNION SELECT CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = USERENV('SCHEMAID') AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) UNION SELECT CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND 
C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = USERENV('SCHEMAID') AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = USERENV('SCHEMAID') UNION SELECT CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (17,19,20,22) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = USERENV('SCHEMAID') AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) UNION 
SELECT CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 255) AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = USERENV('SCHEMAID') AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -810,7 +810,7 @@ int ObInnerTableSchema::dba_subpart_key_columns_ora_schema(ObTableSchema &table_ table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND T.DATABASE_ID = D.DATABASE_ID AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = 
EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) )__"))) { + if 
(OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND C.TABLE_ID = T.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND T.DATABASE_ID = D.DATABASE_ID AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (17,19,20,22) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), 
'_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -860,7 +860,7 @@ int ObInnerTableSchema::all_subpart_key_columns_schema(ObTableSchema &table_sche table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.TABLE_ID, 
T.DATABASE_ID) = 1) UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.DATA_TABLE_ID, T.DATABASE_ID) = 1) UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() 
AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.DATA_TABLE_ID, T.DATABASE_ID) = 1) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.TABLE_ID, T.DATABASE_ID) = 1) UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (17,19,20,22) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND 
D.TENANT_ID = EFFECTIVE_TENANT_ID() AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.DATA_TABLE_ID, T.DATABASE_ID) = 1) UNION SELECT CAST(D.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.DATA_TABLE_ID, T.DATABASE_ID) = 1) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -910,7 +910,7 @@ int ObInnerTableSchema::user_subpart_key_columns_schema(ObTableSchema &table_sch table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND 
bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = USERENV('SCHEMAID') UNION SELECT CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = USERENV('SCHEMAID') UNION SELECT CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND 
BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = USERENV('SCHEMAID') )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(T.TABLE_NAME AS VARCHAR2(128)) AS NAME, CAST('TABLE' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.TABLE_ID AND T.DATABASE_ID = D.DATABASE_ID AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND T.TABLE_TYPE IN (3, 8, 9) AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = USERENV('SCHEMAID') UNION SELECT CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (17,19,20,22) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = 
USERENV('SCHEMAID') UNION SELECT CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN T.TABLE_NAME ELSE SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS NAME, CAST('INDEX' AS CHAR(5)) AS OBJECT_TYPE, CAST(C.COLUMN_NAME AS VARCHAR2(4000)) AS COLUMN_NAME, CAST(BITAND(C.PARTITION_KEY_POSITION, 65280)/256 AS NUMBER) AS COLUMN_POSITION, CAST(NULL AS NUMBER) AS COLLATED_COLUMN_ID FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C, SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T, SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D WHERE C.TENANT_ID = T.TENANT_ID AND T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND D.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.DATABASE_ID = USERENV('SCHEMAID') )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -1710,7 +1710,7 @@ int ObInnerTableSchema::dba_part_indexes_ora_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(I_T.OWNER AS VARCHAR2(128)) AS OWNER, CAST(I_T.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(I_T.TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST(CASE I_T.PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'HASH' WHEN 2 THEN 'HASH' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END AS VARCHAR2(9)) AS PARTITIONING_TYPE, CAST(CASE WHEN I_T.PART_LEVEL < 2 THEN 'NONE' ELSE (CASE I_T.SUB_PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'KEY' WHEN 2 THEN 'KEY' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END) END AS VARCHAR2(9)) AS SUBPARTITIONING_TYPE, CAST(I_T.PART_NUM 
AS NUMBER) AS PARTITION_COUNT, CAST(CASE WHEN (I_T.PART_LEVEL < 2 OR I_T.SUB_PART_TEMPLATE_FLAGS = 0) THEN 0 ELSE I_T.SUB_PART_NUM END AS NUMBER) AS DEF_SUBPARTITION_COUNT, CAST(PKC.PARTITIONING_KEY_COUNT AS NUMBER) AS PARTITIONING_KEY_COUNT, CAST(PKC.SUBPARTITIONING_KEY_COUNT AS NUMBER) AS SUBPARTITIONING_KEY_COUNT, CAST(CASE I_T.IS_LOCAL WHEN 1 THEN 'LOCAL' ELSE 'GLOBAL' END AS VARCHAR2(6)) AS LOCALITY, CAST(CASE WHEN I_T.IS_LOCAL = 0 THEN 'PREFIXED' WHEN (I_T.IS_LOCAL = 1 AND LOCAL_PARTITIONED_PREFIX_INDEX.IS_PREFIXED = 1) THEN 'PREFIXED' ELSE 'NON_PREFIXED' END AS VARCHAR2(12)) AS ALIGNMENT, CAST(TP.TABLESPACE_NAME AS VARCHAR2(30)) AS DEF_TABLESPACE_NAME, CAST(0 AS NUMBER) AS DEF_PCT_FREE, CAST(0 AS NUMBER) AS DEF_INI_TRANS, CAST(0 AS NUMBER) AS DEF_MAX_TRANS, CAST(NULL AS VARCHAR2(40)) AS DEF_INITIAL_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_NEXT_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_MIN_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS DEF_MAX_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS DEF_MAX_SIZE, CAST(NULL AS VARCHAR2(40)) AS DEF_PCT_INCREASE, CAST(0 AS NUMBER) AS DEF_FREELISTS, CAST(0 AS NUMBER) AS DEF_FREELIST_GROUPS, CAST(NULL AS VARCHAR2(7)) AS DEF_LOGGING, CAST(NULL AS VARCHAR2(7)) AS DEF_BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS DEF_FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS DEF_CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(1000)) AS DEF_PARAMETERS, CAST('NO' AS VARCHAR2(1000)) AS "INTERVAL", CAST('NO' AS VARCHAR2(3)) AS AUTOLIST, CAST(NULL AS VARCHAR2(1000)) AS INTERVAL_SUBPARTITION, CAST(NULL AS VARCHAR2(1000)) AS AUTOLIST_SUBPARTITION FROM (SELECT D.TENANT_ID, D.DATABASE_NAME AS OWNER, I.TABLE_ID AS INDEX_ID, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN I.TABLE_NAME ELSE SUBSTR(I.TABLE_NAME, 7 + INSTR(SUBSTR(I.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS INDEX_NAME, I.PART_LEVEL, I.PART_FUNC_TYPE, I.PART_NUM, I.SUB_PART_FUNC_TYPE, T.TABLE_NAME AS TABLE_NAME, T.SUB_PART_NUM, T.SUB_PART_TEMPLATE_FLAGS, T.TABLESPACE_ID, (CASE I.INDEX_TYPE WHEN 1 THEN 1 WHEN 
2 THEN 1 WHEN 10 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID WHERE I.TABLE_TYPE = 5 AND I.PART_LEVEL != 0 AND T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 ) I_T JOIN (SELECT TENANT_ID, TABLE_ID, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 255) != 0 THEN 1 ELSE 0 END) AS PARTITIONING_KEY_COUNT, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 65280)/256 != 0 THEN 1 ELSE 0 END) AS SUBPARTITIONING_KEY_COUNT FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT GROUP BY TENANT_ID, TABLE_ID) PKC ON I_T.TENANT_ID = PKC.TENANT_ID AND I_T.JOIN_TABLE_ID = PKC.TABLE_ID LEFT JOIN ( SELECT I.TENANT_ID, I.TABLE_ID AS INDEX_ID, 1 AS IS_PREFIXED FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE IN (1, 2, 10) AND I.PART_LEVEL != 0 AND I.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) AND NOT EXISTS (SELECT * FROM (SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = I.DATA_TABLE_ID AND C.PARTITION_KEY_POSITION != 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() ) PART_COLUMNS LEFT JOIN (SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = I.TABLE_ID AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND C.INDEX_POSITION != 0 ) INDEX_COLUMNS ON PART_COLUMNS.COLUMN_ID = INDEX_COLUMNS.COLUMN_ID WHERE (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL OR BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != INDEX_COLUMNS.INDEX_POSITION) ) OR (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 65280)/256 != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL) ) ) ) LOCAL_PARTITIONED_PREFIX_INDEX ON 
I_T.TENANT_ID = LOCAL_PARTITIONED_PREFIX_INDEX.TENANT_ID AND I_T.INDEX_ID = LOCAL_PARTITIONED_PREFIX_INDEX.INDEX_ID LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON I_T.TENANT_ID = TP.TENANT_ID AND I_T.TABLESPACE_ID = TP.TABLESPACE_ID WHERE I_T.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(I_T.OWNER AS VARCHAR2(128)) AS OWNER, CAST(I_T.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(I_T.TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST(CASE I_T.PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'HASH' WHEN 2 THEN 'HASH' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END AS VARCHAR2(9)) AS PARTITIONING_TYPE, CAST(CASE WHEN I_T.PART_LEVEL < 2 THEN 'NONE' ELSE (CASE I_T.SUB_PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'KEY' WHEN 2 THEN 'KEY' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END) END AS VARCHAR2(9)) AS SUBPARTITIONING_TYPE, CAST(I_T.PART_NUM AS NUMBER) AS PARTITION_COUNT, CAST(CASE WHEN (I_T.PART_LEVEL < 2 OR I_T.SUB_PART_TEMPLATE_FLAGS = 0) THEN 0 ELSE I_T.SUB_PART_NUM END AS NUMBER) AS DEF_SUBPARTITION_COUNT, CAST(PKC.PARTITIONING_KEY_COUNT AS NUMBER) AS PARTITIONING_KEY_COUNT, CAST(PKC.SUBPARTITIONING_KEY_COUNT AS NUMBER) AS SUBPARTITIONING_KEY_COUNT, CAST(CASE I_T.IS_LOCAL WHEN 1 THEN 'LOCAL' ELSE 'GLOBAL' END AS VARCHAR2(6)) AS LOCALITY, CAST(CASE WHEN I_T.IS_LOCAL = 0 THEN 'PREFIXED' WHEN (I_T.IS_LOCAL = 1 AND LOCAL_PARTITIONED_PREFIX_INDEX.IS_PREFIXED = 1) THEN 'PREFIXED' ELSE 'NON_PREFIXED' END AS VARCHAR2(12)) AS ALIGNMENT, CAST(TP.TABLESPACE_NAME AS VARCHAR2(30)) AS DEF_TABLESPACE_NAME, CAST(0 AS NUMBER) AS DEF_PCT_FREE, CAST(0 AS NUMBER) AS DEF_INI_TRANS, CAST(0 AS NUMBER) AS DEF_MAX_TRANS, CAST(NULL AS VARCHAR2(40)) AS DEF_INITIAL_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_NEXT_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_MIN_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS 
DEF_MAX_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS DEF_MAX_SIZE, CAST(NULL AS VARCHAR2(40)) AS DEF_PCT_INCREASE, CAST(0 AS NUMBER) AS DEF_FREELISTS, CAST(0 AS NUMBER) AS DEF_FREELIST_GROUPS, CAST(NULL AS VARCHAR2(7)) AS DEF_LOGGING, CAST(NULL AS VARCHAR2(7)) AS DEF_BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS DEF_FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS DEF_CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(1000)) AS DEF_PARAMETERS, CAST('NO' AS VARCHAR2(1000)) AS "INTERVAL", CAST('NO' AS VARCHAR2(3)) AS AUTOLIST, CAST(NULL AS VARCHAR2(1000)) AS INTERVAL_SUBPARTITION, CAST(NULL AS VARCHAR2(1000)) AS AUTOLIST_SUBPARTITION FROM (SELECT D.TENANT_ID, D.DATABASE_NAME AS OWNER, I.TABLE_ID AS INDEX_ID, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN I.TABLE_NAME ELSE SUBSTR(I.TABLE_NAME, 7 + INSTR(SUBSTR(I.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS INDEX_NAME, I.PART_LEVEL, I.PART_FUNC_TYPE, I.PART_NUM, I.SUB_PART_FUNC_TYPE, T.TABLE_NAME AS TABLE_NAME, T.SUB_PART_NUM, T.SUB_PART_TEMPLATE_FLAGS, T.TABLESPACE_ID, (CASE I.INDEX_TYPE WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 WHEN 15 THEN 1 WHEN 23 THEN 1 WHEN 24 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID WHEN 15 THEN T.TABLE_ID WHEN 23 THEN T.TABLE_ID WHEN 24 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND I.PART_LEVEL != 0 AND T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 ) I_T JOIN (SELECT TENANT_ID, TABLE_ID, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 255) != 0 THEN 1 ELSE 0 END) AS PARTITIONING_KEY_COUNT, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 65280)/256 != 0 THEN 1 ELSE 0 
END) AS SUBPARTITIONING_KEY_COUNT FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT GROUP BY TENANT_ID, TABLE_ID) PKC ON I_T.TENANT_ID = PKC.TENANT_ID AND I_T.JOIN_TABLE_ID = PKC.TABLE_ID LEFT JOIN ( SELECT I.TENANT_ID, I.TABLE_ID AS INDEX_ID, 1 AS IS_PREFIXED FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE IN (1, 2, 10, 15, 23, 24) AND I.PART_LEVEL != 0 AND I.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) AND NOT EXISTS (SELECT * FROM (SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = I.DATA_TABLE_ID AND C.PARTITION_KEY_POSITION != 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() ) PART_COLUMNS LEFT JOIN (SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = I.TABLE_ID AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND C.INDEX_POSITION != 0 ) INDEX_COLUMNS ON PART_COLUMNS.COLUMN_ID = INDEX_COLUMNS.COLUMN_ID WHERE (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL OR BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != INDEX_COLUMNS.INDEX_POSITION) ) OR (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 65280)/256 != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL) ) ) ) LOCAL_PARTITIONED_PREFIX_INDEX ON I_T.TENANT_ID = LOCAL_PARTITIONED_PREFIX_INDEX.TENANT_ID AND I_T.INDEX_ID = LOCAL_PARTITIONED_PREFIX_INDEX.INDEX_ID LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON I_T.TENANT_ID = TP.TENANT_ID AND I_T.TABLESPACE_ID = TP.TABLESPACE_ID WHERE I_T.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -1760,7 +1760,7 @@ int ObInnerTableSchema::all_part_indexes_ora_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(I_T.OWNER AS VARCHAR2(128)) AS OWNER, CAST(I_T.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(I_T.TABLE_NAME AS 
VARCHAR2(128)) AS TABLE_NAME, CAST(CASE I_T.PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'HASH' WHEN 2 THEN 'HASH' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END AS VARCHAR2(9)) AS PARTITIONING_TYPE, CAST(CASE WHEN I_T.PART_LEVEL < 2 THEN 'NONE' ELSE (CASE I_T.SUB_PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'KEY' WHEN 2 THEN 'KEY' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END) END AS VARCHAR2(9)) AS SUBPARTITIONING_TYPE, CAST(I_T.PART_NUM AS NUMBER) AS PARTITION_COUNT, CAST(CASE WHEN (I_T.PART_LEVEL < 2 OR I_T.SUB_PART_TEMPLATE_FLAGS = 0) THEN 0 ELSE I_T.SUB_PART_NUM END AS NUMBER) AS DEF_SUBPARTITION_COUNT, CAST(PKC.PARTITIONING_KEY_COUNT AS NUMBER) AS PARTITIONING_KEY_COUNT, CAST(PKC.SUBPARTITIONING_KEY_COUNT AS NUMBER) AS SUBPARTITIONING_KEY_COUNT, CAST(CASE I_T.IS_LOCAL WHEN 1 THEN 'LOCAL' ELSE 'GLOBAL' END AS VARCHAR2(6)) AS LOCALITY, CAST(CASE WHEN I_T.IS_LOCAL = 0 THEN 'PREFIXED' WHEN (I_T.IS_LOCAL = 1 AND LOCAL_PARTITIONED_PREFIX_INDEX.IS_PREFIXED = 1) THEN 'PREFIXED' ELSE 'NON_PREFIXED' END AS VARCHAR2(12)) AS ALIGNMENT, CAST(TP.TABLESPACE_NAME AS VARCHAR2(30)) AS DEF_TABLESPACE_NAME, CAST(0 AS NUMBER) AS DEF_PCT_FREE, CAST(0 AS NUMBER) AS DEF_INI_TRANS, CAST(0 AS NUMBER) AS DEF_MAX_TRANS, CAST(NULL AS VARCHAR2(40)) AS DEF_INITIAL_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_NEXT_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_MIN_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS DEF_MAX_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS DEF_MAX_SIZE, CAST(NULL AS VARCHAR2(40)) AS DEF_PCT_INCREASE, CAST(0 AS NUMBER) AS DEF_FREELISTS, CAST(0 AS NUMBER) AS DEF_FREELIST_GROUPS, CAST(NULL AS VARCHAR2(7)) AS DEF_LOGGING, CAST(NULL AS VARCHAR2(7)) AS DEF_BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS DEF_FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS DEF_CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(1000)) AS DEF_PARAMETERS, CAST('NO' AS VARCHAR2(1000)) AS "INTERVAL", CAST('NO' AS VARCHAR2(3)) AS 
AUTOLIST, CAST(NULL AS VARCHAR2(1000)) AS INTERVAL_SUBPARTITION, CAST(NULL AS VARCHAR2(1000)) AS AUTOLIST_SUBPARTITION FROM (SELECT D.TENANT_ID, D.DATABASE_NAME AS OWNER, I.TABLE_ID AS INDEX_ID, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN I.TABLE_NAME ELSE SUBSTR(I.TABLE_NAME, 7 + INSTR(SUBSTR(I.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS INDEX_NAME, I.PART_LEVEL, I.PART_FUNC_TYPE, I.PART_NUM, I.SUB_PART_FUNC_TYPE, T.TABLE_NAME AS TABLE_NAME, T.SUB_PART_NUM, T.SUB_PART_TEMPLATE_FLAGS, T.TABLESPACE_ID, (CASE I.INDEX_TYPE WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID WHERE I.TABLE_TYPE = 5 AND I.PART_LEVEL != 0 AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.TABLE_ID, T.DATABASE_ID) = 1) AND T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 ) I_T JOIN (SELECT TENANT_ID, TABLE_ID, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 255) != 0 THEN 1 ELSE 0 END) AS PARTITIONING_KEY_COUNT, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 65280)/256 != 0 THEN 1 ELSE 0 END) AS SUBPARTITIONING_KEY_COUNT FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT GROUP BY TENANT_ID, TABLE_ID) PKC ON I_T.TENANT_ID = PKC.TENANT_ID AND I_T.JOIN_TABLE_ID = PKC.TABLE_ID LEFT JOIN ( SELECT I.TENANT_ID, I.TABLE_ID AS INDEX_ID, 1 AS IS_PREFIXED FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE IN (1, 2, 10) AND I.PART_LEVEL != 0 AND I.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) AND NOT EXISTS (SELECT * FROM (SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = 
I.DATA_TABLE_ID AND C.PARTITION_KEY_POSITION != 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() ) PART_COLUMNS LEFT JOIN (SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = I.TABLE_ID AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND C.INDEX_POSITION != 0 ) INDEX_COLUMNS ON PART_COLUMNS.COLUMN_ID = INDEX_COLUMNS.COLUMN_ID WHERE (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL OR BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != INDEX_COLUMNS.INDEX_POSITION) ) OR (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 65280)/256 != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL) ) ) ) LOCAL_PARTITIONED_PREFIX_INDEX ON I_T.TENANT_ID = LOCAL_PARTITIONED_PREFIX_INDEX.TENANT_ID AND I_T.INDEX_ID = LOCAL_PARTITIONED_PREFIX_INDEX.INDEX_ID LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON I_T.TENANT_ID = TP.TENANT_ID AND I_T.TABLESPACE_ID = TP.TABLESPACE_ID WHERE I_T.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(I_T.OWNER AS VARCHAR2(128)) AS OWNER, CAST(I_T.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(I_T.TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST(CASE I_T.PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'HASH' WHEN 2 THEN 'HASH' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END AS VARCHAR2(9)) AS PARTITIONING_TYPE, CAST(CASE WHEN I_T.PART_LEVEL < 2 THEN 'NONE' ELSE (CASE I_T.SUB_PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'KEY' WHEN 2 THEN 'KEY' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END) END AS VARCHAR2(9)) AS SUBPARTITIONING_TYPE, CAST(I_T.PART_NUM AS NUMBER) AS PARTITION_COUNT, CAST(CASE WHEN (I_T.PART_LEVEL < 2 OR I_T.SUB_PART_TEMPLATE_FLAGS = 0) THEN 0 ELSE I_T.SUB_PART_NUM END AS NUMBER) AS DEF_SUBPARTITION_COUNT, CAST(PKC.PARTITIONING_KEY_COUNT AS NUMBER) AS PARTITIONING_KEY_COUNT, CAST(PKC.SUBPARTITIONING_KEY_COUNT AS NUMBER) AS 
SUBPARTITIONING_KEY_COUNT, CAST(CASE I_T.IS_LOCAL WHEN 1 THEN 'LOCAL' ELSE 'GLOBAL' END AS VARCHAR2(6)) AS LOCALITY, CAST(CASE WHEN I_T.IS_LOCAL = 0 THEN 'PREFIXED' WHEN (I_T.IS_LOCAL = 1 AND LOCAL_PARTITIONED_PREFIX_INDEX.IS_PREFIXED = 1) THEN 'PREFIXED' ELSE 'NON_PREFIXED' END AS VARCHAR2(12)) AS ALIGNMENT, CAST(TP.TABLESPACE_NAME AS VARCHAR2(30)) AS DEF_TABLESPACE_NAME, CAST(0 AS NUMBER) AS DEF_PCT_FREE, CAST(0 AS NUMBER) AS DEF_INI_TRANS, CAST(0 AS NUMBER) AS DEF_MAX_TRANS, CAST(NULL AS VARCHAR2(40)) AS DEF_INITIAL_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_NEXT_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_MIN_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS DEF_MAX_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS DEF_MAX_SIZE, CAST(NULL AS VARCHAR2(40)) AS DEF_PCT_INCREASE, CAST(0 AS NUMBER) AS DEF_FREELISTS, CAST(0 AS NUMBER) AS DEF_FREELIST_GROUPS, CAST(NULL AS VARCHAR2(7)) AS DEF_LOGGING, CAST(NULL AS VARCHAR2(7)) AS DEF_BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS DEF_FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS DEF_CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(1000)) AS DEF_PARAMETERS, CAST('NO' AS VARCHAR2(1000)) AS "INTERVAL", CAST('NO' AS VARCHAR2(3)) AS AUTOLIST, CAST(NULL AS VARCHAR2(1000)) AS INTERVAL_SUBPARTITION, CAST(NULL AS VARCHAR2(1000)) AS AUTOLIST_SUBPARTITION FROM (SELECT D.TENANT_ID, D.DATABASE_NAME AS OWNER, I.TABLE_ID AS INDEX_ID, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN I.TABLE_NAME ELSE SUBSTR(I.TABLE_NAME, 7 + INSTR(SUBSTR(I.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS INDEX_NAME, I.PART_LEVEL, I.PART_FUNC_TYPE, I.PART_NUM, I.SUB_PART_FUNC_TYPE, T.TABLE_NAME AS TABLE_NAME, T.SUB_PART_NUM, T.SUB_PART_TEMPLATE_FLAGS, T.TABLESPACE_ID, (CASE I.INDEX_TYPE WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 WHEN 15 THEN 1 WHEN 23 THEN 1 WHEN 24 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID WHEN 15 THEN T.TABLE_ID WHEN 23 THEN T.TABLE_ID WHEN 24 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID 
FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND I.PART_LEVEL != 0 AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.TABLE_ID, T.DATABASE_ID) = 1) AND T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 ) I_T JOIN (SELECT TENANT_ID, TABLE_ID, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 255) != 0 THEN 1 ELSE 0 END) AS PARTITIONING_KEY_COUNT, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 65280)/256 != 0 THEN 1 ELSE 0 END) AS SUBPARTITIONING_KEY_COUNT FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT GROUP BY TENANT_ID, TABLE_ID) PKC ON I_T.TENANT_ID = PKC.TENANT_ID AND I_T.JOIN_TABLE_ID = PKC.TABLE_ID LEFT JOIN ( SELECT I.TENANT_ID, I.TABLE_ID AS INDEX_ID, 1 AS IS_PREFIXED FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE IN (1, 2, 10, 15, 23, 24) AND I.PART_LEVEL != 0 AND I.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) AND NOT EXISTS (SELECT * FROM (SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = I.DATA_TABLE_ID AND C.PARTITION_KEY_POSITION != 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() ) PART_COLUMNS LEFT JOIN (SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = I.TABLE_ID AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND C.INDEX_POSITION != 0 ) INDEX_COLUMNS ON PART_COLUMNS.COLUMN_ID = INDEX_COLUMNS.COLUMN_ID WHERE (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL OR BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != INDEX_COLUMNS.INDEX_POSITION) ) OR (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 65280)/256 != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL) ) ) ) LOCAL_PARTITIONED_PREFIX_INDEX ON I_T.TENANT_ID 
= LOCAL_PARTITIONED_PREFIX_INDEX.TENANT_ID AND I_T.INDEX_ID = LOCAL_PARTITIONED_PREFIX_INDEX.INDEX_ID LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON I_T.TENANT_ID = TP.TENANT_ID AND I_T.TABLESPACE_ID = TP.TABLESPACE_ID WHERE I_T.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -1810,7 +1810,7 @@ int ObInnerTableSchema::user_part_indexes_ora_schema(ObTableSchema &table_schema table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(I_T.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(I_T.TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST(CASE I_T.PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'HASH' WHEN 2 THEN 'HASH' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END AS VARCHAR2(9)) AS PARTITIONING_TYPE, CAST(CASE WHEN I_T.PART_LEVEL < 2 THEN 'NONE' ELSE (CASE I_T.SUB_PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'KEY' WHEN 2 THEN 'KEY' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END) END AS VARCHAR2(9)) AS SUBPARTITIONING_TYPE, CAST(I_T.PART_NUM AS NUMBER) AS PARTITION_COUNT, CAST(CASE WHEN (I_T.PART_LEVEL < 2 OR I_T.SUB_PART_TEMPLATE_FLAGS = 0) THEN 0 ELSE I_T.SUB_PART_NUM END AS NUMBER) AS DEF_SUBPARTITION_COUNT, CAST(PKC.PARTITIONING_KEY_COUNT AS NUMBER) AS PARTITIONING_KEY_COUNT, CAST(PKC.SUBPARTITIONING_KEY_COUNT AS NUMBER) AS SUBPARTITIONING_KEY_COUNT, CAST(CASE I_T.IS_LOCAL WHEN 1 THEN 'LOCAL' ELSE 'GLOBAL' END AS VARCHAR2(6)) AS LOCALITY, CAST(CASE WHEN I_T.IS_LOCAL = 0 THEN 'PREFIXED' WHEN (I_T.IS_LOCAL = 1 AND LOCAL_PARTITIONED_PREFIX_INDEX.IS_PREFIXED = 1) THEN 'PREFIXED' ELSE 'NON_PREFIXED' END AS VARCHAR2(12)) AS ALIGNMENT, CAST(TP.TABLESPACE_NAME AS VARCHAR2(30)) AS DEF_TABLESPACE_NAME, CAST(0 AS NUMBER) AS DEF_PCT_FREE, CAST(0 AS NUMBER) AS DEF_INI_TRANS, 
CAST(0 AS NUMBER) AS DEF_MAX_TRANS, CAST(NULL AS VARCHAR2(40)) AS DEF_INITIAL_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_NEXT_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_MIN_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS DEF_MAX_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS DEF_MAX_SIZE, CAST(NULL AS VARCHAR2(40)) AS DEF_PCT_INCREASE, CAST(0 AS NUMBER) AS DEF_FREELISTS, CAST(0 AS NUMBER) AS DEF_FREELIST_GROUPS, CAST(NULL AS VARCHAR2(7)) AS DEF_LOGGING, CAST(NULL AS VARCHAR2(7)) AS DEF_BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS DEF_FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS DEF_CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(1000)) AS DEF_PARAMETERS, CAST('NO' AS VARCHAR2(1000)) AS "INTERVAL", CAST('NO' AS VARCHAR2(3)) AS AUTOLIST, CAST(NULL AS VARCHAR2(1000)) AS INTERVAL_SUBPARTITION, CAST(NULL AS VARCHAR2(1000)) AS AUTOLIST_SUBPARTITION FROM (SELECT D.TENANT_ID, D.DATABASE_NAME AS OWNER, I.TABLE_ID AS INDEX_ID, CAST(CASE WHEN D.DATABASE_NAME = '__recyclebin' THEN I.TABLE_NAME ELSE SUBSTR(I.TABLE_NAME, 7 + INSTR(SUBSTR(I.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS INDEX_NAME, I.PART_LEVEL, I.PART_FUNC_TYPE, I.PART_NUM, I.SUB_PART_FUNC_TYPE, T.TABLE_NAME AS TABLE_NAME, T.SUB_PART_NUM, T.SUB_PART_TEMPLATE_FLAGS, T.TABLESPACE_ID, (CASE I.INDEX_TYPE WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID WHERE I.TABLE_TYPE = 5 AND I.PART_LEVEL != 0 AND T.DATABASE_ID = USERENV('SCHEMAID') AND T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 ) I_T JOIN (SELECT TENANT_ID, TABLE_ID, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 255) != 0 THEN 1 ELSE 0 END) AS 
PARTITIONING_KEY_COUNT, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 65280)/256 != 0 THEN 1 ELSE 0 END) AS SUBPARTITIONING_KEY_COUNT FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT GROUP BY TENANT_ID, TABLE_ID) PKC ON I_T.TENANT_ID = PKC.TENANT_ID AND I_T.JOIN_TABLE_ID = PKC.TABLE_ID LEFT JOIN ( SELECT I.TENANT_ID, I.TABLE_ID AS INDEX_ID, 1 AS IS_PREFIXED FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE IN (1, 2, 10) AND I.PART_LEVEL != 0 AND I.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) AND NOT EXISTS (SELECT * FROM (SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = I.DATA_TABLE_ID AND C.PARTITION_KEY_POSITION != 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() ) PART_COLUMNS LEFT JOIN (SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = I.TABLE_ID AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND C.INDEX_POSITION != 0 ) INDEX_COLUMNS ON PART_COLUMNS.COLUMN_ID = INDEX_COLUMNS.COLUMN_ID WHERE (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL OR BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != INDEX_COLUMNS.INDEX_POSITION) ) OR (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 65280)/256 != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL) ) ) ) LOCAL_PARTITIONED_PREFIX_INDEX ON I_T.TENANT_ID = LOCAL_PARTITIONED_PREFIX_INDEX.TENANT_ID AND I_T.INDEX_ID = LOCAL_PARTITIONED_PREFIX_INDEX.INDEX_ID LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON I_T.TENANT_ID = TP.TENANT_ID AND I_T.TABLESPACE_ID = TP.TABLESPACE_ID WHERE I_T.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(I_T.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(I_T.TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST(CASE I_T.PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'HASH' WHEN 2 THEN 'HASH' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END AS VARCHAR2(9)) AS 
PARTITIONING_TYPE, CAST(CASE WHEN I_T.PART_LEVEL < 2 THEN 'NONE' ELSE (CASE I_T.SUB_PART_FUNC_TYPE WHEN 0 THEN 'HASH' WHEN 1 THEN 'KEY' WHEN 2 THEN 'KEY' WHEN 3 THEN 'RANGE' WHEN 4 THEN 'RANGE' WHEN 5 THEN 'LIST' WHEN 6 THEN 'LIST' WHEN 7 THEN 'RANGE' END) END AS VARCHAR2(9)) AS SUBPARTITIONING_TYPE, CAST(I_T.PART_NUM AS NUMBER) AS PARTITION_COUNT, CAST(CASE WHEN (I_T.PART_LEVEL < 2 OR I_T.SUB_PART_TEMPLATE_FLAGS = 0) THEN 0 ELSE I_T.SUB_PART_NUM END AS NUMBER) AS DEF_SUBPARTITION_COUNT, CAST(PKC.PARTITIONING_KEY_COUNT AS NUMBER) AS PARTITIONING_KEY_COUNT, CAST(PKC.SUBPARTITIONING_KEY_COUNT AS NUMBER) AS SUBPARTITIONING_KEY_COUNT, CAST(CASE I_T.IS_LOCAL WHEN 1 THEN 'LOCAL' ELSE 'GLOBAL' END AS VARCHAR2(6)) AS LOCALITY, CAST(CASE WHEN I_T.IS_LOCAL = 0 THEN 'PREFIXED' WHEN (I_T.IS_LOCAL = 1 AND LOCAL_PARTITIONED_PREFIX_INDEX.IS_PREFIXED = 1) THEN 'PREFIXED' ELSE 'NON_PREFIXED' END AS VARCHAR2(12)) AS ALIGNMENT, CAST(TP.TABLESPACE_NAME AS VARCHAR2(30)) AS DEF_TABLESPACE_NAME, CAST(0 AS NUMBER) AS DEF_PCT_FREE, CAST(0 AS NUMBER) AS DEF_INI_TRANS, CAST(0 AS NUMBER) AS DEF_MAX_TRANS, CAST(NULL AS VARCHAR2(40)) AS DEF_INITIAL_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_NEXT_EXTENT, CAST(NULL AS VARCHAR2(40)) AS DEF_MIN_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS DEF_MAX_EXTENTS, CAST(NULL AS VARCHAR2(40)) AS DEF_MAX_SIZE, CAST(NULL AS VARCHAR2(40)) AS DEF_PCT_INCREASE, CAST(0 AS NUMBER) AS DEF_FREELISTS, CAST(0 AS NUMBER) AS DEF_FREELIST_GROUPS, CAST(NULL AS VARCHAR2(7)) AS DEF_LOGGING, CAST(NULL AS VARCHAR2(7)) AS DEF_BUFFER_POOL, CAST(NULL AS VARCHAR2(7)) AS DEF_FLASH_CACHE, CAST(NULL AS VARCHAR2(7)) AS DEF_CELL_FLASH_CACHE, CAST(NULL AS VARCHAR2(1000)) AS DEF_PARAMETERS, CAST('NO' AS VARCHAR2(1000)) AS "INTERVAL", CAST('NO' AS VARCHAR2(3)) AS AUTOLIST, CAST(NULL AS VARCHAR2(1000)) AS INTERVAL_SUBPARTITION, CAST(NULL AS VARCHAR2(1000)) AS AUTOLIST_SUBPARTITION FROM (SELECT D.TENANT_ID, D.DATABASE_NAME AS OWNER, I.TABLE_ID AS INDEX_ID, CAST(CASE WHEN D.DATABASE_NAME = 
'__recyclebin' THEN I.TABLE_NAME ELSE SUBSTR(I.TABLE_NAME, 7 + INSTR(SUBSTR(I.TABLE_NAME, 7), '_')) END AS VARCHAR2(128)) AS INDEX_NAME, I.PART_LEVEL, I.PART_FUNC_TYPE, I.PART_NUM, I.SUB_PART_FUNC_TYPE, T.TABLE_NAME AS TABLE_NAME, T.SUB_PART_NUM, T.SUB_PART_TEMPLATE_FLAGS, T.TABLESPACE_ID, (CASE I.INDEX_TYPE WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 WHEN 15 THEN 1 WHEN 23 THEN 1 WHEN 24 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID WHEN 15 THEN T.TABLE_ID WHEN 23 THEN T.TABLE_ID WHEN 24 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND I.PART_LEVEL != 0 AND T.DATABASE_ID = USERENV('SCHEMAID') AND T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 ) I_T JOIN (SELECT TENANT_ID, TABLE_ID, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 255) != 0 THEN 1 ELSE 0 END) AS PARTITIONING_KEY_COUNT, SUM(CASE WHEN BITAND(PARTITION_KEY_POSITION, 65280)/256 != 0 THEN 1 ELSE 0 END) AS SUBPARTITIONING_KEY_COUNT FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT GROUP BY TENANT_ID, TABLE_ID) PKC ON I_T.TENANT_ID = PKC.TENANT_ID AND I_T.JOIN_TABLE_ID = PKC.TABLE_ID LEFT JOIN ( SELECT I.TENANT_ID, I.TABLE_ID AS INDEX_ID, 1 AS IS_PREFIXED FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE IN (1, 2, 10, 15, 23, 24) AND I.PART_LEVEL != 0 AND I.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) AND NOT EXISTS (SELECT * FROM (SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = I.DATA_TABLE_ID AND C.PARTITION_KEY_POSITION != 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() ) PART_COLUMNS LEFT JOIN 
(SELECT * FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT C WHERE C.TABLE_ID = I.TABLE_ID AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND C.INDEX_POSITION != 0 ) INDEX_COLUMNS ON PART_COLUMNS.COLUMN_ID = INDEX_COLUMNS.COLUMN_ID WHERE (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL OR BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 255) != INDEX_COLUMNS.INDEX_POSITION) ) OR (BITAND(PART_COLUMNS.PARTITION_KEY_POSITION, 65280)/256 != 0 AND (INDEX_COLUMNS.INDEX_POSITION IS NULL) ) ) ) LOCAL_PARTITIONED_PREFIX_INDEX ON I_T.TENANT_ID = LOCAL_PARTITIONED_PREFIX_INDEX.TENANT_ID AND I_T.INDEX_ID = LOCAL_PARTITIONED_PREFIX_INDEX.INDEX_ID LEFT JOIN SYS.ALL_VIRTUAL_TENANT_TABLESPACE_REAL_AGENT TP ON I_T.TENANT_ID = TP.TENANT_ID AND I_T.TABLESPACE_ID = TP.TABLESPACE_ID WHERE I_T.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp b/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp index 612a3661e3..ec5b007086 100644 --- a/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp +++ b/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp @@ -1210,7 +1210,7 @@ int ObInnerTableSchema::all_ind_statistics_ora_schema(ObTableSchema &table_schem table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(DB.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(V.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(DB.DATABASE_NAME AS VARCHAR2(128)) AS TABLE_OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST(V.PARTITION_NAME AS VARCHAR2(128)) AS PARTITION_NAME, CAST(V.PARTITION_POSITION AS NUMBER) AS PARTITION_POSITION, CAST(V.SUBPARTITION_NAME AS VARCHAR2(128)) AS SUBPARTITION_NAME, CAST(V.SUBPARTITION_POSITION AS NUMBER) AS SUBPARTITION_POSITION, CAST(V.OBJECT_TYPE AS VARCHAR2(12)) AS 
OBJECT_TYPE, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STAT.ROW_CNT AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS AVG_CACHED_BLOCKS, CAST(NULL AS NUMBER) AS AVG_CACHE_HIT_RATIO, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(STAT.LAST_ANALYZED AS DATE) AS LAST_ANALYZED, CAST(decode(STAT.GLOBAL_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(decode(STAT.USER_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS USER_STATS, CAST(decode(bitand(STAT.STATTYPE_LOCKED, 15), NULL, NULL, 0, NULL, 1, 'DATA', 2, 'CACHE', 'ALL') AS VARCHAR2(5)) AS STATTYPE_LOCKED, CAST(decode(STAT.STALE_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS STALE_STATS, CAST(NULL AS VARCHAR2(7)) AS SCOPE FROM ( (SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, TABLE_ID AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_CORE_ALL_TABLE T WHERE T.TABLE_TYPE = 5 UNION ALL SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, CASE WHEN PART_LEVEL = 0 THEN TABLE_ID ELSE -1 END AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T WHERE T.TABLE_TYPE = 5 AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1)) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, P.PART_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, NULL, P.PART_IDX + 1, NULL, 'PARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN 
SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) WHERE T.TABLE_TYPE = 5 UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, SP.SUB_PART_ID AS PARTITION_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, SP.SUB_PART_NAME, P.PART_IDX + 1, SP.SUB_PART_IDX + 1, 'SUBPARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_SUB_PART_REAL_AGENT SP ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID WHERE T.TABLE_TYPE = 5 ) V JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID = V.DATA_TABLE_ID AND T.TENANT_ID = V.TENANT_ID AND T.DATABASE_ID = V.DATABASE_ID AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT DB ON DB.TENANT_ID = V.TENANT_ID AND DB.DATABASE_ID = V.DATABASE_ID AND (V.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, V.TABLE_ID, V.DATABASE_ID) = 1) AND V.TENANT_ID = EFFECTIVE_TENANT_ID() AND DB.TENANT_ID = EFFECTIVE_TENANT_ID() LEFT JOIN SYS.ALL_VIRTUAL_TABLE_STAT_REAL_AGENT STAT ON V.TENANT_ID = STAT.TENANT_ID AND V.TABLE_ID = STAT.TABLE_ID AND V.PARTITION_ID = STAT.PARTITION_ID AND STAT.INDEX_TYPE = 1; )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(DB.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(V.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(DB.DATABASE_NAME AS VARCHAR2(128)) AS TABLE_OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST(V.PARTITION_NAME AS VARCHAR2(128)) AS PARTITION_NAME, CAST(V.PARTITION_POSITION AS NUMBER) AS PARTITION_POSITION, CAST(V.SUBPARTITION_NAME AS VARCHAR2(128)) AS SUBPARTITION_NAME, CAST(V.SUBPARTITION_POSITION AS NUMBER) AS SUBPARTITION_POSITION, 
CAST(V.OBJECT_TYPE AS VARCHAR2(12)) AS OBJECT_TYPE, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STAT.ROW_CNT AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS AVG_CACHED_BLOCKS, CAST(NULL AS NUMBER) AS AVG_CACHE_HIT_RATIO, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(STAT.LAST_ANALYZED AS DATE) AS LAST_ANALYZED, CAST(decode(STAT.GLOBAL_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(decode(STAT.USER_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS USER_STATS, CAST(decode(bitand(STAT.STATTYPE_LOCKED, 15), NULL, NULL, 0, NULL, 1, 'DATA', 2, 'CACHE', 'ALL') AS VARCHAR2(5)) AS STATTYPE_LOCKED, CAST(decode(STAT.STALE_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS STALE_STATS, CAST(NULL AS VARCHAR2(7)) AS SCOPE FROM ( (SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, TABLE_ID AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_CORE_ALL_TABLE T WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) UNION ALL SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, CASE WHEN PART_LEVEL = 0 THEN TABLE_ID ELSE -1 END AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1)) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, P.PART_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 
7), '_')) AS INDEX_NAME, P.PART_NAME, NULL, P.PART_IDX + 1, NULL, 'PARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, SP.SUB_PART_ID AS PARTITION_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, SP.SUB_PART_NAME, P.PART_IDX + 1, SP.SUB_PART_IDX + 1, 'SUBPARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_SUB_PART_REAL_AGENT SP ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) ) V JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID = V.DATA_TABLE_ID AND T.TENANT_ID = V.TENANT_ID AND T.DATABASE_ID = V.DATABASE_ID AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT DB ON DB.TENANT_ID = V.TENANT_ID AND DB.DATABASE_ID = V.DATABASE_ID AND (V.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, V.TABLE_ID, V.DATABASE_ID) = 1) AND V.TENANT_ID = EFFECTIVE_TENANT_ID() AND DB.TENANT_ID = EFFECTIVE_TENANT_ID() LEFT JOIN SYS.ALL_VIRTUAL_TABLE_STAT_REAL_AGENT STAT ON V.TENANT_ID = STAT.TENANT_ID AND V.TABLE_ID = STAT.TABLE_ID AND V.PARTITION_ID = STAT.PARTITION_ID AND STAT.INDEX_TYPE = 1; )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -1260,7 +1260,7 @@ int ObInnerTableSchema::dba_ind_statistics_ora_schema(ObTableSchema &table_schem table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if 
(OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(DB.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(V.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(DB.DATABASE_NAME AS VARCHAR2(128)) AS TABLE_OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST(V.PARTITION_NAME AS VARCHAR2(128)) AS PARTITION_NAME, CAST(V.PARTITION_POSITION AS NUMBER) AS PARTITION_POSITION, CAST(V.SUBPARTITION_NAME AS VARCHAR2(128)) AS SUBPARTITION_NAME, CAST(V.SUBPARTITION_POSITION AS NUMBER) AS SUBPARTITION_POSITION, CAST(V.OBJECT_TYPE AS VARCHAR2(12)) AS OBJECT_TYPE, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STAT.ROW_CNT AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS AVG_CACHED_BLOCKS, CAST(NULL AS NUMBER) AS AVG_CACHE_HIT_RATIO, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(STAT.LAST_ANALYZED AS DATE) AS LAST_ANALYZED, CAST(decode(STAT.GLOBAL_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(decode(STAT.USER_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS USER_STATS, CAST(decode(bitand(STAT.STATTYPE_LOCKED, 15), NULL, NULL, 0, NULL, 1, 'DATA', 2, 'CACHE', 'ALL') AS VARCHAR2(5)) AS STATTYPE_LOCKED, CAST(decode(STAT.STALE_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS STALE_STATS, CAST(NULL AS VARCHAR2(7)) AS SCOPE FROM ( (SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, TABLE_ID AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_CORE_ALL_TABLE T WHERE T.TABLE_TYPE = 5 UNION ALL SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, CASE WHEN PART_LEVEL = 0 THEN TABLE_ID ELSE -1 END AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 
7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T WHERE T.TABLE_TYPE = 5 AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1)) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, P.PART_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, NULL, P.PART_IDX + 1, NULL, 'PARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) WHERE T.TABLE_TYPE = 5 UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, SP.SUB_PART_ID AS PARTITION_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, SP.SUB_PART_NAME, P.PART_IDX + 1, SP.SUB_PART_IDX + 1, 'SUBPARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_SUB_PART_REAL_AGENT SP ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID WHERE T.TABLE_TYPE = 5 ) V JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID = V.DATA_TABLE_ID AND T.TENANT_ID = V.TENANT_ID AND T.DATABASE_ID = V.DATABASE_ID AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT DB ON DB.TENANT_ID = V.TENANT_ID AND DB.DATABASE_ID = V.DATABASE_ID LEFT JOIN SYS.ALL_VIRTUAL_TABLE_STAT_REAL_AGENT STAT ON V.TENANT_ID = STAT.TENANT_ID AND V.TABLE_ID = STAT.TABLE_ID AND V.PARTITION_ID = STAT.PARTITION_ID AND STAT.INDEX_TYPE = 1; )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(DB.DATABASE_NAME AS VARCHAR2(128)) AS OWNER, CAST(V.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(DB.DATABASE_NAME 
AS VARCHAR2(128)) AS TABLE_OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST(V.PARTITION_NAME AS VARCHAR2(128)) AS PARTITION_NAME, CAST(V.PARTITION_POSITION AS NUMBER) AS PARTITION_POSITION, CAST(V.SUBPARTITION_NAME AS VARCHAR2(128)) AS SUBPARTITION_NAME, CAST(V.SUBPARTITION_POSITION AS NUMBER) AS SUBPARTITION_POSITION, CAST(V.OBJECT_TYPE AS VARCHAR2(12)) AS OBJECT_TYPE, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STAT.ROW_CNT AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS AVG_CACHED_BLOCKS, CAST(NULL AS NUMBER) AS AVG_CACHE_HIT_RATIO, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(STAT.LAST_ANALYZED AS DATE) AS LAST_ANALYZED, CAST(decode(STAT.GLOBAL_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(decode(STAT.USER_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS USER_STATS, CAST(decode(bitand(STAT.STATTYPE_LOCKED, 15), NULL, NULL, 0, NULL, 1, 'DATA', 2, 'CACHE', 'ALL') AS VARCHAR2(5)) AS STATTYPE_LOCKED, CAST(decode(STAT.STALE_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS STALE_STATS, CAST(NULL AS VARCHAR2(7)) AS SCOPE FROM ( (SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, TABLE_ID AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_CORE_ALL_TABLE T WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) UNION ALL SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, CASE WHEN PART_LEVEL = 0 THEN TABLE_ID ELSE -1 END AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS 
SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1)) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, P.PART_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, NULL, P.PART_IDX + 1, NULL, 'PARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, SP.SUB_PART_ID AS PARTITION_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, SP.SUB_PART_NAME, P.PART_IDX + 1, SP.SUB_PART_IDX + 1, 'SUBPARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_SUB_PART_REAL_AGENT SP ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) ) V JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID = V.DATA_TABLE_ID AND T.TENANT_ID = V.TENANT_ID AND T.DATABASE_ID = V.DATABASE_ID AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT DB ON DB.TENANT_ID = V.TENANT_ID AND DB.DATABASE_ID = V.DATABASE_ID LEFT JOIN SYS.ALL_VIRTUAL_TABLE_STAT_REAL_AGENT STAT ON V.TENANT_ID = STAT.TENANT_ID AND V.TABLE_ID = STAT.TABLE_ID AND V.PARTITION_ID = STAT.PARTITION_ID AND STAT.INDEX_TYPE = 1; )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -1310,7 +1310,7 @@ int 
ObInnerTableSchema::user_ind_statistics_ora_schema(ObTableSchema &table_sche table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(V.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(DB.DATABASE_NAME AS VARCHAR2(128)) AS TABLE_OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, CAST(V.PARTITION_NAME AS VARCHAR2(128)) AS PARTITION_NAME, CAST(V.PARTITION_POSITION AS NUMBER) AS PARTITION_POSITION, CAST(V.SUBPARTITION_NAME AS VARCHAR2(128)) AS SUBPARTITION_NAME, CAST(V.SUBPARTITION_POSITION AS NUMBER) AS SUBPARTITION_POSITION, CAST(V.OBJECT_TYPE AS VARCHAR2(12)) AS OBJECT_TYPE, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STAT.ROW_CNT AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS AVG_CACHED_BLOCKS, CAST(NULL AS NUMBER) AS AVG_CACHE_HIT_RATIO, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(STAT.LAST_ANALYZED AS DATE) AS LAST_ANALYZED, CAST(decode(STAT.GLOBAL_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(decode(STAT.USER_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS USER_STATS, CAST(decode(bitand(STAT.STATTYPE_LOCKED, 15), NULL, NULL, 0, NULL, 1, 'DATA', 2, 'CACHE', 'ALL') AS VARCHAR2(5)) AS STATTYPE_LOCKED, CAST(decode(STAT.STALE_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS STALE_STATS, CAST(NULL AS VARCHAR2(7)) AS SCOPE FROM ( SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, CASE WHEN PART_LEVEL = 0 THEN TABLE_ID ELSE -1 END AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T WHERE 
T.TABLE_TYPE = 5 AND T.DATABASE_ID = USERENV('SCHEMAID') AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, P.PART_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, NULL, P.PART_IDX + 1, NULL, 'PARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) WHERE T.TABLE_TYPE = 5 AND T.DATABASE_ID = USERENV('SCHEMAID') UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, SP.SUB_PART_ID AS PARTITION_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, SP.SUB_PART_NAME, P.PART_IDX + 1, SP.SUB_PART_IDX + 1, 'SUBPARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_SUB_PART_REAL_AGENT SP ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID WHERE T.TABLE_TYPE = 5 AND T.DATABASE_ID = USERENV('SCHEMAID') ) V JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID = V.DATA_TABLE_ID AND T.TENANT_ID = V.TENANT_ID AND T.DATABASE_ID = V.DATABASE_ID AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT DB ON DB.TENANT_ID = V.TENANT_ID AND DB.DATABASE_ID = V.DATABASE_ID LEFT JOIN SYS.ALL_VIRTUAL_TABLE_STAT_REAL_AGENT STAT ON V.TENANT_ID = STAT.TENANT_ID AND V.TABLE_ID = STAT.TABLE_ID AND V.PARTITION_ID = STAT.PARTITION_ID AND STAT.INDEX_TYPE = 1; WHERE T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__(SELECT CAST(V.INDEX_NAME AS VARCHAR2(128)) AS INDEX_NAME, CAST(DB.DATABASE_NAME AS VARCHAR2(128)) AS TABLE_OWNER, CAST(T.TABLE_NAME AS VARCHAR2(128)) AS TABLE_NAME, 
CAST(V.PARTITION_NAME AS VARCHAR2(128)) AS PARTITION_NAME, CAST(V.PARTITION_POSITION AS NUMBER) AS PARTITION_POSITION, CAST(V.SUBPARTITION_NAME AS VARCHAR2(128)) AS SUBPARTITION_NAME, CAST(V.SUBPARTITION_POSITION AS NUMBER) AS SUBPARTITION_POSITION, CAST(V.OBJECT_TYPE AS VARCHAR2(12)) AS OBJECT_TYPE, CAST(NULL AS NUMBER) AS BLEVEL, CAST(NULL AS NUMBER) AS LEAF_BLOCKS, CAST(NULL AS NUMBER) AS DISTINCT_KEYS, CAST(NULL AS NUMBER) AS AVG_LEAF_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS AVG_DATA_BLOCKS_PER_KEY, CAST(NULL AS NUMBER) AS CLUSTERING_FACTOR, CAST(STAT.ROW_CNT AS NUMBER) AS NUM_ROWS, CAST(NULL AS NUMBER) AS AVG_CACHED_BLOCKS, CAST(NULL AS NUMBER) AS AVG_CACHE_HIT_RATIO, CAST(NULL AS NUMBER) AS SAMPLE_SIZE, CAST(STAT.LAST_ANALYZED AS DATE) AS LAST_ANALYZED, CAST(decode(STAT.GLOBAL_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS GLOBAL_STATS, CAST(decode(STAT.USER_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS USER_STATS, CAST(decode(bitand(STAT.STATTYPE_LOCKED, 15), NULL, NULL, 0, NULL, 1, 'DATA', 2, 'CACHE', 'ALL') AS VARCHAR2(5)) AS STATTYPE_LOCKED, CAST(decode(STAT.STALE_STATS, 0, 'NO', 1, 'YES', NULL) AS VARCHAR2(3)) AS STALE_STATS, CAST(NULL AS VARCHAR2(7)) AS SCOPE FROM ( SELECT TENANT_ID, DATABASE_ID, TABLE_ID, DATA_TABLE_ID, CASE WHEN PART_LEVEL = 0 THEN TABLE_ID ELSE -1 END AS PARTITION_ID, SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) AS INDEX_NAME, NULL AS PARTITION_NAME, NULL AS SUBPARTITION_NAME, NULL AS PARTITION_POSITION, NULL AS SUBPARTITION_POSITION, 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND T.DATABASE_ID = USERENV('SCHEMAID') AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, P.PART_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, NULL, P.PART_IDX + 1, NULL, 'PARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN 
SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND T.DATABASE_ID = USERENV('SCHEMAID') UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, T.TABLE_ID, T.DATA_TABLE_ID, SP.SUB_PART_ID AS PARTITION_ID, SUBSTR(T.TABLE_NAME, 7 + INSTR(SUBSTR(T.TABLE_NAME, 7), '_')) AS INDEX_NAME, P.PART_NAME, SP.SUB_PART_NAME, P.PART_IDX + 1, SP.SUB_PART_IDX + 1, 'SUBPARTITION' FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_SUB_PART_REAL_AGENT SP ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND T.DATABASE_ID = USERENV('SCHEMAID') ) V JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID = V.DATA_TABLE_ID AND T.TENANT_ID = V.TENANT_ID AND T.DATABASE_ID = V.DATABASE_ID AND T.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT DB ON DB.TENANT_ID = V.TENANT_ID AND DB.DATABASE_ID = V.DATABASE_ID LEFT JOIN SYS.ALL_VIRTUAL_TABLE_STAT_REAL_AGENT STAT ON V.TENANT_ID = STAT.TENANT_ID AND V.TABLE_ID = STAT.TABLE_ID AND V.PARTITION_ID = STAT.PARTITION_ID AND STAT.INDEX_TYPE = 1; WHERE T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.h b/src/share/inner_table/ob_inner_table_schema.h index b4a163d14c..79be1a7a15 100644 --- a/src/share/inner_table/ob_inner_table_schema.h +++ b/src/share/inner_table/ob_inner_table_schema.h @@ -12772,12 +12772,12 @@ static inline int get_sys_table_lob_aux_schema(const uint64_t tid, } const int64_t OB_CORE_TABLE_COUNT = 4; -const int64_t OB_SYS_TABLE_COUNT = 290; 
+const int64_t OB_SYS_TABLE_COUNT = 291; const int64_t OB_VIRTUAL_TABLE_COUNT = 806; const int64_t OB_SYS_VIEW_COUNT = 876; -const int64_t OB_SYS_TENANT_TABLE_COUNT = 1977; +const int64_t OB_SYS_TENANT_TABLE_COUNT = 1978; const int64_t OB_CORE_SCHEMA_VERSION = 1; -const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 1980; +const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 1981; } // end namespace share } // end namespace oceanbase diff --git a/src/share/inner_table/ob_inner_table_schema.lob.cpp b/src/share/inner_table/ob_inner_table_schema.lob.cpp index f9ee72212c..406bcb1932 100644 --- a/src/share/inner_table/ob_inner_table_schema.lob.cpp +++ b/src/share/inner_table/ob_inner_table_schema.lob.cpp @@ -21,7 +21,7 @@ inner_lob_map_t inner_lob_map; bool lob_mapping_init() { int ret = OB_SUCCESS; - if (OB_FAIL(inner_lob_map.create(293, ObModIds::OB_INNER_LOB_HASH_SET))) { + if (OB_FAIL(inner_lob_map.create(294, ObModIds::OB_INNER_LOB_HASH_SET))) { SERVER_LOG(WARN, "fail to create inner lob map", K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < ARRAYSIZEOF(lob_aux_table_mappings); ++i) { diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index 65fd371d2d..c5baf631e2 100644 --- a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -14984,8 +14984,10 @@ def_table_schema( CASE WHEN d_col.column_name IS NOT NULL THEN d_col.column_name ELSE c.column_name END AS COLUMN_NAME, CASE WHEN d_col.column_name IS NOT NULL THEN c.data_length ELSE NULL END AS SUB_PART, CASE WHEN c.nullable = 1 THEN 'YES' ELSE '' END AS NULLABLE, - CASE WHEN i.index_using_type = 0 THEN 'BTREE' ELSE (CASE WHEN - i.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END)END AS INDEX_TYPE, + CASE WHEN i.index_type in (15, 18, 21) THEN 'FULLTEXT' + WHEN i.index_using_type = 0 THEN 'BTREE' + WHEN i.index_using_type = 1 THEN 'HASH' + ELSE 'UNKOWN' END AS INDEX_TYPE, CASE i.index_status WHEN 2 THEN 
'VALID' WHEN 3 THEN 'CHECKING' @@ -15001,6 +15003,7 @@ def_table_schema( AND i.tenant_id = t.tenant_id AND i.database_id = t.database_id AND i.table_type = 5 + AND i.index_type NOT IN (13, 14, 16, 17, 19, 20, 22) AND i.table_mode >> 12 & 15 in (0,1) AND t.table_type in (0,3) JOIN oceanbase.__all_column c @@ -15063,8 +15066,10 @@ def_table_schema( CASE WHEN d_col.column_name IS NOT NULL THEN d_col.column_name ELSE c.column_name END AS COLUMN_NAME, CASE WHEN d_col.column_name IS NOT NULL THEN c.data_length ELSE NULL END AS SUB_PART, CASE WHEN c.nullable = 1 THEN 'YES' ELSE '' END AS NULLABLE, - CASE WHEN i.index_using_type = 0 THEN 'BTREE' ELSE (CASE WHEN - i.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END)END AS INDEX_TYPE, + CASE WHEN i.index_type in (15, 18, 21) THEN 'FULLTEXT' + WHEN i.index_using_type = 0 THEN 'BTREE' + WHEN i.index_using_type = 1 THEN 'HASH' + ELSE 'UNKOWN' END AS INDEX_TYPE, CASE i.index_status WHEN 2 THEN 'VALID' WHEN 3 THEN 'CHECKING' @@ -15080,6 +15085,7 @@ def_table_schema( AND i.tenant_id = t.tenant_id AND i.database_id = t.database_id AND i.table_type = 5 + AND i.index_type NOT IN (13, 14, 16, 17, 19, 20, 22) AND t.table_type in (0,3) AND t.tenant_id = EFFECTIVE_TENANT_ID() JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE c @@ -20877,6 +20883,7 @@ def_table_schema( ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE + WHEN A.TABLE_TYPE = 5 AND A.INDEX_TYPE IN (15, 18, 21) THEN 'DOMAIN' WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM OCEANBASE.__ALL_VIRTUAL_COLUMN T_COL_INDEX, @@ -20943,7 +20950,7 @@ def_table_schema( AND CONS_TAB.TENANT_ID = A.TENANT_ID AND CONS_TAB.CONSTRAINT_TYPE = 1 WHERE - (A.TABLE_TYPE = 3 AND A.TABLE_MODE & 66048 = 0) OR (A.TABLE_TYPE = 5) + (A.TABLE_TYPE = 3 AND A.TABLE_MODE & 66048 = 0) OR (A.TABLE_TYPE = 5 AND A.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22)) ) C JOIN OCEANBASE.__ALL_VIRTUAL_TABLE D ON C.TABLE_ID = D.TABLE_ID @@ -21472,6 +21479,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND 
C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 + AND T.INDEX_TYPE NOT IN (17,19,20,22) AND (C.PARTITION_KEY_POSITION & 255) > 0 UNION SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, @@ -21488,7 +21496,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 - AND T.INDEX_TYPE IN (1,2,10) + AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND (C.PARTITION_KEY_POSITION & 255) > 0 """.replace("\n", " ") ) @@ -21533,6 +21541,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 + AND T.INDEX_TYPE NOT IN (17,19,20,22) AND (C.PARTITION_KEY_POSITION & 65280) > 0 UNION SELECT CAST(D.TENANT_ID AS SIGNED) AS CON_ID, @@ -21549,7 +21558,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 - AND T.INDEX_TYPE IN (1,2,10) + AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND (C.PARTITION_KEY_POSITION & 65280) > 0 """.replace("\n", " ") ) @@ -21649,18 +21658,24 @@ FROM WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 + WHEN 15 THEN 1 + WHEN 23 THEN 1 + WHEN 24 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID + WHEN 15 THEN T.TABLE_ID + WHEN 23 THEN T.TABLE_ID + WHEN 24 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM OCEANBASE.__ALL_VIRTUAL_TABLE I JOIN OCEANBASE.__ALL_VIRTUAL_TABLE T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID JOIN OCEANBASE.__ALL_VIRTUAL_DATABASE D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID - WHERE I.TABLE_TYPE = 5 AND I.PART_LEVEL != 0 + WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND I.PART_LEVEL != 0 AND T.TABLE_MODE >> 12 & 15 in (0,1) ) I_T @@ -21674,7 +21689,7 @@ LEFT JOIN 1 AS IS_PREFIXED FROM OCEANBASE.__ALL_VIRTUAL_TABLE I WHERE I.TABLE_TYPE = 5 - AND I.INDEX_TYPE IN (1, 2, 10) + AND I.INDEX_TYPE IN (1, 2, 10, 15, 23, 24) AND I.PART_LEVEL != 0 AND NOT EXISTS (SELECT 
/*+NO_USE_NL(PART_COLUMNS INDEX_COLUMNS)*/ * @@ -22453,6 +22468,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 + AND T.INDEX_TYPE NOT IN (17,19,20,22) AND (C.PARTITION_KEY_POSITION & 255) > 0 AND C.TENANT_ID = 0 UNION @@ -22469,7 +22485,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 - AND T.INDEX_TYPE IN (1,2,10) + AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND (C.PARTITION_KEY_POSITION & 255) > 0 AND C.TENANT_ID = 0 """.replace("\n", " ") @@ -22515,6 +22531,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 + AND T.INDEX_TYPE NOT IN (17,19,20,22) AND (C.PARTITION_KEY_POSITION & 65280) > 0 AND C.TENANT_ID = 0 UNION @@ -22531,7 +22548,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 - AND T.INDEX_TYPE IN (1,2,10) + AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND (C.PARTITION_KEY_POSITION & 65280) > 0 AND C.TENANT_ID = 0 """.replace("\n", " ") @@ -22922,18 +22939,24 @@ FROM WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 + WHEN 15 THEN 1 + WHEN 23 THEN 1 + WHEN 24 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID + WHEN 15 THEN T.TABLE_ID + WHEN 23 THEN T.TABLE_ID + WHEN 24 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM OCEANBASE.__ALL_TABLE I JOIN OCEANBASE.__ALL_TABLE T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID JOIN OCEANBASE.__ALL_DATABASE D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID - WHERE I.TABLE_TYPE = 5 AND I.PART_LEVEL != 0 + WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND I.PART_LEVEL != 0 AND I.TABLE_MODE >> 12 & 15 in (0,1) ) I_T @@ -22954,7 +22977,7 @@ LEFT JOIN 1 AS IS_PREFIXED FROM OCEANBASE.__ALL_TABLE I WHERE I.TABLE_TYPE = 5 - AND I.INDEX_TYPE IN (1, 2, 10) + AND I.INDEX_TYPE 
IN (1, 2, 10, 15, 23, 24) AND I.PART_LEVEL != 0 AND I.TENANT_ID = 0 AND NOT EXISTS @@ -24948,7 +24971,7 @@ def_table_schema( 'INDEX' AS OBJECT_TYPE FROM OCEANBASE.__ALL_VIRTUAL_CORE_ALL_TABLE T - WHERE T.TABLE_TYPE = 5 AND T.TENANT_ID = EFFECTIVE_TENANT_ID() + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND T.TENANT_ID = EFFECTIVE_TENANT_ID() UNION ALL SELECT TENANT_ID, DATABASE_ID, @@ -24963,7 +24986,7 @@ def_table_schema( 'INDEX' AS OBJECT_TYPE FROM oceanbase.__all_table T - WHERE T.TABLE_TYPE = 5 + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND T.TABLE_MODE >> 12 & 15 in (0,1)) UNION ALL SELECT T.TENANT_ID, @@ -24983,7 +25006,7 @@ def_table_schema( oceanbase.__all_part P ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID - WHERE T.TABLE_TYPE = 5 + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, @@ -25007,7 +25030,7 @@ def_table_schema( ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID - WHERE T.TABLE_TYPE = 5 + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) ) V JOIN oceanbase.__all_table T ON T.TABLE_ID = V.DATA_TABLE_ID @@ -38623,6 +38646,7 @@ def_table_schema( ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE + WHEN A.TABLE_TYPE = 5 AND A.INDEX_TYPE IN (15, 18, 21) THEN 'DOMAIN' WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_INDEX, @@ -38681,6 +38705,7 @@ def_table_schema( JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT B ON A.DATABASE_ID = B.DATABASE_ID AND TABLE_TYPE IN (5, 3, 15) + AND A.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND A.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((A.TABLE_MODE / 4096), 15) IN (0,1) AND B.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -38789,6 +38814,7 @@ def_table_schema( ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE + WHEN A.TABLE_TYPE = 5 AND A.INDEX_TYPE IN (15, 18, 21) 
THEN 'DOMAIN' WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_INDEX, @@ -38847,6 +38873,7 @@ def_table_schema( JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT B ON A.DATABASE_ID = B.DATABASE_ID AND TABLE_TYPE IN (5, 3, 15) + AND A.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND A.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((A.TABLE_MODE / 4096), 15) IN (0,1) AND B.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -38958,6 +38985,7 @@ def_table_schema( ELSE (CONS_TAB.CONSTRAINT_NAME) END AS INDEX_NAME, CASE + WHEN A.TABLE_TYPE = 5 AND A.INDEX_TYPE IN (15, 18, 21) THEN 'DOMAIN' WHEN A.TABLE_TYPE = 5 AND EXISTS ( SELECT 1 FROM SYS.ALL_VIRTUAL_COLUMN_REAL_AGENT T_COL_INDEX, @@ -39016,6 +39044,7 @@ def_table_schema( JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT B ON A.DATABASE_ID = B.DATABASE_ID AND TABLE_TYPE IN (5, 3, 15) + AND A.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND A.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((A.TABLE_MODE / 4096), 15) IN (0,1) AND B.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41553,6 +41582,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 + AND T.INDEX_TYPE NOT IN (17,19,20,22) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41571,7 +41601,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 - AND T.INDEX_TYPE IN (1,2,10) + AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41625,6 +41655,7 @@ def_table_schema( AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 + AND T.INDEX_TYPE NOT IN (17,19,20,22) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41646,7 +41677,7 @@ 
def_table_schema( AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 - AND T.INDEX_TYPE IN (1,2,10) + AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41697,6 +41728,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 + AND T.INDEX_TYPE NOT IN (17,19,20,22) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41716,7 +41748,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 - AND T.INDEX_TYPE IN (1,2,10) + AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND BITAND(C.PARTITION_KEY_POSITION, 255) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41772,6 +41804,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 + AND T.INDEX_TYPE NOT IN (17,19,20,22) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41791,7 +41824,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 - AND T.INDEX_TYPE IN (1,2,10) + AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() AND T.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41844,6 +41877,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 + AND T.INDEX_TYPE NOT IN (17,19,20,22) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41865,7 +41899,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 - AND 
T.INDEX_TYPE IN (1,2,10) + AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41917,6 +41951,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.TABLE_ID AND T.TABLE_TYPE = 5 + AND T.INDEX_TYPE NOT IN (17,19,20,22) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -41936,7 +41971,7 @@ def_table_schema( AND T.DATABASE_ID = D.DATABASE_ID AND C.TABLE_ID = T.DATA_TABLE_ID AND T.TABLE_TYPE = 5 - AND T.INDEX_TYPE IN (1,2,10) + AND T.INDEX_TYPE IN (1,2,10,15,23,24) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) AND BITAND(C.PARTITION_KEY_POSITION, 65280) > 0 AND C.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -43411,11 +43446,17 @@ FROM WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 + WHEN 15 THEN 1 + WHEN 23 THEN 1 + WHEN 24 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID + WHEN 15 THEN T.TABLE_ID + WHEN 23 THEN T.TABLE_ID + WHEN 24 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T @@ -43423,7 +43464,7 @@ FROM AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID - WHERE I.TABLE_TYPE = 5 AND I.PART_LEVEL != 0 AND T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 + WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND I.PART_LEVEL != 0 AND T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 ) I_T JOIN @@ -43443,7 +43484,7 @@ LEFT JOIN 1 AS IS_PREFIXED FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I WHERE I.TABLE_TYPE = 5 - AND I.INDEX_TYPE IN (1, 2, 10) + AND I.INDEX_TYPE IN (1, 2, 10, 15, 23, 24) AND I.PART_LEVEL != 0 AND I.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((I.TABLE_MODE / 4096), 15) IN 
(0,1) @@ -43582,18 +43623,24 @@ FROM WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 + WHEN 15 THEN 1 + WHEN 23 THEN 1 + WHEN 24 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID + WHEN 15 THEN T.TABLE_ID + WHEN 23 THEN T.TABLE_ID + WHEN 24 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID - WHERE I.TABLE_TYPE = 5 AND I.PART_LEVEL != 0 + WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND I.PART_LEVEL != 0 AND (T.DATABASE_ID = USERENV('SCHEMAID') OR USER_CAN_ACCESS_OBJ(1, T.TABLE_ID, T.DATABASE_ID) = 1) AND T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 @@ -43616,7 +43663,7 @@ LEFT JOIN 1 AS IS_PREFIXED FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I WHERE I.TABLE_TYPE = 5 - AND I.INDEX_TYPE IN (1, 2, 10) + AND I.INDEX_TYPE IN (1, 2, 10, 15, 23, 24) AND I.PART_LEVEL != 0 AND I.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) @@ -43754,18 +43801,24 @@ FROM WHEN 1 THEN 1 WHEN 2 THEN 1 WHEN 10 THEN 1 + WHEN 15 THEN 1 + WHEN 23 THEN 1 + WHEN 24 THEN 1 ELSE 0 END) AS IS_LOCAL, (CASE I.INDEX_TYPE WHEN 1 THEN T.TABLE_ID WHEN 2 THEN T.TABLE_ID WHEN 10 THEN T.TABLE_ID + WHEN 15 THEN T.TABLE_ID + WHEN 23 THEN T.TABLE_ID + WHEN 24 THEN T.TABLE_ID ELSE I.TABLE_ID END) AS JOIN_TABLE_ID FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON I.TENANT_ID = T.TENANT_ID AND I.DATA_TABLE_ID = T.TABLE_ID AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D ON T.TENANT_ID = D.TENANT_ID AND T.DATABASE_ID = D.DATABASE_ID - WHERE I.TABLE_TYPE = 5 AND I.PART_LEVEL != 0 + WHERE I.TABLE_TYPE = 5 AND I.INDEX_TYPE NOT IN (13, 
14, 16, 17, 19, 20, 22) AND I.PART_LEVEL != 0 AND T.DATABASE_ID = USERENV('SCHEMAID') AND T.TABLE_TYPE != 12 AND T.TABLE_TYPE != 13 ) I_T @@ -43787,7 +43840,7 @@ LEFT JOIN 1 AS IS_PREFIXED FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT I WHERE I.TABLE_TYPE = 5 - AND I.INDEX_TYPE IN (1, 2, 10) + AND I.INDEX_TYPE IN (1, 2, 10, 15, 23, 24) AND I.PART_LEVEL != 0 AND I.TENANT_ID = EFFECTIVE_TENANT_ID() AND bitand((I.TABLE_MODE / 4096), 15) IN (0,1) @@ -49019,7 +49072,7 @@ def_table_schema( 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_CORE_ALL_TABLE T - WHERE T.TABLE_TYPE = 5 + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) UNION ALL SELECT TENANT_ID, DATABASE_ID, @@ -49034,7 +49087,7 @@ def_table_schema( 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T - WHERE T.TABLE_TYPE = 5 + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1)) UNION ALL SELECT T.TENANT_ID, @@ -49055,7 +49108,7 @@ def_table_schema( ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) - WHERE T.TABLE_TYPE = 5 + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, @@ -49080,7 +49133,7 @@ def_table_schema( ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID - WHERE T.TABLE_TYPE = 5 + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) ) V JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID = V.DATA_TABLE_ID @@ -49157,7 +49210,7 @@ def_table_schema( 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_CORE_ALL_TABLE T - WHERE T.TABLE_TYPE = 5 + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) UNION ALL SELECT TENANT_ID, DATABASE_ID, @@ -49172,7 +49225,7 @@ def_table_schema( 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T - WHERE T.TABLE_TYPE = 5 + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN 
(13, 14, 16, 17, 19, 20, 22) AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1)) UNION ALL SELECT T.TENANT_ID, @@ -49193,7 +49246,7 @@ def_table_schema( ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) - WHERE T.TABLE_TYPE = 5 + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, @@ -49218,7 +49271,7 @@ def_table_schema( ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID - WHERE T.TABLE_TYPE = 5 + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) ) V JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID = V.DATA_TABLE_ID @@ -49290,7 +49343,7 @@ def_table_schema( 'INDEX' AS OBJECT_TYPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T - WHERE T.TABLE_TYPE = 5 AND T.DATABASE_ID = USERENV('SCHEMAID') + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND T.DATABASE_ID = USERENV('SCHEMAID') AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) UNION ALL SELECT T.TENANT_ID, @@ -49311,7 +49364,7 @@ def_table_schema( ON T.TENANT_ID = P.TENANT_ID AND T.TABLE_ID = P.TABLE_ID AND bitand((T.TABLE_MODE / 4096), 15) IN (0,1) - WHERE T.TABLE_TYPE = 5 AND T.DATABASE_ID = USERENV('SCHEMAID') + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND T.DATABASE_ID = USERENV('SCHEMAID') UNION ALL SELECT T.TENANT_ID, T.DATABASE_ID, @@ -49336,7 +49389,7 @@ def_table_schema( ON T.TENANT_ID = SP.TENANT_ID AND T.TABLE_ID = SP.TABLE_ID AND P.PART_ID = SP.PART_ID - WHERE T.TABLE_TYPE = 5 AND T.DATABASE_ID = USERENV('SCHEMAID') + WHERE T.TABLE_TYPE = 5 AND T.INDEX_TYPE NOT IN (13, 14, 16, 17, 19, 20, 22) AND T.DATABASE_ID = USERENV('SCHEMAID') ) V JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID = V.DATA_TABLE_ID diff --git a/src/share/ob_ddl_common.cpp b/src/share/ob_ddl_common.cpp index 3bd087a386..4466f7f819 100644 --- a/src/share/ob_ddl_common.cpp +++ 
b/src/share/ob_ddl_common.cpp @@ -1587,7 +1587,7 @@ int ObDDLUtil::get_data_information( task_status = static_cast(cur_task_status); if (ObDDLType::DDL_CREATE_INDEX == ddl_type) { SMART_VAR(rootserver::ObIndexBuildTask, task) { - if (OB_FAIL(task.deserlize_params_from_message(tenant_id, task_message.ptr(), task_message.length(), pos))) { + if (OB_FAIL(task.deserialize_params_from_message(tenant_id, task_message.ptr(), task_message.length(), pos))) { LOG_WARN("deserialize from msg failed", K(ret)); } else { data_format_version = task.get_data_format_version(); @@ -1595,7 +1595,7 @@ int ObDDLUtil::get_data_information( } } else { SMART_VAR(rootserver::ObTableRedefinitionTask, task) { - if (OB_FAIL(task.deserlize_params_from_message(tenant_id, task_message.ptr(), task_message.length(), pos))) { + if (OB_FAIL(task.deserialize_params_from_message(tenant_id, task_message.ptr(), task_message.length(), pos))) { LOG_WARN("deserialize from msg failed", K(ret)); } else { data_format_version = task.get_data_format_version(); diff --git a/src/share/ob_ddl_error_message_table_operator.cpp b/src/share/ob_ddl_error_message_table_operator.cpp index eedee9690a..91c0183df0 100644 --- a/src/share/ob_ddl_error_message_table_operator.cpp +++ b/src/share/ob_ddl_error_message_table_operator.cpp @@ -275,7 +275,8 @@ int ObDDLErrorMessageTableOperator::get_ddl_error_message( EXTRACT_VARCHAR_FIELD_MYSQL(*result, "user_message", str_user_message); forward_user_msg_len = str_user_message.length(); const int64_t buf_size = str_user_message.length() + 1; - if (OB_ISNULL(error_message.user_message_ = static_cast(error_message.allocator_.alloc(buf_size)))) { + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(error_message.user_message_ = static_cast(error_message.allocator_.alloc(buf_size)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("alloc memory failed", K(ret)); } else if (OB_FAIL(databuff_printf(error_message.dba_message_, OB_MAX_ERROR_MSG_LEN, "%.*s", str_dba_message.length(), 
str_dba_message.ptr()))) { @@ -289,6 +290,71 @@ int ObDDLErrorMessageTableOperator::get_ddl_error_message( return ret; } +int ObDDLErrorMessageTableOperator::get_ddl_error_message( + const uint64_t tenant_id, + const int64_t task_id, + const int64_t target_object_id, + const int64_t object_id, + common::ObMySQLProxy &sql_proxy, + ObBuildDDLErrorMessage &error_message, + int64_t &forward_user_msg_len) +{ + int ret = OB_SUCCESS; + ObSqlString sql; + forward_user_msg_len = 0; + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + const uint64_t exec_tenant_id = ObSchemaUtils::get_exec_tenant_id(tenant_id); + sqlclient::ObMySQLResult *result = NULL; + char ip[common::OB_MAX_SERVER_ADDR_SIZE] = ""; + if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || task_id <= 0 || object_id < -1)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(tenant_id), K(task_id), K(object_id)); + } else if (OB_FAIL(sql.assign_fmt( + "SELECT ret_code, ddl_type, affected_rows, dba_message, user_message from %s " + "WHERE tenant_id = %ld AND task_id = %ld AND target_object_id = %ld AND object_id = %ld ", + OB_ALL_DDL_ERROR_MESSAGE_TNAME, + ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id), task_id, target_object_id, object_id))) { + LOG_WARN("fail to assign sql", K(ret)); + } else if (OB_FAIL(sql_proxy.read(res, tenant_id, sql.ptr()))) { + LOG_WARN("fail to execute sql", K(ret), K(sql)); + } else if (OB_ISNULL(result = res.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error unexpected, query result must not be NULL", K(ret)); + } else if (OB_FAIL(result->next())) { + if (OB_LIKELY(OB_ITER_END == ret)) { + ret = OB_ENTRY_NOT_EXIST; + } else { + LOG_WARN("fail to get next row", K(ret)); + } + } else { + char *buf = nullptr; + int ddl_type = 0; + ObString str_dba_message; + ObString str_user_message; + EXTRACT_INT_FIELD_MYSQL(*result, "ddl_type", ddl_type, int); + EXTRACT_INT_FIELD_MYSQL(*result, "affected_rows", error_message.affected_rows_, int); + 
error_message.ddl_type_ = static_cast(ddl_type); + EXTRACT_INT_FIELD_MYSQL(*result, "ret_code", error_message.ret_code_, int); + EXTRACT_VARCHAR_FIELD_MYSQL(*result, "dba_message", str_dba_message); + EXTRACT_VARCHAR_FIELD_MYSQL(*result, "user_message", str_user_message); + forward_user_msg_len = str_user_message.length(); + const int64_t buf_size = str_user_message.length() + 1; + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(error_message.user_message_ = static_cast(error_message.allocator_.alloc(buf_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret)); + } else if (OB_FAIL(databuff_printf(error_message.dba_message_, OB_MAX_ERROR_MSG_LEN, "%.*s", str_dba_message.length(), str_dba_message.ptr()))) { + LOG_WARN("print to buffer failed", K(ret), K(str_dba_message)); + } else { + error_message.user_message_[buf_size - 1] = '\0'; + MEMCPY(error_message.user_message_, str_user_message.ptr(), str_user_message.length()); + } + } + } + return ret; + +} + int ObDDLErrorMessageTableOperator::report_ddl_error_message(const ObBuildDDLErrorMessage &error_message, const uint64_t tenant_id, const ObCurTraceId::TraceId &trace_id, const int64_t task_id, const int64_t parent_task_id, const uint64_t table_id, const int64_t schema_version, const int64_t object_id, const ObAddr &addr, ObMySQLProxy &sql_proxy) diff --git a/src/share/ob_ddl_error_message_table_operator.h b/src/share/ob_ddl_error_message_table_operator.h index 2cc7c65c53..3a9e8ce8b2 100644 --- a/src/share/ob_ddl_error_message_table_operator.h +++ b/src/share/ob_ddl_error_message_table_operator.h @@ -95,12 +95,23 @@ public: static int get_ddl_error_message(const uint64_t tenant_id, const int64_t task_id, const int64_t target_object_id, const common::ObAddr &addr, const bool is_ddl_retry_task, common::ObMySQLProxy &sql_proxy, ObBuildDDLErrorMessage &error_message, int64_t &forward_user_msg_len); + static int get_ddl_error_message( + const uint64_t tenant_id, + const int64_t task_id, + const 
int64_t target_object_id, + const int64_t object_id, + common::ObMySQLProxy &sql_proxy, + ObBuildDDLErrorMessage &error_message, + int64_t &forward_user_msg_len); static int report_ddl_error_message(const ObBuildDDLErrorMessage &error_message, const uint64_t tenant_id, const char *trace_id, const int64_t task_id, const int64_t parent_task_id, const uint64_t table_id, const int64_t schema_version, const int64_t object_id, const common::ObAddr &addr, common::ObMySQLProxy &sql_proxy); static int report_ddl_error_message(const ObBuildDDLErrorMessage &error_message, const uint64_t tenant_id, const ObCurTraceId::TraceId &trace_id, const int64_t task_id, const int64_t parent_task_id, const uint64_t table_id, const int64_t schema_version, const int64_t object_id, const common::ObAddr &addr, common::ObMySQLProxy &sql_proxy); + static int report_ddl_error_message(const ObBuildDDLErrorMessage &error_message, const uint64_t tenant_id, + const int64_t task_id, const uint64_t table_id, const int64_t schema_version, const int64_t object_id, + const int64_t parent_task_id, const common::ObAddr &addr, common::ObMySQLProxy &sql_proxy); static int build_ddl_error_message(const int ret_code, const uint64_t tenant_id, const uint64_t table_id, ObBuildDDLErrorMessage &error_message, const common::ObString index_name, const uint64_t index_id, const ObDDLType ddl_type, const char *message, int &report_ret_code); diff --git a/src/share/ob_fts_index_builder_util.cpp b/src/share/ob_fts_index_builder_util.cpp new file mode 100644 index 0000000000..3ae97e8f6c --- /dev/null +++ b/src/share/ob_fts_index_builder_util.cpp @@ -0,0 +1,2480 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX STORAGE_FTS +#include +#include "ob_fts_index_builder_util.h" +#include "ob_index_builder_util.h" +#include "sql/resolver/ddl/ob_ddl_resolver.h" +#include "sql/session/ob_sql_session_info.h" +#include "sql/engine/ob_exec_context.h" +#include "sql/resolver/expr/ob_raw_expr_util.h" +#include "storage/fts/ob_fts_plugin_helper.h" +#include "storage/fts/ob_fts_plugin_mgr.h" + +namespace oceanbase +{ +using namespace common; +using namespace obrpc; +using namespace share::schema; + +namespace share +{ + +int ObFtsIndexBuilderUtil::append_fts_rowkey_doc_arg( + const ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list) +{ + int ret = OB_SUCCESS; + ObCreateIndexArg fts_rowkey_doc_arg; + if (OB_ISNULL(allocator) || + !(is_fts_index(index_arg.index_type_) || is_multivalue_index(index_arg.index_type_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret), K(index_arg.index_type_)); + } else if (OB_FAIL(fts_rowkey_doc_arg.assign(index_arg))) { + LOG_WARN("failed to assign to fts rowkey doc arg", K(ret)); + } else if (FALSE_IT(fts_rowkey_doc_arg.index_option_.parser_name_.reset())) { + } else if (FALSE_IT(fts_rowkey_doc_arg.index_type_ = + INDEX_TYPE_ROWKEY_DOC_ID_LOCAL)) { + } else if (OB_FAIL(generate_fts_aux_index_name(fts_rowkey_doc_arg, allocator))) { + LOG_WARN("failed to generate fts aux index name", K(ret)); + } else if (OB_FAIL(index_arg_list.push_back(fts_rowkey_doc_arg))) { + LOG_WARN("failed to push back fts rowkey doc arg", K(ret)); + } + return ret; +} + +int ObFtsIndexBuilderUtil::append_fts_doc_rowkey_arg( + const obrpc::ObCreateIndexArg 
&index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list) +{ + int ret = OB_SUCCESS; + ObCreateIndexArg fts_doc_rowkey_arg; + // NOTE index_arg.index_type_ is fts doc rowkey + if (OB_ISNULL(allocator) || + !(is_fts_index(index_arg.index_type_) || + is_multivalue_index(index_arg.index_type_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret), K(index_arg.index_type_)); + } else if (OB_FAIL(fts_doc_rowkey_arg.assign(index_arg))) { + LOG_WARN("failed to assign to fts rowkey doc arg", K(ret)); + } else { + fts_doc_rowkey_arg.index_option_.parser_name_.reset(); + if (is_local_fts_index(index_arg.index_type_) || + is_local_multivalue_index(index_arg.index_type_)) { + fts_doc_rowkey_arg.index_type_ = INDEX_TYPE_DOC_ID_ROWKEY_LOCAL; + } else if (is_global_fts_index(index_arg.index_type_)) { + fts_doc_rowkey_arg.index_type_ = INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL; + } else if (is_global_local_fts_index(index_arg.index_type_)) { + fts_doc_rowkey_arg.index_type_ = INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL_LOCAL_STORAGE; + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(generate_fts_aux_index_name(fts_doc_rowkey_arg, allocator))) { + LOG_WARN("failed to generate fts aux index name", K(ret)); + } else if (OB_FAIL(index_arg_list.push_back(fts_doc_rowkey_arg))) { + LOG_WARN("failed to push back fts doc rowkey arg", K(ret)); + } + return ret; +} + +int ObFtsIndexBuilderUtil::append_fts_index_arg( + const ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list) +{ + int ret = OB_SUCCESS; + ObCreateIndexArg fts_index_arg; + if (OB_ISNULL(allocator) || + !share::schema::is_fts_index(index_arg.index_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret), K(index_arg)); + } else if (OB_FAIL(fts_index_arg.assign(index_arg))) { + LOG_WARN("failed to assign to fts index arg", K(ret)); + } else { + if (is_local_fts_index(index_arg.index_type_)) { + fts_index_arg.index_type_ = INDEX_TYPE_FTS_INDEX_LOCAL; + } else if 
(is_global_fts_index(index_arg.index_type_)) { + fts_index_arg.index_type_ = INDEX_TYPE_FTS_INDEX_GLOBAL; + } else if (is_global_local_fts_index(index_arg.index_type_)) { + fts_index_arg.index_type_ = INDEX_TYPE_FTS_INDEX_GLOBAL_LOCAL_STORAGE; + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(generate_fts_parser_name(fts_index_arg, allocator))) { + LOG_WARN("fail to generate fts parser name", K(ret)); + } else if (OB_FAIL(generate_fts_aux_index_name(fts_index_arg, allocator))) { + LOG_WARN("failed to generate fts aux index name", K(ret)); + } else if (OB_FAIL(index_arg_list.push_back(fts_index_arg))) { + LOG_WARN("failed to push back fts index arg", K(ret)); + } + return ret; +} + +int ObFtsIndexBuilderUtil::append_fts_doc_word_arg( + const ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list) +{ + int ret = OB_SUCCESS; + ObCreateIndexArg fts_doc_word_arg; + if (OB_ISNULL(allocator) || + !share::schema::is_fts_index(index_arg.index_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret), K(index_arg)); + } else if (OB_FAIL(fts_doc_word_arg.assign(index_arg))) { + LOG_WARN("failed to assign to fts doc word arg", K(ret)); + } else { + if (is_local_fts_index(index_arg.index_type_)) { + fts_doc_word_arg.index_type_ = INDEX_TYPE_FTS_DOC_WORD_LOCAL; + } else if (is_global_fts_index(index_arg.index_type_)) { + fts_doc_word_arg.index_type_ = INDEX_TYPE_FTS_DOC_WORD_GLOBAL; + } else if (is_global_local_fts_index(index_arg.index_type_)) { + fts_doc_word_arg.index_type_ = INDEX_TYPE_FTS_DOC_WORD_GLOBAL_LOCAL_STORAGE; + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(generate_fts_parser_name(fts_doc_word_arg, allocator))) { + LOG_WARN("fail to generate fts parser name", K(ret)); + } else if (OB_FAIL(generate_fts_aux_index_name(fts_doc_word_arg, allocator))) { + LOG_WARN("failed to generate fts aux index name", K(ret)); + } else if (OB_FAIL(index_arg_list.push_back(fts_doc_word_arg))) { + LOG_WARN("failed to push back fts 
doc word arg", K(ret)); + } + return ret; +} + +int ObFtsIndexBuilderUtil::generate_fts_aux_index_name( + obrpc::ObCreateIndexArg &arg, + ObIAllocator *allocator) +{ + // TODO: @zhenhan.gzh remove index name postfix, and only take one name in index namespace for fulltext index + int ret = OB_SUCCESS; + char *name_buf = nullptr; + share::schema::ObIndexType type = arg.index_type_; + if (OB_ISNULL(allocator)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret)); + } else if (!share::schema::is_fts_index(type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(type)); + } else if (OB_ISNULL(name_buf = static_cast(allocator->alloc(OB_MAX_TABLE_NAME_LENGTH)))) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc mem", K(ret)); + } else { + int64_t pos = 0; + if (share::schema::is_rowkey_doc_aux(type)) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(name_buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "fts_rowkey_doc"))) { + LOG_WARN("failed to print", K(ret)); + } + } else if (share::schema::is_doc_rowkey_aux(type)) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(name_buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "fts_doc_rowkey"))) { + LOG_WARN("failed to print", K(ret)); + } + } else if (share::schema::is_fts_index_aux(type)) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(name_buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "%.*s", + arg.index_name_.length(), + arg.index_name_.ptr()))) { + LOG_WARN("failed to print", K(ret)); + } + } else if (share::schema::is_fts_doc_word_aux(type)) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(name_buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "%.*s_fts_doc_word", + arg.index_name_.length(), + arg.index_name_.ptr()))) { + LOG_WARN("failed to print", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, unknown fts index type", K(ret), K(type)); + } + if (OB_SUCC(ret)) { + arg.index_name_.assign_ptr(name_buf, 
static_cast(pos)); + } else { + LOG_WARN("failed to generate fts aux index name", K(ret)); + } + } + return ret; +} + +/* + * this func will also: + * 1. add cascade flag to corresponding column of data_schema + * 2. add doc_id, word, word_count column to data_schema +*/ +int ObFtsIndexBuilderUtil::adjust_fts_args( + obrpc::ObCreateIndexArg &index_arg, + ObTableSchema &data_schema, // not const since will add column to data schema + ObIArray &gen_columns) +{ + int ret = OB_SUCCESS; + const ObIndexType &index_type = index_arg.index_type_; + uint64_t doc_id_col_id = OB_INVALID_ID; + uint64_t word_col_id = OB_INVALID_ID; + uint64_t word_count_col_id = OB_INVALID_ID; + uint64_t doc_len_col_id = OB_INVALID_ID; + const ObColumnSchemaV2 *existing_doc_id_col = nullptr; + const ObColumnSchemaV2 *existing_word_col = nullptr; + const ObColumnSchemaV2 *existing_word_count_col = nullptr; + const ObColumnSchemaV2 *existing_doc_length_col = nullptr; + ObArray tmp_cols; + uint64_t available_col_id = 0; + bool is_rowkey_doc = false; + bool is_doc_rowkey = false; + bool is_fts_index = false; + bool is_doc_word = false; + if (!data_schema.is_valid() || !share::schema::is_fts_index(index_type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), K(index_type)); + } else if (FALSE_IT(available_col_id = data_schema.get_max_used_column_id() + 1)) { + } else if (FALSE_IT(is_rowkey_doc = share::schema::is_rowkey_doc_aux(index_type))) { + } else if (FALSE_IT(is_doc_rowkey = share::schema::is_doc_rowkey_aux(index_type))) { + } else if (FALSE_IT(is_fts_index = share::schema::is_fts_index_aux(index_type))) { + } else if (FALSE_IT(is_doc_word = share::schema::is_fts_doc_word_aux(index_type))) { + } else if (OB_FAIL(check_ft_cols(&index_arg, data_schema))) { + LOG_WARN("ft cols check failed", K(ret)); + } else if (OB_FAIL(get_doc_id_col(data_schema, existing_doc_id_col))) { + LOG_WARN("failed to get doc id col", K(ret)); + } else if 
(OB_FAIL(get_word_segment_col(data_schema, &index_arg, existing_word_col))) { + LOG_WARN("failed to get word segment col", K(ret)); + } else if (OB_FAIL(get_doc_length_col(data_schema, &index_arg, existing_doc_length_col))) { + LOG_WARN("fail to get document length column", K(ret)); + } else if (OB_FAIL(get_word_cnt_col(data_schema, &index_arg, existing_word_count_col))) { + LOG_WARN("failed to get word cnt col", K(ret)); + } else { + ObColumnSchemaV2 *generated_doc_id_col = nullptr; + ObColumnSchemaV2 *generated_word_col = nullptr; + ObColumnSchemaV2 *generated_doc_len_col = nullptr; + ObColumnSchemaV2 *generated_word_count_col = nullptr; + if (is_rowkey_doc || is_doc_rowkey) { + if (OB_ISNULL(existing_doc_id_col)) { // need to generate doc id col + doc_id_col_id = available_col_id++; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(generate_doc_id_column(&index_arg, + doc_id_col_id, + data_schema, + generated_doc_id_col))) { + LOG_WARN("failed to generate doc id column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_doc_id_col))) { + LOG_WARN("failed to push back doc id col", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(push_back_gen_col(tmp_cols, + existing_doc_id_col, + generated_doc_id_col))) { + LOG_WARN("failed to push back doc id col", K(ret)); + } else if (OB_FAIL(adjust_fts_arg(&index_arg, + data_schema, + tmp_cols))) { + LOG_WARN("failed to append fts_index arg", K(ret)); + } + } else if (is_fts_index || is_doc_word) { + if (OB_ISNULL(existing_word_col)) { + word_col_id = available_col_id++; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(generate_word_segment_column(&index_arg, + word_col_id, + data_schema, + generated_word_col))) { + LOG_WARN("failed to generate word segment column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_word_col))) { + LOG_WARN("failed to push back word column", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(existing_word_count_col)) { + word_count_col_id = available_col_id++; 
+ if (OB_FAIL(generate_word_count_column(&index_arg, + word_count_col_id, + data_schema, + generated_word_count_col))) { + LOG_WARN("failed to generate word count column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_word_count_col))) { + LOG_WARN("failed to push back word count column", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(existing_doc_length_col)) { + doc_len_col_id = available_col_id++; + if (OB_FAIL(generate_doc_length_column(&index_arg, + doc_len_col_id, + data_schema, + generated_doc_len_col))) { + LOG_WARN("fail to generate document length column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_doc_len_col))) { + LOG_WARN("fail to push back generated document length", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (is_fts_index) { + if (OB_FAIL(push_back_gen_col(tmp_cols, + existing_word_col, + generated_word_col))) { + LOG_WARN("failed to push back word col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, + existing_doc_id_col, + generated_doc_id_col))) { + LOG_WARN("failed to push back doc id col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, + existing_word_count_col, + generated_word_count_col))) { + LOG_WARN("failed to push back word count col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, + existing_doc_length_col, + generated_doc_len_col))) { + LOG_WARN("fail to push back document length column", K(ret)); + } else if (OB_FAIL(adjust_fts_arg(&index_arg, + data_schema, + tmp_cols))) { + LOG_WARN("failed to append fts_index arg", K(ret)); + } + } else if (is_doc_word) { + if (OB_FAIL(push_back_gen_col(tmp_cols, + existing_doc_id_col, + generated_doc_id_col))) { + LOG_WARN("failed to push back doc id col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, + existing_word_col, + generated_word_col))) { + LOG_WARN("failed to push back word col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, + existing_word_count_col, + 
generated_word_count_col))) { + LOG_WARN("failed to push back word count col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, + existing_doc_length_col, + generated_doc_len_col))) { + LOG_WARN("fail to push back document length column", K(ret)); + } else if (OB_FAIL(adjust_fts_arg(&index_arg, + data_schema, + tmp_cols))) { + LOG_WARN("failed to append fts_index arg", K(ret)); + } + } + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::set_fts_rowkey_doc_table_columns( + const ObCreateIndexArg &arg, + const ObTableSchema &data_schema, + ObTableSchema &index_schema) +{ + int ret = OB_SUCCESS; + if (!data_schema.is_valid() || + arg.store_columns_.count() != 1 || + !share::schema::is_rowkey_doc_aux(arg.index_type_)) { + // expect only doc id column in store columns + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), + K(arg.store_columns_.count()), K(arg.index_type_)); + } + HEAP_VAR(ObRowDesc, row_desc) { + // 1. add rowkey columns as index column of fts rowkey doc table + for (int64_t i = 0; OB_SUCC(ret) && i < arg.index_columns_.count(); ++i) { + const ObColumnSchemaV2 *rowkey_column = nullptr; + const ObColumnSortItem &rowkey_col_item = arg.index_columns_.at(i); + const ObString &rowkey_col_name = rowkey_col_item.column_name_; + if (rowkey_col_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(rowkey_col_name)); + } else if (OB_ISNULL(rowkey_column = + data_schema.get_column_schema(rowkey_col_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, + rowkey_col_name.length(), + rowkey_col_name.ptr()); + LOG_WARN("get_column_schema failed", + "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", rowkey_col_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column( + rowkey_column, + true/*is_index_column*/, + true/*is_rowkey*/, + 
arg.index_columns_.at(i).order_type_, + row_desc, + index_schema, + false/*is_hidden*/, + false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed", "rowkey_column", *rowkey_column, + "rowkey_order_type", arg.index_columns_.at(i).order_type_, + K(row_desc), K(ret)); + } + } + if (OB_FAIL(ret)) { + } else { + index_schema.set_rowkey_column_num(row_desc.get_column_num()); + index_schema.set_index_column_num(row_desc.get_column_num()); + + // 2. add doc id column to fts rowkey doc table + const ObColumnSchemaV2 *doc_id_column = nullptr; + const ObString &doc_id_col_name = arg.store_columns_.at(0); + // is_rowkey is false, order_in_rowkey will not be used + const ObOrderType order_in_rowkey = ObOrderType::DESC; + if (doc_id_col_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(doc_id_col_name)); + } else if (OB_ISNULL(doc_id_column = data_schema.get_column_schema(doc_id_col_name))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", + "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", doc_id_col_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(doc_id_column, + false/*is_index_column*/, + false/*is_rowkey*/, + order_in_rowkey, + row_desc, + index_schema, + false/*is_hidden*/, + true/*is_specified_storing_col*/))) { + LOG_WARN("add_column failed", "doc_id_column", *doc_id_column, + K(row_desc), K(ret)); + } else if (OB_FAIL(index_schema.sort_column_array_by_column_id())) { + LOG_WARN("failed to sort column", K(ret)); + } else { + LOG_INFO("succeed to set fts_rowkey_doc table columns", K(index_schema)); + } + } + } + STORAGE_FTS_LOG(DEBUG, "set rowkey doc table column", K(ret), K(arg), K(index_schema), K(data_schema)); + return ret; +} + +int ObFtsIndexBuilderUtil::set_fts_doc_rowkey_table_columns( + const ObCreateIndexArg &arg, + const ObTableSchema &data_schema, + ObTableSchema 
&index_schema) +{ + int ret = OB_SUCCESS; + if (!data_schema.is_valid() || + arg.index_columns_.count() != 1 || + !share::schema::is_doc_rowkey_aux(arg.index_type_)) { + // expect only doc id column in index columns + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), + K(arg.index_columns_.count()), K(arg.index_type_)); + } + HEAP_VAR(ObRowDesc, row_desc) { + // 1. add doc id column as index columns of fts doc rowkey table + const ObColumnSchemaV2 *doc_id_column = nullptr; + const ObColumnSortItem &doc_id_col_item = arg.index_columns_.at(0); + const ObString &doc_id_col_name = doc_id_col_item.column_name_; + if (OB_FAIL(ret)) { + } else if (doc_id_col_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(doc_id_col_name)); + } else if (OB_ISNULL(doc_id_column = + data_schema.get_column_schema(doc_id_col_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, + doc_id_col_name.length(), doc_id_col_name.ptr()); + LOG_WARN("get_column_schema failed", "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", doc_id_col_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(doc_id_column, + true/*is_index_column*/, + true/*is_rowkey*/, + doc_id_col_item.order_type_, + row_desc, + index_schema, + false/*is_hidden*/, + false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed ", "doc_id_column", *doc_id_column, + "rowkey_order_type", doc_id_col_item.order_type_, K(row_desc), K(ret)); + } else { + index_schema.set_rowkey_column_num(row_desc.get_column_num()); + index_schema.set_index_column_num(row_desc.get_column_num()); + + // 2. 
add rowkey column to fts doc rowkey table + const ObColumnSchemaV2 *rowkey_column = nullptr; + const ObRowkeyInfo &rowkey_info = data_schema.get_rowkey_info(); + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_info.get_size(); ++i) { + uint64_t column_id = OB_INVALID_ID; + if (OB_FAIL(rowkey_info.get_column_id(i, column_id))) { + LOG_WARN("get_column_id failed", "index", i, K(ret)); + } else if (OB_ISNULL(rowkey_column = data_schema.get_column_schema(column_id))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", "table_id", data_schema.get_table_id(), + K(column_id), K(ret)); + } else if (ob_is_text_tc(rowkey_column->get_data_type())) { + ret = OB_ERR_WRONG_KEY_COLUMN; + LOG_WARN("Lob column should not appear in rowkey position", + "rowkey_column", *rowkey_column, "order_in_rowkey", + rowkey_column->get_order_in_rowkey(), K(row_desc), K(ret)); + } else if (ob_is_extend(rowkey_column->get_data_type()) || + ob_is_user_defined_sql_type(rowkey_column->get_data_type())) { + ret = OB_ERR_WRONG_KEY_COLUMN; + LOG_WARN("udt column should not appear in rowkey position", + "rowkey_column", *rowkey_column, "order_in_rowkey", + rowkey_column->get_order_in_rowkey(), K(row_desc), K(ret)); + } else if (ob_is_json_tc(rowkey_column->get_data_type())) { + ret = OB_ERR_JSON_USED_AS_KEY; + LOG_WARN("JSON column cannot be used in key specification.", + "rowkey_column", *rowkey_column, "order_in_rowkey", + rowkey_column->get_order_in_rowkey(), K(row_desc), K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(rowkey_column, + false/*is_index_column*/, + false/*is_rowkey*/, + rowkey_column->get_order_in_rowkey(), + row_desc, + index_schema, + false/*is_hidden*/, + false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(index_schema.sort_column_array_by_column_id())) { + LOG_WARN("failed to sort column", K(ret)); + } else { + LOG_INFO("succeed to set fts_doc_rowkey table columns", 
K(index_schema)); + } + } + } + STORAGE_FTS_LOG(DEBUG, "set fts doc rowkey table columns", K(ret), K(arg), K(index_schema), K(data_schema)); + return ret; +} + +int ObFtsIndexBuilderUtil::set_fts_index_table_columns( + const ObCreateIndexArg &arg, + const ObTableSchema &data_schema, + ObTableSchema &index_schema) +{ + int ret = OB_SUCCESS; + if (!data_schema.is_valid() || + (!share::schema::is_fts_index_aux(arg.index_type_) && + !share::schema::is_fts_doc_word_aux(arg.index_type_)) || + arg.index_columns_.count() != 2 || + arg.store_columns_.count() != 2) { + // expect word col, doc id col in index_columns, + // expect worc count, doc length col in store_columns. + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), K(arg.index_type_), + K(arg.index_columns_.count()), K(arg.store_columns_.count()), + K(arg.index_columns_), K(arg.store_columns_)); + } + HEAP_VAR(ObRowDesc, row_desc) { + // 1. add word col, doc id col to fts index table + for (int64_t i = 0; OB_SUCC(ret) && i < arg.index_columns_.count(); ++i) { + const ObColumnSchemaV2 *fts_column = nullptr; + const ObColumnSortItem &fts_col_item = arg.index_columns_.at(i); + const ObString &fts_col_name = fts_col_item.column_name_; + if (fts_col_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(fts_col_name)); + } else if (OB_ISNULL(fts_column = data_schema.get_column_schema(fts_col_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, + fts_col_name.length(), fts_col_name.ptr()); + LOG_WARN("get_column_schema failed", + "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", fts_col_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(fts_column, + true/*is_index_column*/, + true/*is_rowkey*/, + arg.index_columns_.at(i).order_type_, + row_desc, + index_schema, + false/*is_hidden*/, + 
false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed", "fts_column", *fts_column, + "rowkey_order_type", arg.index_columns_.at(i).order_type_, + K(row_desc), K(ret)); + } + } + if (OB_SUCC(ret)) { + index_schema.set_rowkey_column_num(row_desc.get_column_num()); + index_schema.set_index_column_num(row_desc.get_column_num()); + } + // 2. add word count, doc length col to fts index table + for (int64_t i = 0; OB_SUCC(ret) && i < arg.store_columns_.count(); ++i) { + const ObColumnSchemaV2 *store_column = nullptr; + const ObString &store_column_name = arg.store_columns_.at(i); + // is_rowkey is false, order_in_rowkey will not be used + const ObOrderType order_in_rowkey = ObOrderType::DESC; + if (OB_UNLIKELY(store_column_name.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(store_column_name)); + } else if (OB_ISNULL(store_column = data_schema.get_column_schema(store_column_name))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", store_column_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(store_column, + false/*is_index_column*/, + false/*is_rowkey*/, + order_in_rowkey, + row_desc, + index_schema, + false/*is_hidden*/, + true/*is_specified_storing_col*/))) { + LOG_WARN("add_column failed", K(store_column), K(row_desc), K(ret)); + } + } + if (FAILEDx(index_schema.sort_column_array_by_column_id())) { + LOG_WARN("failed to sort column", K(ret)); + } else { + LOG_INFO("succeed to set fts index table columns", K(index_schema)); + } + } + STORAGE_FTS_LOG(DEBUG, "set fts index table column", K(ret), K(arg), K(index_schema), K(data_schema)); + return ret; +} + +int ObFtsIndexBuilderUtil::check_ft_cols( + const ObCreateIndexArg *index_arg, + ObTableSchema &data_schema) +{ + int ret = OB_SUCCESS; + ObColumnSchemaV2 *col_schema = NULL; + 
if (OB_ISNULL(index_arg) || + !share::schema::is_fts_index(index_arg->index_type_) || + !data_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema.is_valid())); + } + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (!col_schema->is_string_type() || + col_schema->get_meta_type().is_blob()) { + ret = OB_ERR_BAD_FT_COLUMN; + LOG_USER_ERROR(OB_ERR_BAD_FT_COLUMN, column_name.length(), column_name.ptr()); + } else { + col_schema->add_column_flag(GENERATED_DEPS_CASCADE_FLAG); + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::adjust_fts_arg( + ObCreateIndexArg *index_arg, // not const since index_arg.index_schema.allocator will be used + const ObTableSchema &data_schema, + const ObIArray &fts_cols) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(index_arg) || + !share::schema::is_fts_index(index_arg->index_type_) || + !data_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema)); + } else { + const ObIndexType &index_type = index_arg->index_type_; + const bool is_rowkey_doc = share::schema::is_rowkey_doc_aux(index_arg->index_type_); + const bool is_doc_rowkey = share::schema::is_doc_rowkey_aux(index_arg->index_type_); + const bool is_fts_index = share::schema::is_fts_index_aux(index_arg->index_type_); + const bool is_doc_word = share::schema::is_fts_doc_word_aux(index_arg->index_type_); + if ((is_rowkey_doc && fts_cols.count() != 1) || + (is_doc_rowkey && fts_cols.count() != 1) || + 
(is_fts_index && fts_cols.count() != 4) || + (is_doc_word && fts_cols.count() != 4) ) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fts cols count not expected", K(ret), K(index_type), K(fts_cols)); + } else { + index_arg->index_columns_.reuse(); + index_arg->store_columns_.reuse(); + ObIAllocator *allocator = index_arg->index_schema_.get_allocator(); + + if (OB_ISNULL(allocator)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret)); + } else if (is_rowkey_doc) { + // 1. add rowkey column to arg->index_columns + const ObRowkeyInfo &rowkey_info = data_schema.get_rowkey_info(); + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_info.get_size(); ++i) { + ObColumnSortItem rowkey_column; + const ObColumnSchemaV2 *rowkey_col = NULL; + uint64_t column_id = OB_INVALID_ID; + if (OB_FAIL(rowkey_info.get_column_id(i, column_id))) { + LOG_WARN("get_column_id failed", "index", i, K(ret)); + } else if (NULL == (rowkey_col = data_schema.get_column_schema(column_id))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", "table_id", + data_schema.get_table_id(), K(column_id), K(ret)); + } else if (OB_FAIL(ob_write_string(*allocator, + rowkey_col->get_column_name_str(), + rowkey_column.column_name_))) { + //to keep the memory lifetime of column_name consistent with index_arg + LOG_WARN("deep copy column name failed", K(ret)); + } else if (OB_FAIL(index_arg->index_columns_.push_back(rowkey_column))) { + LOG_WARN("failed to push back rowkey column", K(ret)); + } + } + // 2. 
add doc id column to arg->store_columns + const ObColumnSchemaV2 *doc_id_col = fts_cols.at(0); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(index_arg->store_columns_.push_back( + doc_id_col->get_column_name_str()))) { + LOG_WARN("failed to push back doc id column", K(ret)); + } + + } else if (is_doc_rowkey) { + // add doc id column to arg->index_columns + ObColumnSortItem doc_id_column; + const ObColumnSchemaV2 *doc_id_col = fts_cols.at(0); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(doc_id_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fts_col is null", K(ret)); + } else if (OB_FAIL(ob_write_string(*allocator, + doc_id_col->get_column_name_str(), + doc_id_column.column_name_))) { + //to keep the memory lifetime of column_name consistent with index_arg + LOG_WARN("deep copy column name failed", K(ret)); + } else if (OB_FAIL(index_arg->index_columns_.push_back(doc_id_column))) { + LOG_WARN("failed to push back doc id column", K(ret)); + } + + } else if (is_fts_index) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(inner_adjust_fts_arg(index_arg, + fts_cols, + OB_FTS_INDEX_TABLE_INDEX_COL_CNT, + allocator))) { + LOG_WARN("failed to inner_adjust_fts_arg", K(ret)); + } + } else if (is_doc_word) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(inner_adjust_fts_arg(index_arg, + fts_cols, + OB_FTS_DOC_WORD_TABLE_INDEX_COL_CNT, + allocator))) { + LOG_WARN("failed to inner_adjust_fts_arg", K(ret)); + } + } + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::inner_adjust_fts_arg( + obrpc::ObCreateIndexArg *fts_arg, + const ObIArray &fts_cols, + const int index_column_cnt, + ObIAllocator *allocator) +{ + int ret = OB_SUCCESS; + // 1. 
add doc id column, word column to arg->index_columns + if (OB_ISNULL(fts_arg) || + (!share::schema::is_fts_index_aux(fts_arg->index_type_) && + !share::schema::is_fts_doc_word_aux(fts_arg->index_type_)) || + OB_ISNULL(allocator) || + fts_cols.count() != index_column_cnt + 2) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid argument", K(ret), KPC(fts_arg), KP(allocator), + K(fts_cols.count()), K(index_column_cnt)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < index_column_cnt; ++i) { + ObColumnSortItem fts_column; + const ObColumnSchemaV2 *fts_col = fts_cols.at(i); + if (OB_ISNULL(fts_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fts_col is null", K(ret), K(i)); + } else if (OB_FAIL(ob_write_string(*allocator, + fts_col->get_column_name_str(), + fts_column.column_name_))) { + //to keep the memory lifetime of column_name consistent with index_arg + LOG_WARN("deep copy column name failed", K(ret)); + } else if (OB_FAIL(fts_arg->index_columns_.push_back(fts_column))) { + LOG_WARN("failed to push back index column", K(ret)); + } + } + // 2. add word count column to arg->store_columns + const ObColumnSchemaV2 *word_count_col = fts_cols.at(index_column_cnt); + if (FAILEDx(fts_arg->store_columns_.push_back(word_count_col->get_column_name_str()))) { + LOG_WARN("failed to push back word count column", K(ret)); + } + // 3. 
add document length column to arg->store_columns + const ObColumnSchemaV2 *doc_length_col = fts_cols.at(index_column_cnt + 1); + if (FAILEDx(fts_arg->store_columns_.push_back(doc_length_col->get_column_name_str()))) { + LOG_WARN("fail to push document length column", K(ret)); + } + return ret; +} + +int ObFtsIndexBuilderUtil::generate_doc_id_column( + const ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, // not const since will add column to data schema + ObColumnSchemaV2 *&doc_id_col) +{ + int ret = OB_SUCCESS; + doc_id_col = nullptr; + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + int64_t name_pos = 0; + bool col_exists = false; + if (OB_ISNULL(index_arg) || + !share::schema::is_fts_index(index_arg->index_type_) || + !data_schema.is_valid() || + col_id == OB_INVALID_ID) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), K(col_id)); + } else if (OB_FAIL(construct_doc_id_col_name(col_name_buf, + OB_MAX_COLUMN_NAME_LENGTH, + name_pos))) { + LOG_WARN("failed to construct doc id col name", K(ret)); + } else if (OB_FAIL(check_fts_gen_col(data_schema, + col_id, + col_name_buf, + name_pos, + col_exists))) { + LOG_WARN("check doc id col failed", K(ret)); + } else if (!col_exists) { + const ObRowkeyInfo &rowkey_info = data_schema.get_rowkey_info(); + const ObColumnSchemaV2 *col_schema = nullptr; + SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], ft_expr_def) { + MEMSET(ft_expr_def, 0, sizeof(ft_expr_def)); + int64_t def_pos = 0; + if (OB_FAIL(databuff_printf(ft_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, "DOC_ID()"))) { + LOG_WARN("print generate expr definition prefix failed", K(ret)); + } else { + ObColumnSchemaV2 column_schema; + ObObj default_value; + default_value.set_varchar(ft_expr_def, static_cast(def_pos)); + column_schema.set_rowkey_position(0); //非主键列 + column_schema.set_index_position(0); //非索引列 + column_schema.set_tbl_part_key_pos(0); //非partition key + 
column_schema.set_tenant_id(data_schema.get_tenant_id()); + column_schema.set_table_id(data_schema.get_table_id()); + column_schema.set_column_id(col_id); + column_schema.add_column_flag(GENERATED_DOC_ID_COLUMN_FLAG); + column_schema.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + column_schema.set_is_hidden(true); + column_schema.set_nullable(false); + column_schema.set_data_type(ObVarcharType); + column_schema.set_data_length(OB_DOC_ID_COLUMN_BYTE_LENGTH); + column_schema.set_collation_type(CS_TYPE_BINARY); + column_schema.set_prev_column_id(UINT64_MAX); + column_schema.set_next_column_id(UINT64_MAX); + if (OB_FAIL(column_schema.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if (OB_FAIL(column_schema.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column_schema.set_cur_default_value(default_value))) { + LOG_WARN("set current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(column_schema))) { + LOG_WARN("add column schema to data table failed", K(ret)); + } else { + doc_id_col = data_schema.get_column_schema(column_schema.get_column_id()); + if (OB_ISNULL(doc_id_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("generate doc id col failed", K(ret), KP(doc_id_col)); + } else { + LOG_INFO("succeed to generate doc id column", KCSTRING(col_name_buf), K(col_id), K(data_schema)); + } + } + } + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::generate_word_segment_column( + const ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, // not const since will add column to data schema + ObColumnSchemaV2 *&word_segment_col) +{ + int ret = OB_SUCCESS; + word_segment_col = nullptr; + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + int64_t name_pos = 0; + bool col_exists = false; + if (OB_ISNULL(index_arg) || + !share::schema::is_fts_index(index_arg->index_type_) || + !data_schema.is_valid() || + col_id 
== OB_INVALID_ID ) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), K(col_id)); + } else if (OB_FAIL(construct_word_segment_col_name(index_arg, + data_schema, + col_name_buf, + OB_MAX_COLUMN_NAME_LENGTH, + name_pos))) { + LOG_WARN("failed to construct word segment col name", K(ret)); + } else if (OB_FAIL(check_fts_gen_col(data_schema, + col_id, + col_name_buf, + name_pos, + col_exists))) { + LOG_WARN("check word segment col failed", K(ret)); + } else if (!col_exists) { + int32_t max_data_length = 0; + ObCollationType collation_type = CS_TYPE_INVALID; + ObColumnSchemaV2 column_schema; + SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], ft_expr_def) { + MEMSET(ft_expr_def, 0, sizeof(ft_expr_def)); + int64_t def_pos = 0; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(ft_expr_def, + OB_MAX_DEFAULT_VALUE_LENGTH, + def_pos, + "WORD_SEGMENT("))) { + LOG_WARN("print generate expr definition prefix failed", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + ObColumnSchemaV2 *col_schema = nullptr; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(column_schema.add_cascaded_column_id(col_schema->get_column_id()))) { + LOG_WARN("add cascaded column to generated column failed", K(ret)); + } else if (OB_FAIL(databuff_printf(ft_expr_def, + OB_MAX_DEFAULT_VALUE_LENGTH, + def_pos, + "`%s`, ", + col_schema->get_column_name()))) { + LOG_WARN("print column name to buffer failed", K(ret)); + } else { + if (max_data_length < col_schema->get_data_length()) { + max_data_length = 
col_schema->get_data_length(); + } + if (CS_TYPE_INVALID == collation_type) { + collation_type = col_schema->get_collation_type(); + } else if (collation_type != col_schema->get_collation_type()) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, + "create fulltext index on columns with different collation"); + } + } + } + if (OB_FAIL(ret)) { + } else { + def_pos -= 2; // remove last ", " + if (OB_FAIL(databuff_printf(ft_expr_def, + OB_MAX_DEFAULT_VALUE_LENGTH, + def_pos, + ")"))) { + LOG_WARN("print generate expr definition suffix failed", K(ret)); + } else { + ObObj default_value; + default_value.set_varchar(ft_expr_def, static_cast(def_pos)); + column_schema.set_rowkey_position(0); //非主键列 + column_schema.set_index_position(0); //非索引列 + column_schema.set_tbl_part_key_pos(0); //非partition key + column_schema.set_tenant_id(data_schema.get_tenant_id()); + column_schema.set_table_id(data_schema.get_table_id()); + column_schema.set_column_id(col_id); + column_schema.add_column_flag(GENERATED_FTS_WORD_SEGMENT_COLUMN_FLAG); + column_schema.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + column_schema.set_is_hidden(true); + column_schema.set_data_type(ObVarcharType); + column_schema.set_data_length(max_data_length); //生成列的长度和被分词列的最大长度保持一致 + column_schema.set_collation_type(collation_type); //生成列的collation和被分词列的collation保持一致 + column_schema.set_prev_column_id(UINT64_MAX); + column_schema.set_next_column_id(UINT64_MAX); + if (OB_FAIL(column_schema.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if (OB_FAIL(column_schema.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column_schema.set_cur_default_value(default_value))) { + LOG_WARN("set current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(column_schema))) { + LOG_WARN("add column schema to data table failed", K(ret)); + } else { + word_segment_col = 
data_schema.get_column_schema(column_schema.get_column_id()); + if (OB_ISNULL(word_segment_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("generate word segment col failed", K(ret), KP(word_segment_col)); + } else { + LOG_INFO("succeed to generate word segment column", KCSTRING(col_name_buf), K(col_id), K(data_schema)); + } + } + } + } + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::generate_word_count_column( + const ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, // not const since will add column to data schema + ObColumnSchemaV2 *&word_count_col) +{ + int ret = OB_SUCCESS; + word_count_col = nullptr; + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + int64_t name_pos = 0; + bool col_exists = false; + if (OB_ISNULL(index_arg) || + !share::schema::is_fts_index_aux(index_arg->index_type_) || + !data_schema.is_valid() || + col_id == OB_INVALID_ID) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), K(col_id)); + } else if (OB_FAIL(construct_word_count_col_name(index_arg, + data_schema, + col_name_buf, + OB_MAX_COLUMN_NAME_LENGTH, + name_pos))) { + LOG_WARN("failed to construct word count col name", K(ret)); + } else if (OB_FAIL(check_fts_gen_col(data_schema, + col_id, + col_name_buf, + name_pos, + col_exists))) { + LOG_WARN("check word count col failed", K(ret)); + } else if (!col_exists) { + ObColumnSchemaV2 column_schema; + SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], ft_expr_def) { + MEMSET(ft_expr_def, 0, sizeof(ft_expr_def)); + ObCollationType collation_type = CS_TYPE_INVALID; + int64_t def_pos = 0; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(ft_expr_def, + OB_MAX_DEFAULT_VALUE_LENGTH, + def_pos, + "WORD_COUNT("))) { + LOG_WARN("print generate expr definition prefix failed", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; 
+ const ObColumnSchemaV2 *col_schema = nullptr; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(column_schema.add_cascaded_column_id(col_schema->get_column_id()))) { + LOG_WARN("add cascaded column to generated column failed", K(ret)); + } else if (OB_FAIL(databuff_printf(ft_expr_def, + OB_MAX_DEFAULT_VALUE_LENGTH, + def_pos, + "`%s`, ", + col_schema->get_column_name()))) { + LOG_WARN("print column name to buffer failed", K(ret)); + } else if (CS_TYPE_INVALID == collation_type) { + collation_type = col_schema->get_collation_type(); + } else if (collation_type != col_schema->get_collation_type()) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create fulltext index on columns with different collation"); + } + } + if (OB_SUCC(ret)) { + def_pos -= 2; // remove last ", " + if (OB_FAIL(databuff_printf(ft_expr_def, + OB_MAX_DEFAULT_VALUE_LENGTH, + def_pos, + ")"))) { + LOG_WARN("print generate expr definition suffix failed", K(ret)); + } else { + ObObj default_value; + default_value.set_varchar(ft_expr_def, static_cast(def_pos)); + column_schema.set_rowkey_position(0); //非主键列 + column_schema.set_index_position(0); //非索引列 + column_schema.set_tbl_part_key_pos(0); //非partition key + column_schema.set_tenant_id(data_schema.get_tenant_id()); + column_schema.set_table_id(data_schema.get_table_id()); + column_schema.set_column_id(col_id); + column_schema.add_column_flag(GENERATED_FTS_WORD_COUNT_COLUMN_FLAG); + column_schema.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + column_schema.set_is_hidden(true); + column_schema.set_data_type(ObUInt64Type); + column_schema.set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI); + 
column_schema.set_prev_column_id(UINT64_MAX); + column_schema.set_next_column_id(UINT64_MAX); + if (OB_FAIL(column_schema.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if (OB_FAIL(column_schema.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column_schema.set_cur_default_value(default_value))) { + LOG_WARN("set current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(column_schema))) { + LOG_WARN("add word_count column schema to data table failed", K(ret)); + } else { + word_count_col = data_schema.get_column_schema(column_schema.get_column_id()); + if (OB_ISNULL(word_count_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("generate word count col failed", K(ret), KP(word_count_col)); + } else { + LOG_INFO("succeed to generate word count column", K(col_id), K(data_schema)); + } + } + } + } + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::generate_doc_length_column( + const obrpc::ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, // not const since will add column to data schema + ObColumnSchemaV2 *&doc_length_col) +{ + int ret = OB_SUCCESS; + doc_length_col = nullptr; + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + int64_t name_pos = 0; + bool col_exists = false; + if (OB_ISNULL(index_arg) + || OB_UNLIKELY(!share::schema::is_fts_index_aux(index_arg->index_type_)) + || OB_UNLIKELY(!data_schema.is_valid()) + || OB_UNLIKELY(col_id == OB_INVALID_ID)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(index_arg), K(data_schema), K(col_id)); + } else if (OB_FAIL(construct_doc_length_col_name(index_arg, + data_schema, + col_name_buf, + OB_MAX_COLUMN_NAME_LENGTH, + name_pos))) { + LOG_WARN("fail to construct document length column name", K(ret)); + } else if (OB_FAIL(check_fts_gen_col(data_schema, col_id, col_name_buf, name_pos, col_exists))) { + LOG_WARN("fail to 
check document count", K(ret), K(col_id)); + } else if (!col_exists) { + ObColumnSchemaV2 column_schema; + SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], ft_expr_def) { + MEMSET(ft_expr_def, 0, sizeof(ft_expr_def)); + ObCollationType collation_type = CS_TYPE_INVALID; + int64_t def_pos = 0; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(ft_expr_def, + OB_MAX_DEFAULT_VALUE_LENGTH, + def_pos, + "DOC_LENGTH("))) { + LOG_WARN("print generate expr definition prefix failed", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + const ObColumnSchemaV2 *col_schema = nullptr; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(column_schema.add_cascaded_column_id(col_schema->get_column_id()))) { + LOG_WARN("add cascaded column to generated column failed", K(ret)); + } else if (OB_FAIL(databuff_printf(ft_expr_def, + OB_MAX_DEFAULT_VALUE_LENGTH, + def_pos, + "`%s`, ", + col_schema->get_column_name()))) { + LOG_WARN("print column name to buffer failed", K(ret)); + } else if (CS_TYPE_INVALID == collation_type) { + collation_type = col_schema->get_collation_type(); + } else if (collation_type != col_schema->get_collation_type()) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create fulltext index on columns with different collation"); + } + } + if (OB_SUCC(ret)) { + def_pos -= 2; // remove last ", " + if (OB_FAIL(databuff_printf(ft_expr_def, + OB_MAX_DEFAULT_VALUE_LENGTH, + def_pos, + ")"))) { + LOG_WARN("print generate expr definition suffix failed", K(ret)); + } else { + ObObj default_value; + default_value.set_varchar(ft_expr_def, 
static_cast(def_pos)); + column_schema.set_rowkey_position(0); //非主键列 + column_schema.set_index_position(0); //非索引列 + column_schema.set_tbl_part_key_pos(0); //非partition key + column_schema.set_tenant_id(data_schema.get_tenant_id()); + column_schema.set_table_id(data_schema.get_table_id()); + column_schema.set_column_id(col_id); + column_schema.add_column_flag(GENERATED_FTS_DOC_LENGTH_COLUMN_FLAG); + column_schema.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + column_schema.set_is_hidden(true); + column_schema.set_data_type(ObUInt64Type); + column_schema.set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI); + column_schema.set_prev_column_id(UINT64_MAX); + column_schema.set_next_column_id(UINT64_MAX); + if (OB_FAIL(column_schema.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if (OB_FAIL(column_schema.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column_schema.set_cur_default_value(default_value))) { + LOG_WARN("set current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(column_schema))) { + LOG_WARN("add word_count column schema to data table failed", K(ret)); + } else { + doc_length_col = data_schema.get_column_schema(column_schema.get_column_id()); + if (OB_ISNULL(doc_length_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("generate word count col failed", K(ret), KP(doc_length_col)); + } else { + LOG_INFO("succeed to generate document length column", K(col_id), K(data_schema)); + } + } + } + } + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::construct_doc_id_col_name( + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos) +{ + int ret = OB_SUCCESS; + name_pos = 0; + if (OB_ISNULL(col_name_buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(col_name_buf)); + } else { + MEMSET(col_name_buf, 0, buf_len); + if (OB_FAIL(databuff_printf(col_name_buf, + buf_len, + name_pos, + 
OB_DOC_ID_COLUMN_NAME))) { + LOG_WARN("print generate column name failed", K(ret)); + } else if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, "_%lu", ObTimeUtility::current_time()))){ + LOG_WARN("fail to printf current time", K(ret)); + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::construct_word_segment_col_name( + const ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos) +{ + int ret = OB_SUCCESS; + name_pos = 0; + if (OB_ISNULL(index_arg) || + !share::schema::is_fts_index(index_arg->index_type_) || + !data_schema.is_valid() || + OB_ISNULL(col_name_buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), + K(col_name_buf)); + } else { + MEMSET(col_name_buf, 0, buf_len); + if (OB_FAIL(databuff_printf(col_name_buf, + buf_len, + name_pos, + OB_WORD_SEGMENT_COLUMN_NAME_PREFIX))) { + LOG_WARN("print generate column prefix name failed", K(ret)); + } + const ObColumnSchemaV2 *col_schema = NULL; + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(databuff_printf(col_name_buf, + buf_len, + name_pos, + "_%ld", + col_schema->get_column_id()))) { + LOG_WARN("print column id to buffer failed", K(ret), K(col_schema->get_column_id())); + } + } + if (FAILEDx(databuff_printf(col_name_buf, buf_len, name_pos, "_%lu", ObTimeUtility::current_time()))){ + LOG_WARN("fail to printf current time", K(ret)); + } + } + return ret; +} + +int 
ObFtsIndexBuilderUtil::construct_word_count_col_name( + const ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos) +{ + int ret = OB_SUCCESS; + name_pos = 0; + if (OB_ISNULL(index_arg) || + !share::schema::is_fts_index(index_arg->index_type_) || + !data_schema.is_valid() || + OB_ISNULL(col_name_buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), + K(col_name_buf)); + } else { + MEMSET(col_name_buf, 0, buf_len); + if (OB_FAIL(databuff_printf(col_name_buf, + buf_len, + name_pos, + OB_WORD_COUNT_COLUMN_NAME_PREFIX))) { + LOG_WARN("print generate column prefix name failed", K(ret)); + } + const ObColumnSchemaV2 *col_schema = NULL; + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(databuff_printf(col_name_buf, + buf_len, + name_pos, + "_%ld", + col_schema->get_column_id()))) { + LOG_WARN("print column id to buffer failed", K(ret), K(col_schema->get_column_id())); + } + } + if (FAILEDx(databuff_printf(col_name_buf, buf_len, name_pos, "_%lu", ObTimeUtility::current_time()))){ + LOG_WARN("fail to printf current time", K(ret)); + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::construct_doc_length_col_name( + const obrpc::ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos) +{ + int ret = OB_SUCCESS; + name_pos = 0; + if (OB_ISNULL(index_arg) + || 
OB_UNLIKELY(!share::schema::is_fts_index(index_arg->index_type_)) + || OB_UNLIKELY(!data_schema.is_valid()) + || OB_ISNULL(col_name_buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(index_arg), K(data_schema), K(col_name_buf)); + } else { + MEMSET(col_name_buf, 0, buf_len); + if (OB_FAIL(databuff_printf(col_name_buf, + buf_len, + name_pos, + OB_DOC_LENGTH_COLUMN_NAME_PREFIX))) { + LOG_WARN("fail to printf document length column", K(ret)); + } + const ObColumnSchemaV2 *col_schema = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), column_name.ptr()); + } else if (OB_FAIL(databuff_printf(col_name_buf, + buf_len, + name_pos, + "_%ld", + col_schema->get_column_id()))) { + LOG_WARN("fail to printf document length column", K(ret), K(col_schema->get_column_id())); + } + } + if (FAILEDx(databuff_printf(col_name_buf, buf_len, name_pos, "_%lu", ObTimeUtility::current_time()))){ + LOG_WARN("fail to printf current time", K(ret)); + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::check_fts_gen_col( + const ObTableSchema &data_schema, + const uint64_t col_id, + const char *col_name_buf, + const int64_t name_pos, + bool &col_exists) +{ + int ret = OB_SUCCESS; + col_exists = false; + if (!data_schema.is_valid() || + OB_INVALID_ID == col_id || + OB_ISNULL(col_name_buf) || + name_pos < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), K(col_id), + KP(col_name_buf), K(name_pos)); + } else { + // another fulltext index could have created the generated column + const ObColumnSchemaV2 
*ft_col = data_schema.get_column_schema(col_name_buf); + if (OB_NOT_NULL(ft_col) && ft_col->get_column_id() != col_id) { + // check the specified column id is consistent with the existed column schema + ret = OB_ERR_INVALID_COLUMN_ID; + LOG_USER_ERROR(OB_ERR_INVALID_COLUMN_ID, static_cast(name_pos), + col_name_buf); + LOG_WARN("Column id specified by create fulltext index mismatch " + "with column schema id", K(ret), K(col_id), K(*ft_col)); + } else if (OB_ISNULL(ft_col) && OB_NOT_NULL(data_schema.get_column_schema(col_id))) { + // check the specified column id is not used by others + ret = OB_ERR_INVALID_COLUMN_ID; + LOG_USER_ERROR(OB_ERR_INVALID_COLUMN_ID, static_cast(name_pos), + col_name_buf); + LOG_WARN("Column id specified by create fulltext index has been used", + K(ret), K(col_id)); + } + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_NOT_NULL(ft_col)) { + // the generated colum is created + col_exists = true; + if (OB_UNLIKELY(!ft_col->has_column_flag(GENERATED_FTS_WORD_SEGMENT_COLUMN_FLAG))) { + ret = OB_ERR_COLUMN_DUPLICATE; + LOG_USER_ERROR(OB_ERR_COLUMN_DUPLICATE, static_cast(name_pos), + col_name_buf); + LOG_WARN("Generate column name has been used", K(ret), K(*ft_col)); + } + } else { + col_exists = false; + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::get_doc_id_col( + const ObTableSchema &data_schema, + const ObColumnSchemaV2 *&doc_id_col) +{ + int ret = OB_SUCCESS; + doc_id_col = nullptr; + if (!data_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema)); + } else { + for (ObTableSchema::const_column_iterator iter = data_schema.column_begin(); + OB_SUCC(ret) && OB_ISNULL(doc_id_col) && iter != data_schema.column_end(); + iter++) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(data_schema)); + } else if (column_schema->is_doc_id_column()) { + doc_id_col = 
column_schema; + } + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::get_word_segment_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&word_segment_col) +{ + int ret = OB_SUCCESS; + schema::ColumnReferenceSet index_col_set; + word_segment_col = nullptr; + if (!data_schema.is_valid() || + OB_ISNULL(index_arg) || + !share::schema::is_fts_index(index_arg->index_type_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), KPC(index_arg)); + } else if (OB_FAIL(get_index_column_ids(data_schema, *index_arg, index_col_set))) { + LOG_WARN("fail to get index column ids", K(ret), K(data_schema), KPC(index_arg)); + } else { + for (ObTableSchema::const_column_iterator iter = data_schema.column_begin(); + OB_SUCC(ret) && OB_ISNULL(word_segment_col) && iter != data_schema.column_end(); + iter++) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(data_schema)); + } else if (column_schema->is_word_segment_column()) { + bool is_match = false; + if (OB_FAIL(check_index_match(*column_schema, index_col_set, is_match))) { + LOG_WARN("fail to check index match", K(ret), KPC(column_schema), K(index_col_set)); + } else if (is_match) { + word_segment_col = column_schema; + } + } + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::get_word_cnt_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&word_cnt_col) +{ + int ret = OB_SUCCESS; + schema::ColumnReferenceSet index_col_set; + word_cnt_col = nullptr; + if (!data_schema.is_valid() || + OB_ISNULL(index_arg) || + !share::schema::is_fts_index(index_arg->index_type_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), KPC(index_arg)); + } else if (OB_FAIL(get_index_column_ids(data_schema, *index_arg, index_col_set))) { + 
LOG_WARN("fail to get index column ids", K(ret), K(data_schema), KPC(index_arg)); + } else { + for (ObTableSchema::const_column_iterator iter = data_schema.column_begin(); + OB_SUCC(ret) && OB_ISNULL(word_cnt_col) && iter != data_schema.column_end(); + iter++) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(data_schema)); + } else if (column_schema->is_word_count_column()) { + bool is_match = false; + if (OB_FAIL(check_index_match(*column_schema, index_col_set, is_match))) { + LOG_WARN("fail to check index match", K(ret), KPC(column_schema), K(index_col_set)); + } else if (is_match) { + word_cnt_col = column_schema; + } + } + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::get_doc_length_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&doc_len_col) +{ + int ret = OB_SUCCESS; + schema::ColumnReferenceSet index_col_set; + doc_len_col = nullptr; + if (OB_UNLIKELY(!data_schema.is_valid()) + || OB_ISNULL(index_arg) + || OB_UNLIKELY(!share::schema::is_fts_index(index_arg->index_type_))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(data_schema), KPC(index_arg)); + } else if (OB_FAIL(get_index_column_ids(data_schema, *index_arg, index_col_set))) { + LOG_WARN("fail to get index column ids", K(ret), K(data_schema), KPC(index_arg)); + } else { + for (ObTableSchema::const_column_iterator iter = data_schema.column_begin(); + OB_SUCC(ret) && OB_ISNULL(doc_len_col) && iter != data_schema.column_end(); + iter++) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(data_schema)); + } else if (column_schema->is_doc_length_column()) { + bool is_match = false; + if (OB_FAIL(check_index_match(*column_schema, index_col_set, is_match))) { + 
LOG_WARN("fail to check index match", K(ret), KPC(column_schema), K(index_col_set)); + } else if (is_match) { + doc_len_col = column_schema; + } + } + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::push_back_gen_col( + ObIArray &cols, + const ObColumnSchemaV2 *existing_col, + ObColumnSchemaV2 *generated_col) +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(existing_col)) { + if (OB_FAIL(cols.push_back(existing_col))) { + LOG_WARN("failed to push back existing col", K(ret)); + } + } else { + if (OB_ISNULL(generated_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("generated col is nullptr", K(ret)); + } else if (OB_FAIL(cols.push_back(generated_col))) { + LOG_WARN("failed to push back generated col", K(ret)); + } + } + return ret; +} + +int ObFtsIndexBuilderUtil::generate_fts_parser_name( + obrpc::ObCreateIndexArg &arg, + ObIAllocator *allocator) +{ + int ret = OB_SUCCESS; + char *name_buf = nullptr; + share::schema::ObIndexType type = arg.index_type_; + if (OB_ISNULL(allocator)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret), KP(allocator)); + } else if (OB_UNLIKELY(!share::schema::is_fts_index_aux(type) + && !share::schema::is_fts_doc_word_aux(type))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(type)); + } else if (OB_ISNULL(name_buf = static_cast(allocator->alloc(OB_PLUGIN_NAME_LENGTH)))) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc name buffer", K(ret)); + } else { + share::ObPluginName parser_name; + storage::ObFTParser parser; + const char *name_str = nullptr; + if (arg.index_option_.parser_name_.empty()) { + name_str = common::OB_DEFAULT_FULLTEXT_PARSER_NAME; + } else { + name_str = arg.index_option_.parser_name_.ptr(); + } + if (OB_FAIL(parser_name.set_name(name_str))) { + LOG_WARN("fail to set plugin name", K(ret), KCSTRING(name_str)); + } else if (OB_FAIL(OB_FT_PLUGIN_MGR.get_ft_parser(parser_name, parser))) { + LOG_WARN("fail to get fulltext parser", K(ret), K(parser_name)); + } 
else if (OB_FAIL(parser.serialize_to_str(name_buf, OB_PLUGIN_NAME_LENGTH))) { + LOG_WARN("fail to serialize to cstring", K(ret), K(parser)); + } else { + arg.index_option_.parser_name_ = common::ObString::make_string(name_buf); + } + } + if (OB_FAIL(ret) && OB_NOT_NULL(name_buf)) { + allocator->free(name_buf); + } + return ret; +} + +int ObFtsIndexBuilderUtil::get_index_column_ids( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg &arg, + schema::ColumnReferenceSet &index_column_ids) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!share::schema::is_fts_index(arg.index_type_) || !data_schema.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(arg), K(data_schema)); + } else { + const ObColumnSchemaV2 *col_schema = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < arg.index_columns_.count(); ++i) { + const ObString &column_name = arg.index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), column_name.ptr()); + } else if (OB_FAIL(index_column_ids.add_member(col_schema->get_column_id()))) { + LOG_WARN("fail to add index column id", K(ret), K(col_schema->get_column_id())); + } + } + } + return ret; +} +int ObFtsIndexBuilderUtil::check_index_match( + const schema::ObColumnSchemaV2 &column, + const schema::ColumnReferenceSet &index_column_ids, + bool &is_match) +{ + int ret = OB_SUCCESS; + ObSEArray cascaded_col_ids; + is_match = false; + if (OB_UNLIKELY(!column.is_valid() || index_column_ids.is_empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(column), K(index_column_ids)); + } else if (OB_FAIL(column.get_cascaded_column_ids(cascaded_col_ids))) { + LOG_WARN("fail to get cascaded 
column ids", K(ret), K(column)); + } else if (cascaded_col_ids.count() == index_column_ids.num_members()) { + bool mismatch = false; + for (int64_t i = 0; !mismatch && i < cascaded_col_ids.count(); ++i) { + if (!index_column_ids.has_member(cascaded_col_ids.at(i))) { + mismatch = true; + } + } + is_match = !mismatch; + } + return ret; +} + +int ObMulValueIndexBuilderUtil::generate_mulvalue_index_name( + obrpc::ObCreateIndexArg &arg, + ObIAllocator *allocator) +{ + int ret = OB_SUCCESS; + char *name_buf = nullptr; + share::schema::ObIndexType type = arg.index_type_; + if (OB_ISNULL(allocator)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret)); + } else if (!is_multivalue_index_aux(type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(type)); + } else if (OB_ISNULL(name_buf = static_cast(allocator->alloc(OB_MAX_TABLE_NAME_LENGTH)))) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc mem", K(ret)); + } else { + MEMSET(name_buf, 0, OB_MAX_TABLE_NAME_LENGTH); + int64_t pos = 0; + if (OB_FAIL(databuff_printf(name_buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "%.*s", + arg.index_name_.length(), + arg.index_name_.ptr()))) { + LOG_WARN("failed to print", K(ret)); + } + if (OB_SUCC(ret)) { + arg.index_name_.assign_ptr(name_buf, static_cast(pos)); + } else { + LOG_WARN("failed to generate multivalue aux index name", K(ret), K(type)); + } + } + return ret; +} + +int ObMulValueIndexBuilderUtil::construct_mulvalue_col_name( + const ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + bool is_budy_column, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos) +{ + int ret = OB_SUCCESS; + name_pos = 0; + if (OB_ISNULL(index_arg) || + !is_multivalue_index(index_arg->index_type_) || + !data_schema.is_valid() || + OB_ISNULL(col_name_buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), + K(col_name_buf)); + } else { + MEMSET(col_name_buf, 0, 
buf_len); + + const ObColumnSchemaV2 *col_schema = NULL; + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + bool is_define_mv_expr = false; + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + ObString define_string; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + } else if ((!is_budy_column && col_schema->is_multivalue_generated_column()) || + (is_budy_column && col_schema->is_multivalue_generated_array_column())) { + ObString column_name = col_schema->get_column_name_str(); + if (OB_FAIL(databuff_printf(col_name_buf, OB_MAX_COLUMN_NAME_LENGTH, name_pos, + "%s", column_name.ptr()))) { + LOG_WARN("column name write failed", K(ret), K(column_name)); + } + } + } + } + + return ret; +} + +int ObMulValueIndexBuilderUtil::append_mulvalue_arg( + const ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list) +{ + int ret = OB_SUCCESS; + ObCreateIndexArg multivlaue_arg; + if (OB_ISNULL(allocator) || + !is_multivalue_index(index_arg.index_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret), K(index_arg.index_type_)); + } else if (OB_FAIL(multivlaue_arg.assign(index_arg))) { + LOG_WARN("failed to assign to multivalue arg", K(ret)); + } else if (OB_FAIL(generate_mulvalue_index_name(multivlaue_arg, allocator))) { + LOG_WARN("failed to generate multivalue aux index name", K(ret)); + } else if (OB_FAIL(index_arg_list.push_back(multivlaue_arg))) { + LOG_WARN("failed to push back multivalue arg", K(ret)); + } + return ret; +} + + +int ObMulValueIndexBuilderUtil::is_multivalue_index_type( + const ObString& column_string, + bool& is_multi_value_index) +{ + INIT_SUCC(ret); + + char* buf = nullptr; + if (column_string.length() == 0 || column_string.length() > 
OB_MAX_COLUMN_NAMES_LENGTH) { + } else { + SMART_VAR(char[OB_MAX_COLUMN_NAMES_LENGTH * 2], buf) { + MEMCPY(buf, column_string.ptr(), column_string.length()); + buf[column_string.length()] = 0; + + std::regex pattern(R"(CAST\s*\(\s*.*\s*as\s*.*\s*array\s*\))", std::regex_constants::icase); + if (std::regex_match(buf, pattern)) { + is_multi_value_index = true; + } else { + std::regex pattern1(R"(JSON_QUERY\s*\(\s*.*\s*ASIS\s*.*\s*MULTIVALUE\s*\))", std::regex_constants::icase); + if (std::regex_match(buf, pattern1)) { + is_multi_value_index = true; + } + } + } + } + + return ret; +} + +int ObMulValueIndexBuilderUtil::adjust_index_type(const ObString& column_string, + bool& is_multi_value_index, + int* index_keyname) +{ + INIT_SUCC(ret); + + char* buf = nullptr; + if (OB_ISNULL(index_keyname)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null param input", K(ret)); + } else if (OB_FAIL(is_multivalue_index_type(column_string, is_multi_value_index))) { + LOG_WARN("failed to resolve index type", K(ret), K(column_string)); + } else if (!is_multi_value_index) { + } else if (*index_keyname == static_cast(sql::ObDDLResolver::NORMAL_KEY)) { + *index_keyname = static_cast(sql::ObDDLResolver::MULTI_KEY); + } else if (*index_keyname == static_cast(sql::ObDDLResolver::UNIQUE_KEY)) { + *index_keyname = static_cast(sql::ObDDLResolver::MULTI_UNIQUE_KEY); + } + return ret; +} + +int ObMulValueIndexBuilderUtil::get_mulvalue_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&mulvalue_col, + const ObColumnSchemaV2 *&budy_mulvalue_col) +{ + int ret = OB_SUCCESS; + mulvalue_col = nullptr; + budy_mulvalue_col = nullptr; + + int64_t name_pos = 0; + int64_t budy_name_pos = 0; + + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + char budy_col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + + if (!data_schema.is_valid() || + OB_ISNULL(index_arg) || + !is_multivalue_index(index_arg->index_type_)) { + ret = OB_INVALID_ARGUMENT; + 
LOG_WARN("invalid argument", K(ret), K(data_schema), KP(index_arg)); + } else if (OB_FAIL(construct_mulvalue_col_name(index_arg, + data_schema, + false, // not budy column, scalar column + col_name_buf, + OB_MAX_COLUMN_NAME_LENGTH, + name_pos))) { + if (ret != OB_ERR_KEY_COLUMN_DOES_NOT_EXITS) { + LOG_WARN("failed to construct multivalue column name", K(ret)); + } else { + ret = OB_SUCCESS; + } + } else if (OB_FAIL(construct_mulvalue_col_name(index_arg, + data_schema, + true, // is budy column, array column + budy_col_name_buf, + OB_MAX_COLUMN_NAME_LENGTH, + budy_name_pos))) { + LOG_WARN("failed to construct budy multivalue column name", K(ret)); + } else { + ObString mulvalue_col_name(name_pos, col_name_buf); + mulvalue_col = data_schema.get_column_schema(mulvalue_col_name); + + ObString budy_mulvalue_col_name(budy_name_pos, budy_col_name_buf); + budy_mulvalue_col = data_schema.get_column_schema(budy_mulvalue_col_name); + } + return ret; +} + +int ObMulValueIndexBuilderUtil::adjust_mulvalue_index_args( + obrpc::ObCreateIndexArg &index_arg, + ObTableSchema &data_schema, // not const since will add column to data schema + ObIArray &gen_columns) +{ + int ret = OB_SUCCESS; + const ObIndexType &index_type = index_arg.index_type_; + const ObColumnSchemaV2 *existing_doc_id_col = nullptr; + const ObColumnSchemaV2 *existing_mulvalue_col = nullptr; + const ObColumnSchemaV2 *existing_budy_mulvalue_col = nullptr; + ObArray tmp_cols; + uint64_t available_col_id = 0; + + if (!data_schema.is_valid() || !is_multivalue_index(index_type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), K(index_type)); + } else if (FALSE_IT(available_col_id = data_schema.get_max_used_column_id() + 1)) { + } else if (OB_FAIL(ObFtsIndexBuilderUtil::get_doc_id_col(data_schema, existing_doc_id_col))) { + LOG_WARN("failed to get doc id col", K(ret)); + } else if (OB_FAIL(get_mulvalue_col(data_schema, &index_arg, existing_mulvalue_col, existing_budy_mulvalue_col))) { 
+ LOG_WARN("failed to get multivalue col", K(ret)); + } else { + ObColumnSchemaV2 *generated_doc_id_col = nullptr; + ObColumnSchemaV2 *generated_mulvalue_col = nullptr; + ObColumnSchemaV2 *generated_budy_mulvalue_col = nullptr; + if (OB_ISNULL(existing_doc_id_col)) { + uint64_t doc_id_col_id = available_col_id++; + if (OB_FAIL(ObFtsIndexBuilderUtil::generate_doc_id_column(&index_arg, + doc_id_col_id, + data_schema, + generated_doc_id_col))) { + LOG_WARN("failed to generate doc id column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_doc_id_col))) { + LOG_WARN("failed to push back doc id col", K(ret)); + } + } + + if (OB_SUCC(ret) && OB_ISNULL(existing_mulvalue_col)) { + if (OB_FAIL(build_and_generate_multivalue_column_raw(index_arg, data_schema, + generated_mulvalue_col, generated_budy_mulvalue_col))) { + LOG_WARN("failed to build and generate multi value generated column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_mulvalue_col))) { + LOG_WARN("failed to push back multi value col", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_budy_mulvalue_col))) { + LOG_WARN("failed to push back multi value col", K(ret)); + } + } + } + return ret; +} + +int ObMulValueIndexBuilderUtil::build_and_generate_multivalue_column_raw( + ObCreateIndexArg &arg, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&mulvalue_col, + ObColumnSchemaV2 *&budy_mulvalue_col) +{ + int ret = OB_SUCCESS; + mulvalue_col = nullptr; + budy_mulvalue_col = nullptr; + + ObIArray &sort_items = arg.index_columns_; + ObString expr_def_string; + + bool is_oracle_mode = false; + bool is_add_column = false; + if (OB_FAIL(data_schema.check_if_oracle_compat_mode(is_oracle_mode))) { + LOG_WARN("check_if_oracle_compat_mode failed", K(ret)); + } else if (is_oracle_mode) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("oracle mode create index not supported yet.", K(ret)); + } + + int64_t expr_idx = 0; + for (; OB_SUCC(ret) && expr_idx < sort_items.count(); ++expr_idx) { + 
ObColumnSortItem& sort_item = sort_items.at(expr_idx); + bool is_multi_value_index = false; + if (sort_item.prefix_len_ > 0) { + } else if (!sort_item.is_func_index_) { + } else if (OB_FAIL(is_multivalue_index_type(sort_item.column_name_, is_multi_value_index))) { + LOG_WARN("failed to calc index type", K(ret), K(sort_item.column_name_)); + } else if (is_multi_value_index) { + is_add_column = true; + expr_def_string = sort_item.column_name_; + // found multivalue index define, break + break; + } + } + + if (OB_SUCC(ret) && expr_def_string.length() > 0) { + ObColumnSortItem sort_item = sort_items.at(expr_idx); + const ObString &index_expr_def = expr_def_string; + ObArenaAllocator allocator(ObModIds::OB_SQL_EXPR); + ObRawExprFactory expr_factory(allocator); + + SMART_VARS_2((sql::ObSQLSessionInfo, session), + (sql::ObExecContext, exec_ctx, allocator)) { + uint64_t tenant_id = data_schema.get_tenant_id(); + const ObTenantSchema *tenant_schema = nullptr; + ObSchemaGetterGuard guard; + ObSchemaChecker schema_checker; + + ObRawExpr *expr = nullptr; + ObColumnSchemaV2 *gen_col = nullptr; + budy_mulvalue_col = nullptr; + + if (OB_FAIL(session.init(0 /*default session id*/, + 0 /*default proxy id*/, + &allocator))) { + LOG_WARN("init session failed", K(ret)); + } else if (OB_FAIL(session.set_default_database(arg.database_name_))) { + LOG_WARN("failed to set default session default database name", K(ret)); + } else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard(tenant_id, guard))) { + LOG_WARN("get schema guard failed", K(ret)); + } else if (OB_FAIL(schema_checker.init(guard))) { + LOG_WARN("failed to init schema checker", K(ret)); + } else if (OB_FAIL(guard.get_tenant_info(tenant_id, tenant_schema))) { + LOG_WARN("get tenant_schema failed", K(ret)); + } else if (OB_FAIL(session.init_tenant(tenant_schema->get_tenant_name_str(), tenant_id))) { + LOG_WARN("init tenant failed", K(ret)); + } else if (OB_FAIL(session.load_all_sys_vars(guard))) { + LOG_WARN("session 
load system variable failed", K(ret)); + } else if (OB_FAIL(session.load_default_configs_in_pc())) { + LOG_WARN("session load default configs failed", K(ret)); + } else if (OB_FAIL(build_and_generate_multivalue_column(sort_item, + expr_factory, + session, + data_schema, + &schema_checker, + gen_col, + budy_mulvalue_col))) { + LOG_WARN("session load default configs failed", K(ret)); + } else { + ObColumnSortItem& ref_item = arg.index_columns_.at(expr_idx); + ref_item = sort_item; + mulvalue_col = gen_col; + + // need add multivalue budy column + if (OB_ISNULL(budy_mulvalue_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("build generate multivalue column failed, budy_mulvalue_col is null", K(ret), KP(budy_mulvalue_col)); + } else if (is_add_column) { + ObColumnSortItem budy_item; + budy_item.is_func_index_ = true; + budy_item.column_name_ = budy_mulvalue_col->get_column_name_str(); + if (OB_FAIL(arg.index_columns_.push_back(budy_item))) { + LOG_WARN("failed to push back column item.", K(ret)); + } + } + } + } + } + + return ret; +} + +int ObMulValueIndexBuilderUtil::build_and_generate_multivalue_column( + ObColumnSortItem& sort_item, + ObRawExprFactory &expr_factory, + const ObSQLSessionInfo &session_info, + ObTableSchema &table_schema, + sql::ObSchemaChecker *schema_checker, + ObColumnSchemaV2 *&gen_col, + ObColumnSchemaV2 *&budy_col) +{ + INIT_SUCC(ret); + ObRawExpr *expr = nullptr; + if (OB_FAIL(ObRawExprUtils::build_generated_column_expr(nullptr, + sort_item.column_name_, + expr_factory, + session_info, + table_schema, + expr, + schema_checker, + ObResolverUtils::CHECK_FOR_FUNCTION_INDEX))) { + LOG_WARN("build generated column expr failed", K(ret)); + } else if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("build generated column expr is null", K(ret)); + } else if (!expr->is_sys_func_expr()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("multivalue generated expr should be function, not column ref.", K(ret)); + } else { + //real index expr, so generate 
hidden generated column in data table schema + if (OB_FAIL(generate_multivalue_column(*expr, table_schema, gen_col, budy_col, schema_checker->get_schema_guard()))) { + LOG_WARN("generate ordinary generated column failed", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::check_generated_column_expr_str( + gen_col->get_cur_default_value().get_string(), session_info, table_schema))) { + LOG_WARN("fail to check printed generated column expr", K(ret)); + } else { + sort_item.column_name_ = gen_col->get_column_name_str(); + sort_item.is_func_index_ = true; + } + } + + return ret; +} + +int ObMulValueIndexBuilderUtil::generate_multivalue_column( + ObRawExpr &expr, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&gen_col, + ObColumnSchemaV2 *&gen_budy_col, + ObSchemaGetterGuard *schema_guard) +{ + int ret = OB_SUCCESS; + ObColumnSchemaV2 multival_col; + SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], expr_def_buf) { + MEMSET(expr_def_buf, 0, sizeof(expr_def_buf)); + int64_t pos = 0; + ObRawExprPrinter expr_printer(expr_def_buf, OB_MAX_DEFAULT_VALUE_LENGTH, &pos, schema_guard); + if (OB_FAIL(expr_printer.do_print(&expr, T_NONE_SCOPE, true))) { + LOG_WARN("print expr definition failed", K(ret)); + } else { + // add check + ObString expr_def(pos, expr_def_buf); + ObColumnSchemaV2 *old_gen_col = NULL; + + size_t expr_str_len = strlen(expr_def_buf); + expr_def.assign_ptr(expr_def_buf, expr_str_len); + + if (OB_FAIL(data_schema.get_generated_column_by_define(expr_def, + true/*only hidden column*/, + old_gen_col))) { + LOG_WARN("get generated column by define failed", K(ret), K(expr_def)); + } else if (old_gen_col != NULL) { + //got it + gen_col = old_gen_col; + } else { + //need to add new generated column + ObObj default_value; + char col_name_buf[OB_MAX_COLUMN_NAMES_LENGTH] = {'\0'}; + pos = 0; + default_value.set_varchar(expr_def); + multival_col.set_rowkey_position(0); //非主键列 + multival_col.set_index_position(0); //非索引列 + multival_col.set_tbl_part_key_pos(0); //非partition key + 
multival_col.set_tenant_id(data_schema.get_tenant_id()); + multival_col.set_table_id(data_schema.get_table_id()); + multival_col.set_column_id(data_schema.get_max_used_column_id() + 1); + multival_col.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + multival_col.add_column_flag(MULTIVALUE_INDEX_GENERATED_COLUMN_FLAG); + multival_col.set_is_hidden(true); + if (expr.get_result_type().is_null()) { + const ObAccuracy varchar_accuracy(0); + multival_col.set_data_type(ObVarcharType); + multival_col.set_collation_type(data_schema.get_collation_type()); + multival_col.set_accuracy(varchar_accuracy); + } else { + multival_col.set_data_type(expr.get_data_type()); + multival_col.set_collation_type(expr.get_collation_type()); + multival_col.set_accuracy(expr.get_accuracy()); + } + multival_col.set_prev_column_id(UINT64_MAX); + multival_col.set_next_column_id(UINT64_MAX); + ObSEArray dep_columns; + if (OB_FAIL(ObRawExprUtils::extract_column_exprs(&expr, dep_columns))) { + LOG_WARN("extract column exprs failed", K(ret), K(expr)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < dep_columns.count(); ++i) { + const ObRawExpr *dep_column = dep_columns.at(i); + if (OB_ISNULL(dep_column)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("deps_column is null"); + } else if (!dep_column->is_column_ref_expr()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("dep column is invalid", K(ret), KPC(dep_column)); + } else if (OB_FAIL(multival_col.add_cascaded_column_id( + static_cast(dep_column)->get_column_id()))) { + LOG_WARN("add cascaded column id failed", K(ret)); + } + } + + if (OB_FAIL(ret)) { + //do nothing + } else if (OB_FAIL(databuff_printf(col_name_buf, OB_MAX_COLUMN_NAMES_LENGTH, pos, + "__mvi_%ld", /*naming rules are compatible with oracle*/ + multival_col.get_column_id()))) { + LOG_WARN("print generate column prefix name failed", K(ret)); + } else if (OB_FAIL(multival_col.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if 
(OB_FAIL(multival_col.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(multival_col.set_cur_default_value(default_value))) { + LOG_WARN("set current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(multival_col))) { + LOG_WARN("add column schema to data table failed", K(ret)); + } else { + gen_col = data_schema.get_column_schema(multival_col.get_column_id()); + } + + ObColumnSchemaV2 multival_arr_col; + if (FAILEDx(multival_arr_col.assign(multival_col))) { + LOG_WARN("fail to assign multival arr col", K(ret), K(multival_col)); + } else { + multival_arr_col.set_column_id(data_schema.get_max_used_column_id() + 1); + multival_arr_col.del_column_flag(MULTIVALUE_INDEX_GENERATED_COLUMN_FLAG); + multival_arr_col.add_column_flag(MULTIVALUE_INDEX_GENERATED_ARRAY_COLUMN_FLAG); + + pos = 0; + ObObj default_value; + char col_name_buf[OB_MAX_COLUMN_NAMES_LENGTH] = {'\0'}; + snprintf(expr_def_buf + expr_str_len - 1, OB_MAX_DEFAULT_VALUE_LENGTH - (expr_str_len - 1), + "%s", " multivalue)"); + + expr_str_len = strlen(expr_def_buf); + expr_def.assign_ptr(expr_def_buf, expr_str_len); + default_value.set_varchar(expr_def); + + multival_arr_col.set_data_type(ObJsonType); + multival_arr_col.set_collation_type(CS_TYPE_UTF8MB4_BIN); + multival_arr_col.set_accuracy(ObAccuracy::DDL_DEFAULT_ACCURACY[ObJsonType]); + + if (OB_FAIL(databuff_printf(col_name_buf, OB_MAX_COLUMN_NAMES_LENGTH, pos, + "__mvi_arr_%ld", /*naming rules are compatible with oracle*/ + multival_arr_col.get_column_id()))) { + LOG_WARN("print generate column prefix name failed", K(ret)); + } else if (OB_FAIL(multival_arr_col.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if (OB_FAIL(multival_arr_col.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(multival_arr_col.set_cur_default_value(default_value))) { + LOG_WARN("set 
current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(multival_arr_col))) { + LOG_WARN("add column schema to data table failed", K(ret)); + } else { + gen_budy_col = data_schema.get_column_schema(multival_arr_col.get_column_id()); + } + } + } + } + } + return ret; +} + +int ObMulValueIndexBuilderUtil::inner_adjust_multivalue_arg( + ObCreateIndexArg &index_arg, + const ObTableSchema &data_schema, + ObColumnSchemaV2 *doc_id_col) +{ + int ret = OB_SUCCESS; + ObArray sort_items; + ObIAllocator *allocator = index_arg.index_schema_.get_allocator(); + + if (OB_ISNULL(allocator)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret)); + } else if (OB_FAIL(sort_items.assign(index_arg.index_columns_))) { + LOG_WARN("failed to assign old index columns", K(ret)); + } else { + index_arg.index_columns_.reuse(); + index_arg.store_columns_.reuse(); + } + + for (int i = 0; OB_SUCC(ret) && i < sort_items.count(); ++i) { + ObColumnSortItem new_sort_item; + ObColumnSortItem &sort_item = sort_items.at(i); + const ObString column_name = sort_item.column_name_; + const ObColumnSchemaV2 *col_schema = nullptr; + + if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_WARN("failed to get col schema", K(ret), K(column_name)); + } else if (OB_FAIL(ob_write_string(*allocator, + col_schema->get_column_name_str(), + new_sort_item.column_name_))) { + //to keep the memory lifetime of column_name consistent with index_arg + LOG_WARN("deep copy column name failed", K(ret)); + } else if (OB_FAIL(index_arg.index_columns_.push_back(new_sort_item))) { + LOG_WARN("failed to push back index column", K(ret)); + } else if (col_schema->is_multivalue_generated_column()) { + const ObColumnSchemaV2 *budy_col_schema = nullptr; + ObColumnSortItem budy_sort_item; + if (OB_ISNULL(budy_col_schema = data_schema.get_column_schema(col_schema->get_column_id() + 1))) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("failed to get budy column", K(ret)); + } else if (OB_FAIL(ob_write_string(*allocator, + budy_col_schema->get_column_name_str(), + budy_sort_item.column_name_))) { + //to keep the memory lifetime of column_name consistent with index_arg + LOG_WARN("deep copy column name failed", K(ret)); + } else if (OB_FAIL(index_arg.index_columns_.push_back(budy_sort_item))) { + LOG_WARN("failed to push back index column", K(ret)); + } + } + } + + const ObRowkeyInfo &rowkey_info = data_schema.get_rowkey_info(); + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_info.get_size(); ++i) { + ObColumnSortItem rowkey_column; + const ObColumnSchemaV2 *rowkey_col = NULL; + uint64_t column_id = OB_INVALID_ID; + if (OB_FAIL(rowkey_info.get_column_id(i, column_id))) { + LOG_WARN("get_column_id failed", "index", i, K(ret)); + } else if (NULL == (rowkey_col = data_schema.get_column_schema(column_id))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", "table_id", + data_schema.get_table_id(), K(column_id), K(ret)); + } else if (OB_FAIL(ob_write_string(*allocator, + rowkey_col->get_column_name_str(), + rowkey_column.column_name_))) { + //to keep the memory lifetime of column_name consistent with index_arg + LOG_WARN("deep copy column name failed", K(ret)); + } else if (OB_FAIL(index_arg.index_columns_.push_back(rowkey_column))) { + LOG_WARN("failed to push back rowkey column", K(ret)); + } + } + + + ObColumnSortItem tmp_sort_item; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ob_write_string(*allocator, + doc_id_col->get_column_name_str(), + tmp_sort_item.column_name_))) { + //to keep the memory lifetime of column_name consistent with index_arg + LOG_WARN("deep copy column name failed", K(ret)); + } else if (OB_FAIL(index_arg.index_columns_.push_back(tmp_sort_item))) { + LOG_WARN("failed to push back index column", K(ret)); + } + + return ret; +} + +int ObMulValueIndexBuilderUtil::set_multivalue_index_table_columns( + const ObCreateIndexArg &arg, + const ObTableSchema 
&data_schema, + ObTableSchema &index_schema) +{ + int ret = OB_SUCCESS; + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + if (!data_schema.is_valid()) { + // expect word col, doc id col in index_columns, + // expect worc count col in store_columns. + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), + K(arg.index_columns_.count()), K(arg.store_columns_.count()), + K(arg.index_columns_), K(arg.store_columns_)); + } + + HEAP_VAR(ObRowDesc, row_desc) { + common::ObOrderType order_type; + const ObColumnSchemaV2 *mvi_array_column = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < arg.index_columns_.count(); ++i) { + const ObColumnSchemaV2 *mvi_column = nullptr; + const ObColumnSortItem &mvi_col_item = arg.index_columns_.at(i); + order_type = mvi_col_item.order_type_; + if (OB_ISNULL(mvi_column = data_schema.get_column_schema(mvi_col_item.column_name_))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, + mvi_col_item.column_name_.length(), mvi_col_item.column_name_.ptr()); + LOG_WARN("get_column_schema failed", "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", mvi_col_item.column_name_, K(ret)); + } else if (!mvi_column->is_multivalue_generated_array_column()) { + if (OB_FAIL(ObIndexBuilderUtil::add_column(mvi_column, + true/*is_index_column*/, + true/*is_rowkey*/, + order_type, + row_desc, + index_schema, + false/*is_hidden*/, + false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed", "mvi_column", *mvi_column, "rowkey_order_type", + mvi_col_item.order_type_, K(row_desc), K(ret)); + } + } else if (mvi_column->is_multivalue_generated_array_column()) { + mvi_array_column = mvi_column; + } + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(mvi_array_column)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get multivalue array column", K(ret)); + } else if 
(index_schema.is_unique_index()) { + // json-array column is not index coumn, not rowkey column + index_schema.set_rowkey_column_num(row_desc.get_column_num()); + index_schema.set_index_column_num(row_desc.get_column_num()); + } + + bool is_rowkey = !index_schema.is_unique_index(); + bool is_index_column = is_rowkey; + + const ObColumnSchemaV2 *rowkey_column = nullptr; + const ObRowkeyInfo &rowkey_info = data_schema.get_rowkey_info(); + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_info.get_size(); ++i) { + uint64_t column_id = OB_INVALID_ID; + if (OB_FAIL(rowkey_info.get_column_id(i, column_id))) { + LOG_WARN("get_column_id failed", "index", i, K(ret)); + } else if (OB_ISNULL(rowkey_column = data_schema.get_column_schema(column_id))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", "table_id", data_schema.get_table_id(), + K(column_id), K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(rowkey_column, + is_index_column/*is_index_column*/, + is_rowkey /*is_rowkey*/, + rowkey_column->get_order_in_rowkey(), + row_desc, + index_schema, + false /*is_hidden*/, + true /*is_specified_storing_col*/))) { + LOG_WARN("add column failed", K(ret)); + } + } + + const ObColumnSchemaV2 *doc_id_col = nullptr; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObFtsIndexBuilderUtil::get_doc_id_col(data_schema, doc_id_col))) { + LOG_WARN("failed to get doc id col", K(ret)); + } else if (OB_ISNULL(doc_id_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get doc id col is null", K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(doc_id_col, + is_index_column /*is_index_column*/, + is_rowkey /*is_rowkey*/, + order_type, + row_desc, index_schema, + false/*is_hidden*/, + true /*is_specified_storing_col*/))) { + LOG_WARN("add column failed", "docid column", *doc_id_col, "rowkey_order_type", + order_type, K(row_desc), K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(mvi_array_column, + false/*is_index_column*/, + false/*is_rowkey*/, + 
order_type, + row_desc, + index_schema, + false/*is_hidden*/, + false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed", "mvi_array_column", *mvi_array_column, K(row_desc), K(ret)); + } + + if (OB_SUCC(ret) && !index_schema.is_unique_index()) { + // json-array column is not index coumn, not rowkey column + index_schema.set_rowkey_column_num(row_desc.get_column_num() - 1); + index_schema.set_index_column_num(row_desc.get_column_num() - 1); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(index_schema.sort_column_array_by_column_id())) { + LOG_WARN("failed to sort column", K(ret)); + } else { + LOG_INFO("succeed to set multivalue index table columns", K(index_schema)); + } + } + return ret; +} + + +}//end namespace rootserver +}//end namespace oceanbase diff --git a/src/share/ob_fts_index_builder_util.h b/src/share/ob_fts_index_builder_util.h new file mode 100644 index 0000000000..05a861b762 --- /dev/null +++ b/src/share/ob_fts_index_builder_util.h @@ -0,0 +1,226 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_SHARE_FTS_INDEX_BUILDER_UTIL_H_ +#define OCEANBASE_SHARE_FTS_INDEX_BUILDER_UTIL_H_ + +#include "share/ob_rpc_struct.h" +#include "share/schema/ob_schema_struct.h" +#include "sql/resolver/ob_schema_checker.h" + +namespace oceanbase +{ +namespace share +{ +class ObMulValueIndexBuilderUtil; + +class ObFtsIndexBuilderUtil +{ + friend class ObMulValueIndexBuilderUtil; +public: + static const int64_t OB_FTS_INDEX_TABLE_INDEX_COL_CNT = 2; + static const int64_t OB_FTS_DOC_WORD_TABLE_INDEX_COL_CNT = 2; +public: + static int append_fts_rowkey_doc_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list); + static int append_fts_doc_rowkey_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list); + static int append_fts_index_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list); + static int append_fts_doc_word_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list); + static int generate_fts_aux_index_name( + obrpc::ObCreateIndexArg &arg, + ObIAllocator *allocator); + static int adjust_fts_args( + obrpc::ObCreateIndexArg &index_arg, + ObTableSchema &data_schema, // not const since will add column to data schema + ObIArray &gen_columns); + static int set_fts_rowkey_doc_table_columns( + const obrpc::ObCreateIndexArg &arg, + const share::schema::ObTableSchema &data_schema, + share::schema::ObTableSchema &index_schema); + static int set_fts_doc_rowkey_table_columns( + const obrpc::ObCreateIndexArg &arg, + const share::schema::ObTableSchema &data_schema, + share::schema::ObTableSchema &index_schema); + static int set_fts_index_table_columns( + const obrpc::ObCreateIndexArg &arg, + const share::schema::ObTableSchema &data_schema, + share::schema::ObTableSchema &index_schema); + static int get_doc_id_col( + const ObTableSchema &data_schema, + const ObColumnSchemaV2 
*&doc_id_col); +private: + static int check_ft_cols( + const obrpc::ObCreateIndexArg *index_arg, + ObTableSchema &data_schema); // not const since will add cascade flag + static int adjust_fts_arg( + obrpc::ObCreateIndexArg *index_arg, // not const since index_arg.index_schema.allocator will be used + const ObTableSchema &data_schema, + const ObIArray &fts_cols); + static int inner_adjust_fts_arg( + obrpc::ObCreateIndexArg *fts_arg, + const ObIArray &fts_cols, + const int index_column_cnt, + ObIAllocator *allocator); + static int generate_doc_id_column( + const obrpc::ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, // not const since will add column to data schema + ObColumnSchemaV2 *&doc_id_col); + static int generate_word_segment_column( + const obrpc::ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, // not const since will add column to data schema + ObColumnSchemaV2 *&word_segment_col); + static int generate_word_count_column( + const obrpc::ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, // not const since will add column to data schema + ObColumnSchemaV2 *&word_count_col); + static int generate_doc_length_column( + const obrpc::ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, // not const since will add column to data schema + ObColumnSchemaV2 *&doc_length_col); + static int construct_doc_id_col_name( + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos); + static int construct_word_segment_col_name( + const obrpc::ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos); + static int construct_word_count_col_name( + const obrpc::ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos); + static int construct_doc_length_col_name( + const 
obrpc::ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos); + static int check_fts_gen_col( + const ObTableSchema &data_schema, + const uint64_t col_id, + const char *col_name_buf, + const int64_t name_pos, + bool &col_exists); + static int get_word_segment_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&word_segment_col); + static int get_word_cnt_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&word_cnt_col); + static int get_doc_length_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&doc_len_col); + static int push_back_gen_col( + ObIArray &cols, + const ObColumnSchemaV2 *existing_col, + ObColumnSchemaV2 *generated_col); + static int generate_fts_parser_name( + obrpc::ObCreateIndexArg &arg, + ObIAllocator *allocator); + static int get_index_column_ids( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg &arg, + schema::ColumnReferenceSet &index_column_ids); + static int check_index_match( + const schema::ObColumnSchemaV2 &column, + const schema::ColumnReferenceSet &index_column_ids, + bool &is_match); +}; + +class ObMulValueIndexBuilderUtil +{ +public: + static int generate_mulvalue_index_name( + obrpc::ObCreateIndexArg &arg, + ObIAllocator *allocator); + static int construct_mulvalue_col_name( + const obrpc::ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + bool is_budy_column, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos); + static int append_mulvalue_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list); + static int is_multivalue_index_type( + const ObString& column_string, + bool& is_multi_value_index); + static int adjust_index_type( + const ObString& column_string, + bool& 
is_multi_value_index, + int* index_keyname); + static int get_mulvalue_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&mulvalue_col, + const ObColumnSchemaV2 *&budy_col); + static int adjust_mulvalue_index_args( + obrpc::ObCreateIndexArg &index_arg, + ObTableSchema &data_schema, // not const since will add column to data schema + ObIArray &gen_columns); + static int build_and_generate_multivalue_column_raw( + obrpc::ObCreateIndexArg &arg, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&mulvalue_col, + ObColumnSchemaV2 *&budy_col); + static int build_and_generate_multivalue_column( + obrpc::ObColumnSortItem& sort_item, + sql::ObRawExprFactory &expr_factory, + const sql::ObSQLSessionInfo &session_info, + ObTableSchema &table_schema, + sql::ObSchemaChecker *schema_checker, + ObColumnSchemaV2 *&gen_col, + ObColumnSchemaV2 *&budy_col); + static int generate_multivalue_column( + sql::ObRawExpr &expr, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&gen_col, + ObColumnSchemaV2 *&budy_col, + ObSchemaGetterGuard *schema_guard); + static int inner_adjust_multivalue_arg( + obrpc::ObCreateIndexArg &index_arg, + const ObTableSchema &data_schema, + ObColumnSchemaV2 *doc_id_col); + static int set_multivalue_index_table_columns( + const obrpc::ObCreateIndexArg &arg, + const ObTableSchema &data_schema, + ObTableSchema &index_schema); +}; + +}//end namespace share +}//end namespace oceanbase + +#endif //OCEANBASE_SHARE_FTS_INDEX_BUILDER_UTIL_H_ diff --git a/src/share/ob_index_builder_util.cpp b/src/share/ob_index_builder_util.cpp index 585a02f2b7..05ca32a881 100644 --- a/src/share/ob_index_builder_util.cpp +++ b/src/share/ob_index_builder_util.cpp @@ -12,6 +12,7 @@ #define USING_LOG_PREFIX SHARE #include "ob_index_builder_util.h" +#include "ob_fts_index_builder_util.h" #include "share/ob_define.h" #include "lib/container/ob_array_iterator.h" @@ -34,8 +35,12 @@ namespace share { void 
ObIndexBuilderUtil::del_column_flags_and_default_value(ObColumnSchemaV2 &column) { - if ((column.is_generated_column() && !column.is_fulltext_column() && - !column.is_spatial_generated_column()) || column.is_identity_column()) { + if ((column.is_generated_column() && + !column.is_fulltext_column() && + !column.is_spatial_generated_column() && + !column.is_multivalue_generated_column() && + !column.is_multivalue_generated_array_column()) + || column.is_identity_column()) { if (column.is_virtual_generated_column()) { column.del_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); } else if (column.is_stored_generated_column()) { @@ -133,7 +138,20 @@ int ObIndexBuilderUtil::add_column( if (column.is_spatial_generated_column()) { column.set_geo_col_id(data_column->get_geo_col_id()); } - + if (column.is_fulltext_column()) { + ObObj default_value; + column.del_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + if (column.is_word_segment_column()) { + const int64_t data_length = MIN(column.get_data_length(), MIN(OB_MAX_ROW_KEY_LENGTH, OB_MAX_USER_ROW_KEY_LENGTH)); + column.set_data_length(data_length); + } + column.set_is_hidden(false); + if (FAILEDx(column.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column.set_cur_default_value(default_value))) { + LOG_WARN("set current default value failed", K(ret)); + } + } if (OB_FAIL(ret)) { } else if (OB_FAIL(table_schema.add_column(column))) { LOG_WARN("add_column failed", K(column), K(ret)); @@ -345,7 +363,45 @@ int ObIndexBuilderUtil::set_index_table_columns( use_mysql_errno = !is_oracle_mode; } } - if (OB_SUCC(ret)) { + // no matter what index col of data table is, columns of 4 aux fts table is fixed + if (OB_FAIL(ret)) { + } else if (is_fts_index(arg.index_type_) || + is_multivalue_index(arg.index_type_)) { + if (is_doc_rowkey_aux(arg.index_type_)) { + if (OB_FAIL(ObFtsIndexBuilderUtil::set_fts_doc_rowkey_table_columns(arg, + data_schema, + index_schema))) { + 
LOG_WARN("failed to set fts doc rowkey table", K(ret)); + } + } else if (is_rowkey_doc_aux(arg.index_type_)) { + if (OB_FAIL(ObFtsIndexBuilderUtil::set_fts_rowkey_doc_table_columns(arg, + data_schema, + index_schema))) { + LOG_WARN("failed to set fts rowkey doc table", K(ret)); + } + } else if (is_fts_index_aux(arg.index_type_)) { + if (OB_FAIL(ObFtsIndexBuilderUtil::set_fts_index_table_columns(arg, + data_schema, + index_schema))) { + LOG_WARN("failed to set fts index table", K(ret)); + } + } else if (is_fts_doc_word_aux(arg.index_type_)) { + if (OB_FAIL(ObFtsIndexBuilderUtil::set_fts_index_table_columns(arg, + data_schema, + index_schema))) { + LOG_WARN("failed to set fts doc word table", K(ret)); + } + } else if (is_multivalue_index(arg.index_type_)) { + if (OB_FAIL(ObMulValueIndexBuilderUtil::set_multivalue_index_table_columns(arg, + data_schema, + index_schema))) { + LOG_WARN("failed to set multivalue index table", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fts arg index type not expected", K(ret)); + } + } else { // not fts index HEAP_VAR(ObRowDesc, row_desc) { bool is_index_column = false; // index columns @@ -582,7 +638,10 @@ int ObIndexBuilderUtil::adjust_expr_index_args( ObIArray &gen_columns) { int ret = OB_SUCCESS; - if (ObSimpleTableSchemaV2::is_spatial_index(arg.index_type_)) { + if (arg.fulltext_columns_.count() > 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fulltext_columns_ is deprecated!", K(ret)); + } else if (ObSimpleTableSchemaV2::is_spatial_index(arg.index_type_)) { ObSEArray spatial_cols; uint64_t tenant_id = data_schema.get_tenant_id(); uint64_t tenant_data_version = 0; @@ -590,7 +649,8 @@ int ObIndexBuilderUtil::adjust_expr_index_args( LOG_WARN("get tenant data version failed", K(ret)); } else if (tenant_data_version < DATA_VERSION_4_1_0_0) { ret = OB_NOT_SUPPORTED; - LOG_WARN("tenant version is less than 4.1, spatial index not supported", K(ret), K(tenant_data_version)); + LOG_WARN("tenant version is less than 4.1, 
spatial index not supported", + K(ret), K(tenant_data_version)); LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant version is less than 4.1, spatial index"); } else if (OB_FAIL(adjust_spatial_args(arg, data_schema, allocator, spatial_cols))) { LOG_WARN("adjust spatial args failed", K(ret)); @@ -599,14 +659,13 @@ int ObIndexBuilderUtil::adjust_expr_index_args( } else if (OB_FAIL(gen_columns.push_back(spatial_cols.at(1)))) { LOG_WARN("push back mbr column to gen columns failed", K(ret)); } - } else if (arg.fulltext_columns_.count() > 0) { - ObColumnSchemaV2 *ft_col = NULL; - if (OB_FAIL(adjust_fulltext_args(arg, data_schema, allocator, ft_col))) { - LOG_WARN("adjust fulltext args failed", K(ret)); - } else if (ft_col != NULL) { - if (OB_FAIL(gen_columns.push_back(ft_col))) { - LOG_WARN("store fulltext column failed", K(ret)); - } + } else if (is_fts_index(arg.index_type_)) { + if (OB_FAIL(ObFtsIndexBuilderUtil::adjust_fts_args(arg, data_schema, gen_columns))) { + LOG_WARN("failed to adjust fts args", K(ret)); + } + } else if (is_multivalue_index(arg.index_type_)) { + if (OB_FAIL(ObMulValueIndexBuilderUtil::adjust_mulvalue_index_args(arg, data_schema, gen_columns))) { + LOG_WARN("failed to adjust multivalue args", K(ret)); } } else if (OB_FAIL(adjust_ordinary_index_column_args(arg, data_schema, allocator, gen_columns))) { LOG_WARN("adjust ordinary index column args failed", K(ret)); @@ -614,112 +673,6 @@ int ObIndexBuilderUtil::adjust_expr_index_args( return ret; } -int ObIndexBuilderUtil::adjust_fulltext_columns( - ObCreateIndexArg &arg, - OrderFTColumns &ft_columns) -{ - int ret = OB_SUCCESS; - ObIArray &fulltext_columns = arg.fulltext_columns_; - ObIArray &sort_items = arg.index_columns_; - for (int64_t i = 0; OB_SUCC(ret) && i < fulltext_columns.count(); ++i) { - const ObString &ft_name = fulltext_columns.at(i); - bool found_ft = false; - for (int64_t j = 0; OB_SUCC(ret) && !found_ft && j < sort_items.count(); ++j) { - const ObColumnSortItem &sort_item = sort_items.at(j); 
- if (ObCharset::case_insensitive_equal(sort_item.column_name_, ft_name)) { - found_ft = true; - ret = ft_columns.push_back(std::pair(j, ft_name)); - } - } - if (!found_ft) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("fulltext column not exists in index", K(ft_name)); - } - } - if (OB_SUCC(ret)) { - std::sort(ft_columns.begin(), ft_columns.end(), FulltextColumnOrder()); - } - for (int64_t i = 1; OB_SUCC(ret) && i < ft_columns.count(); ++i) { - if (ft_columns.at(i).first - ft_columns.at(i - 1).first != 1) { - ret = OB_ERR_BAD_CTXCAT_COLUMN; - LOG_USER_ERROR(OB_ERR_BAD_CTXCAT_COLUMN); - } - } - return ret; -} - -int ObIndexBuilderUtil::adjust_fulltext_args( - ObCreateIndexArg &arg, - ObTableSchema &data_schema, - ObIAllocator &allocator, - ObColumnSchemaV2 *&ft_col) -{ - ft_col = NULL; - int ret = OB_SUCCESS; - //如果是ctxcat index,那么需要在表中创建一个generated column的分词列,并且在该分词列上创建索引 - ObIArray &fulltext_columns = arg.fulltext_columns_; - ObIArray &sort_items = arg.index_columns_; - ObArray new_sort_items; - uint64_t virtual_column_id = OB_INVALID_ID; - if (fulltext_columns.count() > 0) { - OrderFTColumns order_ft_columns; - int64_t ft_begin_index = 0; //全文列第一列在索引列中的index - int64_t ft_end_index = 0; //全文列最后一列在索引中的index - if (sort_items.count() <= 0) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("sort items is empty", K(ret)); - } else if (OB_INVALID_ID != sort_items.at(0).column_id_) { - // for restore purpose - virtual_column_id = sort_items.at(0).column_id_; - sort_items.at(0).column_id_ = OB_INVALID_ID; - } - if (OB_SUCC(ret)) { - if (OB_FAIL(adjust_fulltext_columns(arg, order_ft_columns))) { - LOG_WARN("adjust fulltext columns to order fulltext columns failed", K(ret)); - } else if (order_ft_columns.empty()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("order fulltext columns is empty"); - } else { - ft_begin_index = order_ft_columns.at(0).first; - ft_end_index = order_ft_columns.at(order_ft_columns.count() - 1).first; - } - } - for (int64_t i = 0; OB_SUCC(ret) && i < ft_begin_index; ++i) 
{ - ret = new_sort_items.push_back(sort_items.at(i)); - } - if (OB_SUCC(ret)) { - ObColumnSortItem ft_sort_item; - int64_t old_cnt = data_schema.get_column_count(); - ObColumnSchemaV2 *tmp_ft_col = NULL; - if (OB_FAIL(generate_fulltext_column(order_ft_columns, - data_schema, - virtual_column_id, - tmp_ft_col))) { - LOG_WARN("generate fulltext column failed", K(ret)); - } else if (OB_ISNULL(tmp_ft_col)) { - LOG_WARN("fulltext column schema is null", K(ret)); - } else if (OB_FAIL(ob_write_string(allocator, tmp_ft_col->get_column_name_str(), - ft_sort_item.column_name_))) { - //to keep the memory lifetime of column_name consistent with index_arg - LOG_WARN("deep copy column name failed", K(ret)); - } else if (OB_FAIL(new_sort_items.push_back(ft_sort_item))) { - LOG_WARN("store new sort items failed", K(ret)); - } else if (data_schema.get_column_count() > old_cnt) { - ft_col = tmp_ft_col; //新生成的全文列,需要传递出去,让rs进行创建 - } - } - for (int64_t i = ft_end_index + 1; OB_SUCC(ret) && i < sort_items.count(); ++i) { - ret = new_sort_items.push_back(sort_items.at(i)); - } - if (OB_SUCC(ret)) { - sort_items.reset(); - fulltext_columns.reset(); - ret = sort_items.assign(new_sort_items); - } - } - return ret; -} - int ObIndexBuilderUtil::adjust_ordinary_index_column_args( ObCreateIndexArg &arg, ObTableSchema &data_schema, @@ -893,130 +846,6 @@ int ObIndexBuilderUtil::adjust_ordinary_index_column_args( return ret; } -int ObIndexBuilderUtil::generate_fulltext_column( - OrderFTColumns &ft_cols, - ObTableSchema &data_schema, - uint64_t specified_virtual_cid, - ObColumnSchemaV2 *&ft_col) -{ - ft_col = NULL; - int ret = OB_SUCCESS; - ObColumnSchemaV2 *col_schema = NULL; - ObColumnSchemaV2 column_schema; - char col_name_buf[OB_MAX_COLUMN_NAMES_LENGTH] = {'\0'}; - SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], ft_expr_def) { - MEMSET(ft_expr_def, 0, sizeof(ft_expr_def)); - int64_t name_pos = 0; - int64_t def_pos = 0; - int32_t max_data_length = 0; - ObCollationType collation_type = 
CS_TYPE_INVALID; - if (OB_FAIL(databuff_printf(col_name_buf, OB_MAX_COLUMN_NAMES_LENGTH, name_pos, "__word_segment"))) { - LOG_WARN("print generate column prefix name failed", K(ret)); - } else if (OB_FAIL(databuff_printf(ft_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, "WORD_SEGMENT("))) { - LOG_WARN("print generate expr definition prefix failed", K(ret)); - } - for (int64_t i = 0; OB_SUCC(ret) && i < ft_cols.count(); ++i) { - const ObString &column_name = ft_cols.at(i).second; - if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { - ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; - LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), column_name.ptr()); - } else if (!col_schema->is_string_type() || col_schema->get_meta_type().is_blob()) { - ret = OB_ERR_BAD_FT_COLUMN; - LOG_USER_ERROR(OB_ERR_BAD_FT_COLUMN, column_name.length(), column_name.ptr()); - } else if (col_schema->is_generated_column()) { - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "CTXCAT on generated column"); - } else if (OB_FAIL(databuff_printf(col_name_buf, OB_MAX_COLUMN_NAMES_LENGTH, name_pos, "_%ld", col_schema->get_column_id()))) { - LOG_WARN("print column id to buffer failed", K(ret), K(col_schema->get_column_id())); - } else if (OB_FAIL(databuff_printf(ft_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, "`%s`, ", col_schema->get_column_name()))) { - LOG_WARN("print column name to buffer failed", K(ret)); - } else if (OB_FAIL(column_schema.add_cascaded_column_id(col_schema->get_column_id()))) { - LOG_WARN("add cascaded column to generated column failed", K(ret)); - } else { - col_schema->add_column_flag(GENERATED_DEPS_CASCADE_FLAG); - if (max_data_length < col_schema->get_data_length()) { - max_data_length = col_schema->get_data_length(); - } - if (CS_TYPE_INVALID == collation_type) { - collation_type = col_schema->get_collation_type(); - } else if (collation_type != col_schema->get_collation_type()) { - ret = OB_NOT_SUPPORTED; - 
LOG_USER_ERROR(OB_NOT_SUPPORTED, "create fulltext index on columns with different collation"); - } else { /*do nothing*/ } - } - } - if (OB_SUCC(ret)) { - def_pos -= 2; //去掉最后一个", " - if (OB_FAIL(databuff_printf(ft_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, ")"))) { - LOG_WARN("print generate expr definition suffix failed", K(ret)); - } - } - if (OB_SUCC(ret)) { - // another fulltext index could have created the generated column - ft_col = data_schema.get_column_schema(col_name_buf); - if (OB_INVALID_ID != specified_virtual_cid) { - if (OB_NOT_NULL(ft_col)) { - // check the specified column id is consistent with the existed column schema - if (specified_virtual_cid != ft_col->get_column_id()) { - ret = OB_ERR_INVALID_COLUMN_ID; - LOG_USER_ERROR(OB_ERR_INVALID_COLUMN_ID, static_cast(name_pos), col_name_buf); - LOG_WARN("Column id specified by create fulltext index mismatch with column schema id", - K(ret), K(specified_virtual_cid), K(*ft_col)); - } - } else if (OB_NOT_NULL(data_schema.get_column_schema(specified_virtual_cid))) { - // check the specified column id is not used by others - ret = OB_ERR_INVALID_COLUMN_ID; - LOG_USER_ERROR(OB_ERR_INVALID_COLUMN_ID, static_cast(name_pos), col_name_buf); - LOG_WARN("Column id specified by create fulltext index has been used", - K(ret), K(specified_virtual_cid)); - } - } - if (OB_FAIL(ret)) { - // do nothing - } else if (OB_NOT_NULL(ft_col)) { - // the generated colum is created - if (OB_UNLIKELY(!ft_col->has_column_flag(GENERATED_CTXCAT_CASCADE_FLAG))) { - ret = OB_ERR_COLUMN_DUPLICATE; - LOG_USER_ERROR(OB_ERR_COLUMN_DUPLICATE, static_cast(name_pos), col_name_buf); - LOG_WARN("Generate column name has been used", K(ret), K(*ft_col)); - } - } else { - // the generated column is not created - ObObj default_value; - default_value.set_varchar(ft_expr_def, static_cast(def_pos)); - column_schema.set_rowkey_position(0); //非主键列 - column_schema.set_index_position(0); //非索引列 - column_schema.set_tbl_part_key_pos(0); //非partition 
key - column_schema.set_tenant_id(data_schema.get_tenant_id()); - column_schema.set_table_id(data_schema.get_table_id()); - column_schema.set_column_id(OB_INVALID_ID == specified_virtual_cid ? - data_schema.get_max_used_column_id() + 1 : - specified_virtual_cid); - column_schema.add_column_flag(GENERATED_CTXCAT_CASCADE_FLAG); - column_schema.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); - column_schema.set_is_hidden(true); - column_schema.set_data_type(ObVarcharType); - column_schema.set_data_length(max_data_length); //生成列的长度和被分词列的最大长度保持一致 - column_schema.set_collation_type(collation_type); //生成列的collation和被分词列的collation保持一致 - column_schema.set_prev_column_id(UINT64_MAX); - column_schema.set_next_column_id(UINT64_MAX); - if (OB_FAIL(column_schema.set_column_name(col_name_buf))) { - LOG_WARN("set column name failed", K(ret)); - } else if (OB_FAIL(column_schema.set_orig_default_value(default_value))) { - LOG_WARN("set orig default value failed", K(ret)); - } else if (OB_FAIL(column_schema.set_cur_default_value(default_value))) { - LOG_WARN("set current default value failed", K(ret)); - } else if (OB_FAIL(data_schema.add_column(column_schema))) { - LOG_WARN("add column schema to data table failed", K(ret)); - } else { - ft_col = data_schema.get_column_schema(column_schema.get_column_id()); - } - } - } - } - return ret; -} - int ObIndexBuilderUtil::generate_ordinary_generated_column( ObRawExpr &expr, const ObSQLMode sql_mode, diff --git a/src/share/ob_index_builder_util.h b/src/share/ob_index_builder_util.h index cc3e05a92f..4032518110 100644 --- a/src/share/ob_index_builder_util.h +++ b/src/share/ob_index_builder_util.h @@ -79,37 +79,11 @@ public: ObColumnSchemaV2 &shadow_column_schema); private: static const int SPATIAL_MBR_COLUMN_MAX_LENGTH = 32; - typedef common::ObArray > OrderFTColumns; - class FulltextColumnOrder - { - public: - FulltextColumnOrder() {} - ~FulltextColumnOrder() {} - - bool operator()(const std::pair &left, - const std::pair &right) const - { - 
return left.first < right.first; - } - }; - static int generate_fulltext_column( - OrderFTColumns &ft_cols, - share::schema::ObTableSchema &data_schema, - uint64_t specified_virtual_cid, - share::schema::ObColumnSchemaV2 *&ft_col); static int generate_prefix_column( const obrpc::ObColumnSortItem &sort_item, const ObSQLMode sql_mode, share::schema::ObTableSchema &data_schema, share::schema::ObColumnSchemaV2 *&prefix_col); - static int adjust_fulltext_args( - obrpc::ObCreateIndexArg &arg, - share::schema::ObTableSchema &data_schema, - common::ObIAllocator &allocator, - share::schema::ObColumnSchemaV2 *&ft_col); - static int adjust_fulltext_columns( - obrpc::ObCreateIndexArg &arg, - OrderFTColumns &ft_columns); static int adjust_ordinary_index_column_args( obrpc::ObCreateIndexArg &arg, share::schema::ObTableSchema &data_schema, diff --git a/src/share/ob_plugin_helper.cpp b/src/share/ob_plugin_helper.cpp new file mode 100644 index 0000000000..adf76b7534 --- /dev/null +++ b/src/share/ob_plugin_helper.cpp @@ -0,0 +1,278 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE + +#include "lib/oblog/ob_log_module.h" +#include "lib/oblog/ob_log.h" +#include "share/ob_force_print_log.h" +#include "share/ob_plugin_helper.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace share +{ + +#define OB_PLUGIN_GETTER(buf, buf_len, name, name_len, suffix, suffix_len) \ +do { \ + const int64_t prefix_len = STRLEN(OB_PLUGIN_PREFIX); \ + if (OB_UNLIKELY(buf_len <= prefix_len + name_len + suffix_len)) { \ + ret = OB_INVALID_ARGUMENT; \ + LOG_WARN("This buffer is too small to accommodate all of name", K(ret), K(buf_len), \ + K(prefix_len), K(name_len), K(suffix_len)); \ + } else { \ + MEMCPY(buf + prefix_len + name_len, suffix, suffix_len); \ + MEMCPY(buf + prefix_len, name, name_len); \ + MEMCPY(buf, OB_PLUGIN_PREFIX, prefix_len); \ + buf[prefix_len + name_len + suffix_len] = '\0'; \ + } \ +} while (false) + +int get_plugin_version_str( + char *buf, + const int64_t buf_len, + const char *name, + const int64_t name_len) +{ + int ret = common::OB_SUCCESS; + OB_PLUGIN_GETTER(buf, + buf_len, + name, + name_len, + OB_PLUGIN_VERSION_SUFFIX, + STRLEN(OB_PLUGIN_VERSION_SUFFIX)); + return ret; +} + +int get_plugin_size_str( + char *buf, + const int64_t buf_len, + const char *name, + const int64_t name_len) +{ + int ret = common::OB_SUCCESS; + OB_PLUGIN_GETTER(buf, + buf_len, + name, + name_len, + OB_PLUGIN_SIZE_SUFFIX, + STRLEN(OB_PLUGIN_SIZE_SUFFIX)); + return ret; +} + +int get_plugin_str( + char *buf, + const int64_t buf_len, + const char *name, + const int64_t name_len) +{ + int ret = common::OB_SUCCESS; + OB_PLUGIN_GETTER(buf, + buf_len, + name, + name_len, + OB_PLUGIN_SUFFIX, + STRLEN(OB_PLUGIN_SUFFIX)); + return ret; +} + +int ObPluginName::set_name(const char *name) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(name)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("The name is nullptr", K(ret), KP(name)); + } else if (OB_UNLIKELY(STRLEN(name) >= OB_PLUGIN_NAME_LENGTH)) { + ret = 
OB_INVALID_ARGUMENT; + LOG_WARN("The name is too long", K(ret), KCSTRING(name)); + } else { + int i = 0; + while ('\0' != name[i]) { + name_[i] = tolower(name[i]); + ++i; + } + name_[i] = '\0'; + } + return ret; +} + +ObPluginSoHandler::ObPluginSoHandler() + : so_handler_(nullptr), + has_opened_(false) +{ + memset(plugin_name_, 0x0, OB_PLUGIN_NAME_LENGTH); +} + +ObPluginSoHandler::~ObPluginSoHandler() +{ + reset(); +} + +int ObPluginSoHandler::open(const char *plugin_name, const char *file_name) +{ + int ret = OB_SUCCESS; + const uint64_t plugin_name_len = nullptr == plugin_name ? 0 : STRLEN(plugin_name); + const uint64_t file_name_len = nullptr == file_name ? 0 : STRLEN(file_name); + if (OB_UNLIKELY(nullptr == plugin_name + || plugin_name_len >= OB_PLUGIN_NAME_LENGTH + || file_name_len >= OB_PLUGIN_FILE_NAME_LENGTH)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid arguments", K(ret), KP(plugin_name), K(plugin_name_len), KP(file_name), + K(file_name_len)); + } else if (OB_UNLIKELY(has_opened_)) { + ret = OB_INIT_TWICE; + LOG_WARN("This dynamic liabrary has be opened", K(ret), K(has_opened_), K(plugin_name), + K(file_name)); + } else if (OB_ISNULL(so_handler_ = ob_dlopen(file_name, RTLD_LAZY))) { + ret = OB_ERR_SYS; + const char *errmsg = ob_dlerror(); + LOG_WARN("fail to open dynamic library", K(ret), K(errmsg), K(plugin_name), K(file_name)); + } else { + STRCPY(plugin_name_, plugin_name); + has_opened_ = true; + FLOG_INFO("succeed to open a dynamic library", KP(so_handler_), K(plugin_name), K(file_name)); + } + if (OB_FAIL(ret) && !has_opened_) { + reset(); + } + return ret; +} + +void ObPluginSoHandler::reset() +{ + if (has_opened_) { + (void)close(); + } + so_handler_ = nullptr; + memset(plugin_name_, 0x0, OB_PLUGIN_NAME_LENGTH); +} + +int ObPluginSoHandler::close() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!has_opened_)) { + ret = OB_FILE_NOT_OPENED; + LOG_WARN("The dynamic library hasn't be opened, couldn't be closed", K(ret), K(has_opened_), + 
K(plugin_name_)); + } else if (OB_ISNULL(so_handler_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, so handler is nullptr", K(ret), K(plugin_name_), KP(so_handler_), + K(has_opened_)); + } else if (OB_UNLIKELY(0 != ob_dlclose(so_handler_))) { + ret = OB_ERR_SYS; + const char *errmsg = ob_dlerror(); + LOG_WARN("fail to close dynamic library", K(ret), K(errmsg), K(plugin_name_), KP(so_handler_)); + } else { + has_opened_ = false; + FLOG_INFO("succeed to close a dynamic library", K(plugin_name_), KP(so_handler_)); + } + return ret; +} + +int ObPluginSoHandler::get_plugin(lib::ObPlugin *&plugin) const +{ + int ret = OB_SUCCESS; + void *plugin_symbol = nullptr; + char plugin_name[OB_PLUGIN_SYMBOL_NAME_LENGTH]; + if (OB_UNLIKELY(!has_opened_)) { + ret = OB_FILE_NOT_EXIST; + LOG_WARN("The dynamic library hasn't be opened", K(ret), K(has_opened_), K(plugin_name_)); + } else if (OB_FAIL(get_plugin_str(plugin_name, OB_PLUGIN_SYMBOL_NAME_LENGTH, plugin_name_, + STRLEN(plugin_name_)))) { + LOG_WARN("fail to get plugin str", K(ret)); + } else if (OB_FAIL(get_symbol_ptr(plugin_name, plugin_symbol))) { + LOG_WARN("fail to get symbol pointer", K(ret), K(plugin_name)); + } else if (OB_ISNULL(plugin_symbol)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, plugin desc symbol ptr is nullptr", K(ret), K(plugin_name_), + KP(plugin_symbol)); + } else { + plugin = static_cast(plugin_symbol); + } + return ret; +} + +int ObPluginSoHandler::get_plugin_version(int64_t &version) const +{ + int ret = OB_SUCCESS; + void *plugin_version_symbol = nullptr; + char plugin_version_name[OB_PLUGIN_SYMBOL_NAME_LENGTH]; + if (OB_UNLIKELY(!has_opened_)) { + ret = OB_FILE_NOT_EXIST; + LOG_WARN("The dynamic library hasn't be opened", K(ret), K(has_opened_), K(plugin_name_)); + } else if (OB_FAIL(get_plugin_version_str(plugin_version_name, OB_PLUGIN_SYMBOL_NAME_LENGTH, + plugin_name_, STRLEN(plugin_name_)))) { + + } else if (OB_FAIL(get_symbol_ptr(plugin_version_name, 
plugin_version_symbol))) { + LOG_WARN("fail to get symbol pointer", K(ret), K(plugin_version_name)); + } else if (OB_ISNULL(plugin_version_symbol)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, plugin desc symbol ptr is nullptr", K(ret), K(plugin_name_), + KP(plugin_version_symbol)); + } else { + version = *static_cast(plugin_version_symbol); + } + return ret; +} + +int ObPluginSoHandler::get_plugin_size(int64_t &size) const +{ + int ret = OB_SUCCESS; + void *plugin_size_symbol = nullptr; + char plugin_size_name[OB_PLUGIN_SYMBOL_NAME_LENGTH]; + if (OB_UNLIKELY(!has_opened_)) { + ret = OB_FILE_NOT_EXIST; + LOG_WARN("The dynamic library hasn't be opened", K(ret), K(has_opened_), K(plugin_name_)); + } else if (OB_FAIL(get_plugin_size_str(plugin_size_name, OB_PLUGIN_SYMBOL_NAME_LENGTH, + plugin_name_, STRLEN(plugin_name_)))) { + LOG_WARN("fail to get plugin size name", K(ret)); + } else if (OB_FAIL(get_symbol_ptr(plugin_size_name, plugin_size_symbol))) { + LOG_WARN("fail to get symbol pointer", K(ret), K(plugin_size_name)); + } else if (OB_ISNULL(plugin_size_symbol)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, plugin desc symbol ptr is nullptr", K(ret), K(plugin_name_), + KP(plugin_size_symbol)); + } else { + size = *static_cast(plugin_size_symbol); + } + return ret; +} + +int ObPluginSoHandler::get_symbol_ptr(const char *sym_name, void *&sym_ptr) const +{ + int ret = OB_SUCCESS; + const uint64_t sym_name_len = nullptr == sym_name ? 
0 : STRLEN(sym_name); + if (OB_UNLIKELY(!has_opened_)) { + ret = OB_FILE_NOT_OPENED; + LOG_WARN("The dynamic library hasn't be opened", K(ret), K(plugin_name_), K(has_opened_)); + } else if (OB_ISNULL(so_handler_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, so handler is nullptr", K(ret), K(plugin_name_), KP(so_handler_), + K(has_opened_)); + } else if (OB_UNLIKELY(nullptr == sym_name || sym_name_len >= OB_PLUGIN_SYMBOL_NAME_LENGTH)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument", K(ret), K(plugin_name_), K(sym_name), K(sym_name_len)); + } else if (OB_ISNULL(sym_ptr = ob_dlsym(so_handler_, sym_name))) { + ret = OB_SEARCH_NOT_FOUND; + const char *errmsg = ob_dlerror(); + LOG_WARN("Don't find symbol in dynamic library", K(ret), K(errmsg), K(plugin_name_), K(sym_name), + KP(so_handler_)); + } else { + LOG_DEBUG("succeed to get a symbol", K(sym_name), KP(sym_ptr), K(plugin_name_), KP(so_handler_)); + } + return ret; +} + +} // end namespace share +} // end namespace oceanbase diff --git a/src/share/ob_plugin_helper.h b/src/share/ob_plugin_helper.h new file mode 100644 index 0000000000..168338afea --- /dev/null +++ b/src/share/ob_plugin_helper.h @@ -0,0 +1,122 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_PLUGIN_HELPER_H_ +#define OB_PLUGIN_HELPER_H_ + +#include + +#include "lib/ob_plugin.h" +#include "lib/ob_errno.h" +#include "lib/utility/ob_macro_utils.h" +#include "lib/utility/ob_print_utils.h" + +namespace oceanbase +{ +namespace share +{ + +inline void *ob_dlopen(const char *file_name, int flags) +{ + return ::dlopen(file_name, flags); +} + +inline void *ob_dlsym(void *__restrict handle, const char *__restrict symbol_name) +{ + return ::dlsym(handle, symbol_name); +} + +inline int ob_dlclose(void *handle) +{ + return ::dlclose(handle); +} + +inline char *ob_dlerror(void) +{ + return ::dlerror(); +} + +class ObPluginName final +{ +public: + ObPluginName() { memset(name_, 0x0, OB_PLUGIN_NAME_LENGTH); } + explicit ObPluginName(const char *name) { OB_ASSERT(common::OB_SUCCESS == set_name(name)); } + ~ObPluginName() = default; + + int set_name(const char *name); + OB_INLINE bool is_valid() const { return STRLEN(name_) > 0; } + OB_INLINE int len() const { return STRLEN(name_); } + OB_INLINE char *str() { return name_; } + OB_INLINE const char *str() const { return name_; } + OB_INLINE int hash(uint64_t &value) const + { + value = murmurhash(name_, static_cast(STRLEN(name_)), 0); + return OB_SUCCESS; + } + + OB_INLINE bool operator ==(const ObPluginName &other) const + { + return 0 == STRCMP(name_, other.name_); + } + OB_INLINE bool operator !=(const ObPluginName &other) const + { + return 0 != STRCMP(name_, other.name_); + } + OB_INLINE bool operator <(const ObPluginName &other) const + { + return 0 > STRCMP(name_, other.name_); + } + TO_STRING_KV(K_(name)); +private: + char name_[OB_PLUGIN_NAME_LENGTH]; +}; + +class ObPluginSoHandler final : public lib::ObIPluginHandler +{ +public: + ObPluginSoHandler(); + ~ObPluginSoHandler(); + + void reset(); + /** + * open dynamic plugin library + * - if file_name is nullptr, then the so handle is for the main program. 
+ * + * @param[in] plugin_name + * @param[in] file_name + * @return error code + */ + int open(const char *plugin_name, const char *file_name); + int close(); + + virtual int get_plugin(lib::ObPlugin *&plugin) const override; + virtual int get_plugin_version(int64_t &version) const override; + virtual int get_plugin_size(int64_t &size) const override; + + VIRTUAL_TO_STRING_KV(K_(plugin_name), KP_(so_handler), K_(has_opened)); + +private: + int get_symbol_ptr(const char *sym_name, void *&sym_ptr) const; + +private: + char plugin_name_[OB_PLUGIN_NAME_LENGTH]; + void *so_handler_; + bool has_opened_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObPluginSoHandler); +}; + +} // end namespace share +} // end namespace oceanbase + +#endif // OB_PLUGIN_HELPER_H_ diff --git a/src/share/ob_rpc_struct.h b/src/share/ob_rpc_struct.h index 4cbbc67497..0394390c29 100755 --- a/src/share/ob_rpc_struct.h +++ b/src/share/ob_rpc_struct.h @@ -2518,7 +2518,7 @@ struct ObIndexOption : public ObTableOption public: ObIndexOption() : ObTableOption(), - parser_name_(common::OB_DEFAULT_FULLTEXT_PARSER_NAME), + parser_name_(), index_attributes_set_(common::OB_DEFAULT_INDEX_ATTRIBUTES_SET) { } @@ -2526,7 +2526,7 @@ public: void reset() { ObTableOption::reset(); - parser_name_ = common::ObString::make_string(common::OB_DEFAULT_FULLTEXT_PARSER_NAME); + parser_name_.reset(); } DECLARE_TO_STRING; @@ -2633,15 +2633,12 @@ public: } inline bool is_unique_primary_index() const { - return share::schema::INDEX_TYPE_UNIQUE_LOCAL == index_type_ - || share::schema::INDEX_TYPE_UNIQUE_GLOBAL == index_type_ - || share::schema::INDEX_TYPE_UNIQUE_GLOBAL_LOCAL_STORAGE == index_type_ - || share::schema::INDEX_TYPE_PRIMARY == index_type_; + return ObSimpleTableSchemaV2::is_unique_index(index_type_) + || share::schema::INDEX_TYPE_PRIMARY == index_type_; } DECLARE_VIRTUAL_TO_STRING; - inline bool is_spatial_index() const { return share::schema::INDEX_TYPE_SPATIAL_LOCAL == index_type_ - || 
share::schema::INDEX_TYPE_SPATIAL_GLOBAL == index_type_ - || share::schema::INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == index_type_; } + inline bool is_spatial_index() const { return ObSimpleTableSchemaV2::is_spatial_index(index_type_); } + inline bool is_multivalue_index() const { return is_multivalue_index_aux(index_type_); } //todo @qilu:only for each_cg now, when support customized cg ,refine this typedef common::ObSEArray ObCGColumnList; diff --git a/src/share/parameter/ob_parameter_seed.ipp b/src/share/parameter/ob_parameter_seed.ipp index f85204c2e8..72cb7aee3d 100644 --- a/src/share/parameter/ob_parameter_seed.ipp +++ b/src/share/parameter/ob_parameter_seed.ipp @@ -1343,6 +1343,9 @@ DEF_INT(_max_schema_slot_num, OB_TENANT_PARAMETER, "128", "[2,256]", "the max schema slot number for multi-version schema memory management, " "Range: [2, 256] in integer", ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); +DEF_BOOL(_enable_add_fulltext_index_to_existing_table, OB_CLUSTER_PARAMETER, "False", + "enable create fulltext index after table is created", + ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); DEF_INT_WITH_CHECKER(_ob_query_rate_limit, OB_TENANT_PARAMETER, "-1", common::ObConfigQueryRateLimitChecker, "the maximun throughput allowed for a tenant per observer instance", diff --git a/src/share/rc/ob_tenant_base.h b/src/share/rc/ob_tenant_base.h index bb85b29bd7..980c33e6dd 100755 --- a/src/share/rc/ob_tenant_base.h +++ b/src/share/rc/ob_tenant_base.h @@ -74,6 +74,7 @@ class ObTenantMdsService; class ObAccessService; class ObTenantFreezer; class ObTenantMetaMemMgr; + class ObTenantFTPluginMgr; class ObStorageLogger; class ObTenantCheckpointSlogHandler; class ObTenantFreezeInfoMgr; @@ -250,6 +251,7 @@ using ObTableScanIteratorObjPool = common::ObServerObjectPoolis_global_index_table()) { // skip - } else if (!with_domain_index && index_schema->is_domain_index()) { + } else if (!with_domain_index 
&& index_schema->is_fts_index()) { // does not need domain index, skip it } else if (!with_spatial_index && index_schema->is_spatial_index() && is_geo_default_srid) { // skip spatial index when geometry column has not specific srid. @@ -1693,7 +1693,8 @@ int ObSchemaGetterGuard::get_table_id(uint64_t tenant_id, const ObString &table_name, const bool is_index, const CheckTableType check_type, // check if temporary table is visable - uint64_t &table_id) + uint64_t &table_id, + const bool is_built_in_index/* = false*/) { int ret = OB_SUCCESS; uint64_t session_id = session_id_; @@ -1724,9 +1725,10 @@ int ObSchemaGetterGuard::get_table_id(uint64_t tenant_id, table_name, is_index, simple_table, - USER_HIDDEN_TABLE_TYPE == check_type ? true : false))) { + USER_HIDDEN_TABLE_TYPE == check_type ? true : false, + is_built_in_index))) { LOG_WARN("get simple table failed", KR(ret), K(tenant_id), - K(tenant_id), K(database_id), K(session_id), K(table_name), K(is_index)); + K(tenant_id), K(database_id), K(session_id), K(table_name), K(is_index), K(is_built_in_index)); } else if (NULL == simple_table) { if (OB_CORE_SCHEMA_VERSION != mgr->get_schema_version()) { // this log is useless when observer restarts. 
@@ -1754,7 +1756,8 @@ int ObSchemaGetterGuard::get_table_id(uint64_t tenant_id, const ObString &table_name, const bool is_index, const CheckTableType check_type, // check if temporary table is visable - uint64_t &table_id) + uint64_t &table_id, + const bool is_built_in_index/* = false*/) { int ret = OB_SUCCESS; table_id = OB_INVALID_ID; @@ -1776,9 +1779,9 @@ int ObSchemaGetterGuard::get_table_id(uint64_t tenant_id, } else if (OB_INVALID_ID == database_id) { // do-nothing } else if (OB_FAIL(get_table_id(tenant_id, database_id, table_name, is_index, - check_type, table_id))){ + check_type, table_id, is_built_in_index))){ LOG_WARN("get table id failed", KR(ret), K(tenant_id), K(database_id), - K(table_name), K(is_index)); + K(table_name), K(is_index), K(is_built_in_index)); } } @@ -2282,7 +2285,8 @@ int ObSchemaGetterGuard::get_simple_table_schema( const ObString &table_name, const bool is_index, const ObSimpleTableSchemaV2 *&simple_table_schema, - bool is_hidden/*false*/) + const bool with_hidden_flag/*false*/, + const bool is_built_in_index/*false*/) { int ret = OB_SUCCESS; const ObSchemaMgr *mgr = NULL; @@ -2304,8 +2308,9 @@ int ObSchemaGetterGuard::get_simple_table_schema( table_name, is_index, simple_table_schema, - is_hidden))) { - LOG_WARN("get simple table failed", KR(ret), K(tenant_id), + with_hidden_flag, + is_built_in_index))) { + LOG_WARN("get simple table failed", KR(ret), K(tenant_id), K(with_hidden_flag), K(is_built_in_index), K(tenant_id), K(database_id), K(table_name), K(is_index)); } return ret; @@ -2317,7 +2322,8 @@ int ObSchemaGetterGuard::get_table_schema( const ObString &table_name, const bool is_index, const ObTableSchema *&table_schema, - bool is_hidden/*false*/) + const bool with_hidden_flag/*false*/, + const bool is_built_in_index/*false*/) { int ret = OB_SUCCESS; const ObSimpleTableSchemaV2 *simple_table = NULL; @@ -2327,9 +2333,10 @@ int ObSchemaGetterGuard::get_table_schema( table_name, is_index, simple_table, - is_hidden))) { + 
with_hidden_flag, + is_built_in_index))) { LOG_WARN("fail to get simple table schema", KR(ret), K(tenant_id), - K(database_id), K(table_name), K(is_index), K(is_hidden)); + K(database_id), K(table_name), K(is_index), K(with_hidden_flag), K(is_built_in_index)); } else if (NULL == simple_table) { LOG_INFO("table not exist", K(tenant_id), K(database_id), K(table_name), K(is_index)); @@ -2355,7 +2362,8 @@ int ObSchemaGetterGuard::get_table_schema( const ObString &table_name, const bool is_index, const ObTableSchema *&table_schema, - bool is_hidden/*false*/) + const bool with_hidden_flag/*false*/, + const bool is_built_in_index/*false*/) { int ret = OB_SUCCESS; uint64_t database_id = OB_INVALID_ID; @@ -2373,7 +2381,7 @@ int ObSchemaGetterGuard::get_table_schema( } else if (OB_INVALID_ID == database_id) { // do-nothing } else { - ret = get_table_schema(tenant_id, database_id, table_name, is_index, table_schema, is_hidden); + ret = get_table_schema(tenant_id, database_id, table_name, is_index, table_schema, with_hidden_flag, is_built_in_index); } return ret; diff --git a/src/share/schema/ob_schema_getter_guard.h b/src/share/schema/ob_schema_getter_guard.h index 5cf233abe6..7ff14e6c46 100644 --- a/src/share/schema/ob_schema_getter_guard.h +++ b/src/share/schema/ob_schema_getter_guard.h @@ -188,7 +188,8 @@ public: const ObString &table_name, const bool is_index, const ObSimpleTableSchemaV2 *&simple_table_schema, - bool is_hidden = false); + const bool with_hidden_flag = false, + const bool is_built_in_index = false); int get_table_schemas_in_tenant(const uint64_t tenant_id, common::ObIArray &table_schemas); int get_database_schemas_in_tenant(const uint64_t tenant_id, @@ -354,13 +355,15 @@ public: const common::ObString &table_name, const bool is_index, const CheckTableType check_type, // if temporary table is visable - uint64_t &table_id); + uint64_t &table_id, + const bool is_built_in_index = false); int get_table_id(uint64_t tenant_id, const common::ObString 
&database_name, const common::ObString &table_name, const bool is_index, const CheckTableType check_type, // if temporary table is visable - uint64_t &table_id); + uint64_t &table_id, + const bool is_built_in_index = false); int get_foreign_key_id(const uint64_t tenant_id, const uint64_t database_id, const common::ObString &foreign_key_name, @@ -411,13 +414,15 @@ public: const common::ObString &table_name, const bool is_index, const ObTableSchema *&table_schema, - bool is_hidden = false); + const bool with_hidden_flag = false, + const bool is_built_in_index = false); int get_table_schema(const uint64_t tenant_id, const common::ObString &database_name, const common::ObString &table_name, const bool is_index, const ObTableSchema *&table_schema, - bool is_hidden = false); + const bool with_hidden_flag = false, + const bool is_built_in_index = false); int get_index_schemas_with_data_table_id(const uint64_t tenant_id, const uint64_t data_table_id, ObIArray &aux_schemas); diff --git a/src/share/schema/ob_schema_mgr.cpp b/src/share/schema/ob_schema_mgr.cpp index 20f032739f..e3ff5c332e 100644 --- a/src/share/schema/ob_schema_mgr.cpp +++ b/src/share/schema/ob_schema_mgr.cpp @@ -481,7 +481,7 @@ ObSchemaMgr::ObSchemaMgr() lob_piece_infos_(0, NULL, SET_USE_500(ObModIds::OB_SCHEMA_LOB_PIECE_INFO_VEC, ObCtxIds::SCHEMA_SERVICE)), table_id_map_(SET_USE_500(ObModIds::OB_SCHEMA_TABLE_ID_MAP, ObCtxIds::SCHEMA_SERVICE)), table_name_map_(SET_USE_500(ObModIds::OB_SCHEMA_TABLE_NAME_MAP, ObCtxIds::SCHEMA_SERVICE)), - index_name_map_(SET_USE_500(ObModIds::OB_SCHEMA_INDEX_NAME_MAP, ObCtxIds::SCHEMA_SERVICE)), + normal_index_name_map_(SET_USE_500(ObModIds::OB_SCHEMA_INDEX_NAME_MAP, ObCtxIds::SCHEMA_SERVICE)), aux_vp_name_map_(SET_USE_500(ObModIds::OB_SCHEMA_AUX_VP_NAME_VEC, ObCtxIds::SCHEMA_SERVICE)), outline_mgr_(allocator_), routine_mgr_(allocator_), @@ -505,6 +505,7 @@ ObSchemaMgr::ObSchemaMgr() keystore_mgr_(allocator_), tablespace_mgr_(allocator_), 
hidden_table_name_map_(SET_USE_500("HiddenTblNames", ObCtxIds::SCHEMA_SERVICE)), + built_in_index_name_map_(SET_USE_500("BuiltInIdxNames", ObCtxIds::SCHEMA_SERVICE)), dblink_mgr_(allocator_), directory_mgr_(allocator_), context_mgr_(allocator_), @@ -536,7 +537,7 @@ ObSchemaMgr::ObSchemaMgr(ObIAllocator &allocator) lob_piece_infos_(0, NULL, SET_USE_500(ObModIds::OB_SCHEMA_LOB_PIECE_INFO_VEC, ObCtxIds::SCHEMA_SERVICE)), table_id_map_(SET_USE_500(ObModIds::OB_SCHEMA_TABLE_ID_MAP, ObCtxIds::SCHEMA_SERVICE)), table_name_map_(SET_USE_500(ObModIds::OB_SCHEMA_TABLE_NAME_MAP, ObCtxIds::SCHEMA_SERVICE)), - index_name_map_(SET_USE_500(ObModIds::OB_SCHEMA_INDEX_NAME_MAP, ObCtxIds::SCHEMA_SERVICE)), + normal_index_name_map_(SET_USE_500(ObModIds::OB_SCHEMA_INDEX_NAME_MAP, ObCtxIds::SCHEMA_SERVICE)), aux_vp_name_map_(SET_USE_500(ObModIds::OB_SCHEMA_AUX_VP_NAME_VEC, ObCtxIds::SCHEMA_SERVICE)), outline_mgr_(allocator_), routine_mgr_(allocator_), @@ -560,6 +561,7 @@ ObSchemaMgr::ObSchemaMgr(ObIAllocator &allocator) keystore_mgr_(allocator_), tablespace_mgr_(allocator_), hidden_table_name_map_(SET_USE_500("HiddenTblNames", ObCtxIds::SCHEMA_SERVICE)), + built_in_index_name_map_(SET_USE_500("BuiltInIdxNames", ObCtxIds::SCHEMA_SERVICE)), dblink_mgr_(allocator_), directory_mgr_(allocator_), context_mgr_(allocator_), @@ -586,7 +588,7 @@ int ObSchemaMgr::init(const uint64_t tenant_id) LOG_WARN("init table id map failed", K(ret)); } else if (OB_FAIL(table_name_map_.init())) { LOG_WARN("init table name map failed", K(ret)); - } else if (OB_FAIL(index_name_map_.init())) { + } else if (OB_FAIL(normal_index_name_map_.init())) { LOG_WARN("init index name map failed", K(ret)); } else if (OB_FAIL(aux_vp_name_map_.init())) { LOG_WARN("init index name map failed", K(ret)); @@ -642,6 +644,8 @@ int ObSchemaMgr::init(const uint64_t tenant_id) LOG_WARN("init rls_context mgr failed", K(ret)); } else if (OB_FAIL(hidden_table_name_map_.init())) { LOG_WARN("init hidden table name map failed", K(ret)); + } 
else if (OB_FAIL(built_in_index_name_map_.init())) { + LOG_WARN("init built in index name map failed", K(ret)); } else if (OB_FAIL(context_mgr_.init())) { LOG_WARN("init context mgr failed", K(ret)); } else if (OB_FAIL(mock_fk_parent_table_mgr_.init())) { @@ -680,7 +684,7 @@ void ObSchemaMgr::reset() database_name_map_.clear(); table_id_map_.clear(); table_name_map_.clear(); - index_name_map_.clear(); + normal_index_name_map_.clear(); aux_vp_name_map_.clear(); foreign_key_name_map_.clear(); constraint_name_map_.clear(); @@ -709,6 +713,7 @@ void ObSchemaMgr::reset() rls_context_mgr_.reset(); tenant_id_ = OB_INVALID_TENANT_ID; hidden_table_name_map_.clear(); + built_in_index_name_map_.clear(); context_mgr_.reset(); mock_fk_parent_table_mgr_.reset(); mlog_infos_.clear(); @@ -768,12 +773,13 @@ int ObSchemaMgr::assign(const ObSchemaMgr &other) ASSIGN_FIELD(drop_tenant_infos_); ASSIGN_FIELD(table_id_map_); ASSIGN_FIELD(table_name_map_); - ASSIGN_FIELD(index_name_map_); + ASSIGN_FIELD(normal_index_name_map_); ASSIGN_FIELD(aux_vp_name_map_); ASSIGN_FIELD(foreign_key_name_map_); ASSIGN_FIELD(constraint_name_map_); ASSIGN_FIELD(hidden_table_name_map_); ASSIGN_FIELD(mlog_infos_); + ASSIGN_FIELD(built_in_index_name_map_); #undef ASSIGN_FIELD if (OB_SUCC(ret)) { if (OB_FAIL(outline_mgr_.assign(other.outline_mgr_))) { @@ -2686,15 +2692,17 @@ int ObSchemaMgr::add_table( "table_name", new_table_schema->get_table_name()); } } else if (new_table_schema->is_index_table()) { // index is in recyclebin + const bool is_built_in_index = new_table_schema->is_built_in_fts_index(); + IndexNameMap &index_name_map = get_index_name_map_(is_built_in_index); if (new_table_schema->is_in_recyclebin()) { ObIndexSchemaHashWrapper index_name_wrapper(new_table_schema->get_tenant_id(), new_table_schema->get_database_id(), common::OB_INVALID_ID, new_table_schema->get_table_name_str()); - hash_ret = index_name_map_.set_refactored(index_name_wrapper, new_table_schema, over_write); + hash_ret = 
index_name_map.set_refactored(index_name_wrapper, new_table_schema, over_write); if (OB_SUCCESS != hash_ret && OB_HASH_EXIST != hash_ret) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("build index name hashmap failed", K(ret), K(hash_ret), + LOG_WARN("build index name hashmap failed", K(ret), K(hash_ret), K(is_built_in_index), "table_id", new_table_schema->get_table_id(), "index_name", new_table_schema->get_table_name()); } @@ -2706,10 +2714,10 @@ int ObSchemaMgr::add_table( new_table_schema->get_database_id(), is_oracle_mode ? common::OB_INVALID_ID : new_table_schema->get_data_table_id(), new_table_schema->get_origin_index_name_str()); - hash_ret = index_name_map_.set_refactored(cutted_index_name_wrapper, new_table_schema, over_write); + hash_ret = index_name_map.set_refactored(cutted_index_name_wrapper, new_table_schema, over_write); if (OB_SUCCESS != hash_ret && OB_HASH_EXIST != hash_ret) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("build index name hashmap failed", K(ret), K(hash_ret), + LOG_WARN("build index name hashmap failed", K(ret), K(hash_ret), K(is_built_in_index), K(new_table_schema->get_table_id()), K(new_table_schema->get_data_table_id()), K(new_table_schema->get_origin_index_name_str())); @@ -3227,22 +3235,24 @@ bool ObSchemaMgr::check_schema_meta_consistent() if (table_infos_.count() != table_id_map_.item_count() || table_id_map_.item_count() != (table_name_map_.item_count() + - index_name_map_.item_count() + + normal_index_name_map_.item_count() + aux_vp_name_map_.item_count() + lob_meta_infos_.count() + lob_piece_infos_.count() + - hidden_table_name_map_.item_count())) { + hidden_table_name_map_.item_count() + + built_in_index_name_map_.item_count())) { is_consistent_ = false; LOG_WARN_RET(OB_ERR_UNEXPECTED, "schema meta is not consistent, need rebuild", "schema_mgr version", get_schema_version(), "table_infos_count", table_infos_.count(), "table_id_map_item_count", table_id_map_.item_count(), "table_name_map_item_count", table_name_map_.item_count(), - 
"index_name_map_item_count", index_name_map_.item_count(), + "index_name_map_item_count", normal_index_name_map_.item_count(), "aux_vp_name_map_item_count", aux_vp_name_map_.item_count(), "lob_meta_infos_count", lob_meta_infos_.count(), "lob_piece_infos_count", lob_piece_infos_.count(), - "hidden_table_map count", hidden_table_name_map_.item_count()); + "hidden_table_map count", hidden_table_name_map_.item_count(), + "built_in_index_map count", built_in_index_name_map_.item_count()); } return is_consistent_; @@ -3285,9 +3295,10 @@ int ObSchemaMgr::rebuild_schema_meta_if_not_consistent() "msg", "duplicate table/database/foreign key/constraint exist", K_(tenant_id), "db_cnt", database_infos_.count(), "db_name_cnt", database_name_map_.item_count(), "table_cnt", table_infos_.count(), "table_id_cnt", table_id_map_.item_count(), - "table_name_cnt", table_name_map_.item_count(), "index_name_cnt", index_name_map_.item_count(), + "table_name_cnt", table_name_map_.item_count(), "index_name_cnt", normal_index_name_map_.item_count(), "aux_vp_name_cnt", aux_vp_name_map_.item_count(), "lob_meta_cnt", lob_meta_infos_.count(), "log_piece_cnt", lob_piece_infos_.count(), "hidden_table_cnt", hidden_table_name_map_.item_count(), + "built_in_index_cnt", built_in_index_name_map_.item_count(), "fk_cnt", fk_cnt, "fk_name_cnt", foreign_key_name_map_.item_count(), "cst_cnt", cst_cnt, "cst_name_cnt", constraint_name_map_.item_count()); right_to_die_or_duty_to_live(); @@ -3419,16 +3430,19 @@ int ObSchemaMgr::del_table(const ObTenantTableId table) ret = OB_HASH_NOT_EXIST != hash_ret ? 
hash_ret : ret; } } else if (schema_to_del->is_index_table()) { + const bool is_built_in_index = schema_to_del->is_built_in_fts_index(); + IndexNameMap &index_name_map = get_index_name_map_(is_built_in_index); if (schema_to_del->is_in_recyclebin()) { // index is in recyclebin ObIndexSchemaHashWrapper index_schema_wrapper(schema_to_del->get_tenant_id(), schema_to_del->get_database_id(), common::OB_INVALID_ID, schema_to_del->get_table_name_str()); - int hash_ret = index_name_map_.erase_refactored(index_schema_wrapper); + int hash_ret = index_name_map.erase_refactored(index_schema_wrapper); if (OB_SUCCESS != hash_ret) { LOG_WARN("failed delete index from index name hashmap, ", K(ret), K(hash_ret), + K(is_built_in_index), "index_name", schema_to_del->get_table_name()); // 增加增量schema刷新的容错处理,此时不报错,靠rebuild逻辑解 ret = OB_HASH_NOT_EXIST != hash_ret ? hash_ret : ret; @@ -3442,11 +3456,12 @@ int ObSchemaMgr::del_table(const ObTenantTableId table) schema_to_del->get_database_id(), is_oracle_mode ? common::OB_INVALID_ID : schema_to_del->get_data_table_id(), schema_to_del->get_origin_index_name_str()); - hash_ret = index_name_map_.erase_refactored(cutted_index_name_wrapper); + hash_ret = index_name_map.erase_refactored(cutted_index_name_wrapper); if (OB_SUCCESS != hash_ret) { LOG_WARN("failed delete index from index name hashmap, ", K(ret), K(hash_ret), + K(is_built_in_index), K(schema_to_del->get_tenant_id()), K(schema_to_del->get_database_id()), K(schema_to_del->get_data_table_id()), @@ -3508,11 +3523,12 @@ int ObSchemaMgr::del_table(const ObTenantTableId table) if (table_infos_.count() != table_id_map_.item_count() || table_id_map_.item_count() != (table_name_map_.item_count() + - index_name_map_.item_count() + + normal_index_name_map_.item_count() + aux_vp_name_map_.item_count() + lob_meta_infos_.count() + lob_piece_infos_.count() + - hidden_table_name_map_.item_count())) { + hidden_table_name_map_.item_count() + + built_in_index_name_map_.item_count())) { LOG_WARN("table 
info is non-consistent", "table_infos_count", table_infos_.count(), @@ -3521,7 +3537,7 @@ int ObSchemaMgr::del_table(const ObTenantTableId table) "table_name_map_item_count", table_name_map_.item_count(), "index_name_map_item_count", - index_name_map_.item_count(), + normal_index_name_map_.item_count(), "aux_vp_name_map_item_count", aux_vp_name_map_.item_count(), "lob_meta_infos_count", @@ -3533,7 +3549,9 @@ int ObSchemaMgr::del_table(const ObTenantTableId table) "table_id", table.table_id_, "hidden_table_map_item_count", - hidden_table_name_map_.item_count()); + hidden_table_name_map_.item_count(), + "built_in_index_map_item_count", + built_in_index_name_map_.item_count()); } return ret; @@ -3775,11 +3793,13 @@ int ObSchemaMgr::get_hidden_table_schema( } ERRSIM_POINT_DEF(ERRSIM_INVALID_INDEX_NAME); + int ObSchemaMgr::get_index_schema( const uint64_t tenant_id, const uint64_t database_id, const ObString &table_name, - const ObSimpleTableSchemaV2 *&table_schema) const + const ObSimpleTableSchemaV2 *&table_schema, + const bool is_built_in/* = false*/) const { int ret = OB_SUCCESS; table_schema = NULL; @@ -3799,16 +3819,17 @@ int ObSchemaMgr::get_index_schema( } else { ObSimpleTableSchemaV2 *tmp_schema = NULL; lib::Worker::CompatMode compat_mode = lib::Worker::CompatMode::INVALID; + const IndexNameMap &index_name_map = get_index_name_map_(is_built_in); if (OB_FAIL(ObCompatModeGetter::get_tenant_mode(tenant_id, compat_mode))) { LOG_WARN("fail to get tenant mode", K(ret)); } else if (is_recyclebin_database_id(database_id)) { // in recyclebin const ObIndexSchemaHashWrapper index_name_wrapper( tenant_id, database_id, common::OB_INVALID_ID, table_name); - int hash_ret = index_name_map_.get_refactored(index_name_wrapper, tmp_schema); + int hash_ret = index_name_map.get_refactored(index_name_wrapper, tmp_schema); if (OB_SUCCESS == hash_ret) { if (OB_ISNULL(tmp_schema)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("NULL ptr", K(ret), K(tmp_schema)); + LOG_WARN("NULL ptr", K(ret), 
K(tenant_id), K(table_name), K(is_built_in), KP(tmp_schema)); } else { table_schema = tmp_schema; } @@ -3836,11 +3857,11 @@ int ObSchemaMgr::get_index_schema( && !is_mysql_sys_database_id(database_id); const ObIndexSchemaHashWrapper cutted_index_name_wrapper(tenant_id, database_id, is_oracle_mode ? common::OB_INVALID_ID : data_table_id, cutted_index_name); - int hash_ret = index_name_map_.get_refactored(cutted_index_name_wrapper, tmp_schema); + int hash_ret = index_name_map.get_refactored(cutted_index_name_wrapper, tmp_schema); if (OB_SUCCESS == hash_ret) { if (OB_ISNULL(tmp_schema)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("NULL ptr", K(ret), K(tmp_schema)); + LOG_WARN("NULL ptr", K(ret), K(is_built_in), K(tmp_schema)); } else { table_schema = tmp_schema; } @@ -3864,16 +3885,16 @@ int ObSchemaMgr::deep_copy_index_name_map( } else { // index_name_cache will destory or not init, so sub_map_mem_size should be set first // to reduce dynamic memory allocation and avoid error. - (void) index_name_cache.set_sub_map_mem_size(index_name_map_.get_sub_map_mem_size()); + (void) index_name_cache.set_sub_map_mem_size(normal_index_name_map_.get_sub_map_mem_size()); if (OB_FAIL(index_name_cache.init())) { LOG_WARN("init index name cache failed", KR(ret)); } } for (int64_t sub_map_id = 0; - OB_SUCC(ret) && sub_map_id < index_name_map_.get_sub_map_count(); + OB_SUCC(ret) && sub_map_id < normal_index_name_map_.get_sub_map_count(); sub_map_id++) { - auto it = index_name_map_.begin(sub_map_id); - auto end = index_name_map_.end(sub_map_id); + IndexNameMap::iterator it = normal_index_name_map_.begin(sub_map_id); + IndexNameMap::iterator end = normal_index_name_map_.end(sub_map_id); for (; OB_SUCC(ret) && it != end; ++it) { const ObSimpleTableSchemaV2 *index_schema = *it; void *buf = NULL; @@ -3932,16 +3953,17 @@ int ObSchemaMgr::get_table_schema(const uint64_t tenant_id, const ObString &table_name, const bool is_index, const ObSimpleTableSchemaV2 *&table_schema, - bool is_hidden/*false*/) 
const + const bool with_hidden_flag/*false*/, + const bool is_built_in_index/*false*/) const { int ret = OB_SUCCESS; - if (OB_UNLIKELY(is_hidden)) { + if (OB_UNLIKELY(with_hidden_flag)) { ret = get_hidden_table_schema(tenant_id, database_id, table_name, table_schema); } else { if (!is_index) { ret = get_table_schema(tenant_id, database_id, session_id, table_name, table_schema); } else { - ret = get_index_schema(tenant_id, database_id, table_name, table_schema); + ret = get_index_schema(tenant_id, database_id, table_name, table_schema, is_built_in_index); } } return ret; @@ -4713,6 +4735,8 @@ int ObSchemaMgr::deal_with_change_table_state(const ObSimpleTableSchemaV2 &old_t // non-hidden table to hidden table if (old_table_schema.is_index_table()) { bool is_oracle_mode = false; + const bool is_built_in_index = old_table_schema.is_built_in_fts_index(); + IndexNameMap &index_name_map = get_index_name_map_(is_built_in_index); if (OB_FAIL(old_table_schema.check_if_oracle_compat_mode(is_oracle_mode))) { LOG_WARN("fail to check if tenant mode is oracle mode", K(ret)); } else if (old_table_schema.is_in_recyclebin()) { // index is in recyclebin @@ -4720,10 +4744,10 @@ int ObSchemaMgr::deal_with_change_table_state(const ObSimpleTableSchemaV2 &old_t old_table_schema.get_database_id(), common::OB_INVALID_ID, old_table_schema.get_table_name_str()); - int hash_ret = index_name_map_.erase_refactored(index_name_wrapper); + int hash_ret = index_name_map.erase_refactored(index_name_wrapper); if (OB_SUCCESS != hash_ret) { LOG_WARN("fail to delete index from index name hashmap", - K(ret), K(hash_ret), K(old_table_schema.get_table_name_str())); + K(ret), K(hash_ret), K(is_built_in_index), K(old_table_schema.get_table_name_str())); // increase the fault-tolerant processing of incremental schema refresh ret = OB_HASH_NOT_EXIST != hash_ret ? 
hash_ret : ret; } @@ -4736,10 +4760,10 @@ int ObSchemaMgr::deal_with_change_table_state(const ObSimpleTableSchemaV2 &old_t old_table_schema.get_database_id(), is_oracle_mode ? common::OB_INVALID_ID : old_table_schema.get_data_table_id(), cutted_index_name); - int hash_ret = index_name_map_.erase_refactored(cutted_index_name_wrapper); + int hash_ret = index_name_map.erase_refactored(cutted_index_name_wrapper); if (OB_SUCCESS != hash_ret) { LOG_WARN("failed delete index from index name hashmap, ", - K(ret), K(hash_ret), K(cutted_index_name)); + K(ret), K(hash_ret), K(is_built_in_index), K(cutted_index_name)); // increase the fault-tolerant processing of incremental schema refresh ret = OB_HASH_NOT_EXIST != hash_ret ? hash_ret : ret; } @@ -4811,7 +4835,9 @@ int ObSchemaMgr::deal_with_table_rename( K(new_table_name)); bool is_system_table = false; if (old_table_schema.is_index_table()) { + const bool is_built_in_index = old_table_schema.is_built_in_fts_index(); bool is_oracle_mode = false; + IndexNameMap &index_name_map = get_index_name_map_(is_built_in_index); if (OB_FAIL(old_table_schema.check_if_oracle_compat_mode(is_oracle_mode))) { LOG_WARN("fail to check if tenant mode is oracle mode", K(ret)); } else if (old_table_schema.is_in_recyclebin()) { // index is in recyclebin @@ -4819,10 +4845,10 @@ int ObSchemaMgr::deal_with_table_rename( old_table_schema.get_database_id(), common::OB_INVALID_ID, old_table_schema.get_table_name_str()); - int hash_ret = index_name_map_.erase_refactored(index_name_wrapper); + int hash_ret = index_name_map.erase_refactored(index_name_wrapper); if (OB_SUCCESS != hash_ret) { LOG_WARN("fail to delete index from index name hashmap", - K(ret), K(hash_ret), K(old_table_name)); + K(ret), K(hash_ret), K(is_built_in_index), K(old_table_name)); // 增加增量schema刷新的容错处理,此时不报错,靠rebuild逻辑解 ret = OB_HASH_NOT_EXIST != hash_ret ? 
hash_ret : ret; } @@ -4835,10 +4861,10 @@ int ObSchemaMgr::deal_with_table_rename( old_table_schema.get_database_id(), is_oracle_mode ? common::OB_INVALID_ID : old_table_schema.get_data_table_id(), cutted_index_name); - int hash_ret = index_name_map_.erase_refactored(cutted_index_name_wrapper); + int hash_ret = index_name_map.erase_refactored(cutted_index_name_wrapper); if (OB_SUCCESS != hash_ret) { LOG_WARN("failed delete index from index name hashmap, ", - K(ret), K(hash_ret), K(cutted_index_name)); + K(ret), K(hash_ret), K(is_built_in_index), K(cutted_index_name)); // Increase the fault-tolerant processing of incremental schema refresh, no error is reported at this time, // and the solution is solved by rebuild logic ret = OB_HASH_NOT_EXIST != hash_ret ? hash_ret : ret; @@ -4940,11 +4966,12 @@ int ObSchemaMgr::rebuild_table_hashmap(uint64_t &fk_cnt, uint64_t &cst_cnt) } else { table_id_map_.clear(); table_name_map_.clear(); - index_name_map_.clear(); + normal_index_name_map_.clear(); aux_vp_name_map_.clear(); foreign_key_name_map_.clear(); constraint_name_map_.clear(); hidden_table_name_map_.clear(); + built_in_index_name_map_.clear(); ObSimpleTableSchemaV2 *table_schema = NULL; // It is expected that OB_HASH_EXIST should not appear in the rebuild process int over_write = 0; @@ -5001,18 +5028,20 @@ int ObSchemaMgr::rebuild_table_hashmap(uint64_t &fk_cnt, uint64_t &cst_cnt) LOG_TRACE("index is", "table_id", table_schema->get_table_id(), "database_id", table_schema->get_database_id(), "table_name", table_schema->get_table_name_str()); + const bool is_built_in_index = table_schema->is_built_in_fts_index(); + IndexNameMap &index_name_map = get_index_name_map_(is_built_in_index); // oracle mode and index is not in recyclebin if (table_schema->is_in_recyclebin()) { ObIndexSchemaHashWrapper index_name_wrapper(table_schema->get_tenant_id(), table_schema->get_database_id(), common::OB_INVALID_ID, table_schema->get_table_name_str()); - hash_ret = 
index_name_map_.set_refactored(index_name_wrapper, table_schema, over_write); + hash_ret = index_name_map.set_refactored(index_name_wrapper, table_schema, over_write); if (OB_SUCCESS != hash_ret) { ret = OB_HASH_EXIST == hash_ret ? OB_SUCCESS : OB_ERR_UNEXPECTED; - tmp_ret = index_name_map_.get_refactored(index_name_wrapper, exist_schema); + tmp_ret = index_name_map.get_refactored(index_name_wrapper, exist_schema); LOG_ERROR("build index name hashmap failed", - KR(ret), KR(hash_ret), K(tmp_ret), + KR(ret), KR(hash_ret), K(tmp_ret), K(is_built_in_index), "exist_table_id", OB_NOT_NULL(exist_schema) ? exist_schema->get_table_id() : OB_INVALID_ID, "exist_database_id", OB_NOT_NULL(exist_schema) ? exist_schema->get_database_id() : OB_INVALID_ID, "index_name", OB_NOT_NULL(exist_schema) ? exist_schema->get_table_name() : "", @@ -5028,12 +5057,12 @@ int ObSchemaMgr::rebuild_table_hashmap(uint64_t &fk_cnt, uint64_t &cst_cnt) table_schema->get_database_id(), is_oracle_mode ? common::OB_INVALID_ID : table_schema->get_data_table_id(), table_schema->get_origin_index_name_str()); - hash_ret = index_name_map_.set_refactored(cutted_index_name_wrapper, table_schema, over_write); + hash_ret = index_name_map.set_refactored(cutted_index_name_wrapper, table_schema, over_write); if (OB_SUCCESS != hash_ret) { ret = OB_HASH_EXIST == hash_ret ? OB_SUCCESS : OB_ERR_UNEXPECTED; - tmp_ret = index_name_map_.get_refactored(cutted_index_name_wrapper, exist_schema); + tmp_ret = index_name_map.get_refactored(cutted_index_name_wrapper, exist_schema); LOG_ERROR("build index name hashmap failed", - KR(ret), KR(hash_ret), K(tmp_ret), + KR(ret), KR(hash_ret), K(tmp_ret), K(is_built_in_index), "exist_table_id", OB_NOT_NULL(exist_schema) ? exist_schema->get_table_id() : OB_INVALID_ID, "exist_database_id", OB_NOT_NULL(exist_schema) ? exist_schema->get_database_id() : OB_INVALID_ID, "index_name", OB_NOT_NULL(exist_schema) ? 
exist_schema->get_origin_index_name_str() : "", @@ -5145,7 +5174,7 @@ int ObSchemaMgr::get_idx_schema_by_origin_idx_name(const uint64_t tenant_id, const ObIndexSchemaHashWrapper index_name_wrapper( tenant_id, database_id, common::OB_INVALID_ID, ori_index_name); lib::CompatModeGuard g(lib::Worker::CompatMode::ORACLE); - int hash_ret = index_name_map_.get_refactored(index_name_wrapper, tmp_schema); + int hash_ret = normal_index_name_map_.get_refactored(index_name_wrapper, tmp_schema); if (OB_SUCCESS == hash_ret) { if (OB_ISNULL(tmp_schema)) { ret = OB_ERR_UNEXPECTED; diff --git a/src/share/schema/ob_schema_mgr.h b/src/share/schema/ob_schema_mgr.h index 2e32e3e071..f782d4d651 100644 --- a/src/share/schema/ob_schema_mgr.h +++ b/src/share/schema/ob_schema_mgr.h @@ -591,7 +591,8 @@ public: const common::ObString &table_name, const bool is_index, const ObSimpleTableSchemaV2 *&table_schema, - bool is_hidden = false) const; + const bool with_hidden_flag = false, + const bool is_built_in_index = false) const; int get_table_schema( const uint64_t tenant_id, const uint64_t database_id, @@ -606,7 +607,8 @@ public: const uint64_t tenant_id, const uint64_t database_id, const common::ObString &table_name, - const ObSimpleTableSchemaV2 *&table_schema) const; + const ObSimpleTableSchemaV2 *&table_schema, + const bool is_built_in = false) const; int get_idx_schema_by_origin_idx_name(const uint64_t tenant_id, const uint64_t database_id, const common::ObString &index_name, @@ -905,6 +907,14 @@ private: int reserved_mem_for_tables_( const common::ObIArray &table_schemas); + IndexNameMap &get_index_name_map_(const bool is_built_in) + { + return is_built_in ? built_in_index_name_map_ : normal_index_name_map_; + } + const IndexNameMap &get_index_name_map_(const bool is_built_in) const + { + return is_built_in ? 
built_in_index_name_map_ : normal_index_name_map_; + } private: common::ObArenaAllocator local_allocator_; common::ObIAllocator &allocator_; @@ -923,7 +933,7 @@ private: TableInfos lob_piece_infos_; TableIdMap table_id_map_; TableNameMap table_name_map_; - IndexNameMap index_name_map_; + IndexNameMap normal_index_name_map_; AuxVPNameMap aux_vp_name_map_; ObOutlineMgr outline_mgr_; ObRoutineMgr routine_mgr_; @@ -946,7 +956,14 @@ private: DropTenantInfos drop_tenant_infos_; ObKeystoreMgr keystore_mgr_; ObTablespaceMgr tablespace_mgr_; + // Map of tables with HIDDEN flag (is_user_hidden_table()) TableNameMap hidden_table_name_map_; + // Map of index tables with following attributes: + // 1. with no HIDDEN flag:is_user_hidden_table() == false + // 2. system built-in index tables when creating index + // 3. they are not visible to users, and their names are not in normal index name space. Their names + // are not conflicted with normal index names + IndexNameMap built_in_index_name_map_; ObDbLinkMgr dblink_mgr_; ObDirectoryMgr directory_mgr_; ObContextMgr context_mgr_; diff --git a/src/share/schema/ob_schema_printer.cpp b/src/share/schema/ob_schema_printer.cpp index 738b2d5e6e..bcf8be5a3f 100644 --- a/src/share/schema/ob_schema_printer.cpp +++ b/src/share/schema/ob_schema_printer.cpp @@ -602,6 +602,14 @@ int ObSchemaPrinter::print_single_index_definition(const ObTableSchema *index_sc // !is_alter_table_add for show create table // is_alter_table_add for dbms_metadata.get_ddl getting uk cst info SHARE_SCHEMA_LOG(WARN, "fail to print comma", K(ret)); + } else if (index_schema->is_multivalue_index()) { + if (!index_schema->is_unique_index() && + OB_FAIL(databuff_printf(buf, buf_len, pos, " MULTIVALUE KEY "))) { + SHARE_SCHEMA_LOG(WARN, "fail to print FULLTEXT KEY", K(ret)); + } else if (index_schema->is_unique_index() && + OB_FAIL(databuff_printf(buf, buf_len, pos, " UNIQUE MULTIVALUE KEY "))) { + SHARE_SCHEMA_LOG(WARN, "fail to print FULLTEXT KEY", K(ret)); + } } else if 
(index_schema->is_unique_index()) { if (is_oracle_mode) { if (index_schema->is_sys_generated_name(false/*check_unknown*/)) { @@ -619,7 +627,7 @@ int ObSchemaPrinter::print_single_index_definition(const ObTableSchema *index_sc SHARE_SCHEMA_LOG(WARN, "fail to print UNIQUE KEY", K(ret)); } } - } else if (index_schema->is_domain_index()) { + } else if (index_schema->is_fts_index()) { if (OB_FAIL(databuff_printf(buf, buf_len, pos, " FULLTEXT KEY "))) { SHARE_SCHEMA_LOG(WARN, "fail to print FULLTEXT KEY", K(ret)); } @@ -649,7 +657,6 @@ int ObSchemaPrinter::print_single_index_definition(const ObTableSchema *index_sc int64_t rowkey_count = index_rowkey_info.get_size(); ObColumnSchemaV2 last_col; bool is_valid_col = false; - ObArray ctxcat_cols; for (int64_t k = 0; OB_SUCC(ret) && k < index_column_num; k++) { const ObRowkeyColumn *rowkey_column = index_rowkey_info.get_column(k); const ObColumnSchemaV2 *col = NULL; @@ -661,9 +668,11 @@ int ObSchemaPrinter::print_single_index_definition(const ObTableSchema *index_sc rowkey_column->column_id_))) { ret = OB_SCHEMA_ERROR; SHARE_SCHEMA_LOG(WARN, "fail to get column schema", K(ret), KPC(index_schema)); + } else if (index_schema->is_fts_index() && col->is_doc_id_column()) { + // skip doc id for fts index. 
} else if (!col->is_shadow_column()) { if (OB_SUCC(ret) && is_valid_col) { - if (OB_FAIL(print_index_column(table_schema, last_col, ctxcat_cols, false /* not last one */, buf, buf_len, pos))) { + if (OB_FAIL(print_index_column(table_schema, last_col, false /* not last one */, buf, buf_len, pos))) { SHARE_SCHEMA_LOG(WARN, "fail to print index column", K(last_col), K(ret)); } } @@ -683,14 +692,12 @@ int ObSchemaPrinter::print_single_index_definition(const ObTableSchema *index_sc } } if (OB_SUCC(ret)) { - if (OB_FAIL(print_index_column(table_schema, last_col, ctxcat_cols, true /* last column */, buf, buf_len, pos))) { + if (OB_FAIL(print_index_column(table_schema, last_col, true /* last column */, buf, buf_len, pos))) { SHARE_SCHEMA_LOG(WARN, "fail to print column name", K(ret), K(last_col)); - } else if (!strict_compat_ && OB_FAIL(print_table_definition_fulltext_indexs(is_oracle_mode, ctxcat_cols, buf, buf_len, pos))) { - LOG_WARN("print table definition fulltext indexs failed", K(ret)); } else { /*do nothing*/ } } // show storing columns in index - if (OB_SUCC(ret) && !strict_compat_ && !is_no_key_options(sql_mode)) { + if (OB_SUCC(ret) && !strict_compat_ && !is_no_key_options(sql_mode) && !index_schema->is_fts_index()) { int64_t column_count = index_schema->get_column_count(); if (column_count >= rowkey_count) { bool first_storing_column = true; @@ -796,6 +803,8 @@ int ObSchemaPrinter::print_table_definition_indexes(const ObTableSchema &table_s index_schema->is_global_local_index_table())) { // For strictly compatible with MySQL, // Do not print global index. + } else if (index_schema->is_built_in_fts_index()) { + // For full-text search index, only inverted table can be printed, and others table will not be printed. 
} else if (OB_FAIL(print_single_index_definition(index_schema, table_schema, arena_allocator, buf, buf_len, pos, is_unique_index, is_oracle_mode, false, sql_mode, tz_info))) { LOG_WARN("print single index definition failed", K(ret)); @@ -916,7 +925,6 @@ int ObSchemaPrinter::print_table_definition_constraints(const ObTableSchema &tab int ObSchemaPrinter::print_fulltext_index_column(const ObTableSchema &table_schema, const ObColumnSchemaV2 &column, - ObIArray &ctxcat_cols, bool is_last, char *buf, int64_t buf_len, @@ -952,8 +960,6 @@ int ObSchemaPrinter::print_fulltext_index_column(const ObTableSchema &table_sche } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, is_last && j == ctxcat_ids.count() - 1 ? ")" : ", "))) { SHARE_SCHEMA_LOG(WARN, "fail to print column name", K(ret), K(column)); - } else if (OB_FAIL(ctxcat_cols.push_back(ctxcat_column->get_column_name_str()))) { - LOG_WARN("get fulltext index column failed", K(ret)); } else { /*do nothing*/ } } } @@ -1091,7 +1097,6 @@ int ObSchemaPrinter::print_ordinary_index_column_expr(const ObColumnSchemaV2 &co int ObSchemaPrinter::print_index_column(const ObTableSchema &table_schema, const ObColumnSchemaV2 &column, - ObIArray &ctxcat_cols, bool is_last, char *buf, int64_t buf_len, @@ -1107,7 +1112,6 @@ int ObSchemaPrinter::print_index_column(const ObTableSchema &table_schema, if (column.is_fulltext_column()) { if (OB_FAIL(print_fulltext_index_column(table_schema, column, - ctxcat_cols, is_last, buf, buf_len, @@ -1674,10 +1678,14 @@ int ObSchemaPrinter::print_table_definition_table_options(const ObTableSchema &t SHARE_SCHEMA_LOG(WARN, "fail to print collate", K(ret), K(table_schema)); } } - if (OB_SUCC(ret) && table_schema.is_domain_index() + if (OB_SUCC(ret) && table_schema.is_fts_index() && !is_no_key_options(sql_mode) && !table_schema.get_parser_name_str().empty()) { - if (OB_FAIL(databuff_printf(buf, buf_len, pos, "WITH PARSER '%s' ", table_schema.get_parser_name()))) { - SHARE_SCHEMA_LOG(WARN, "print parser name 
failed", K(ret)); + storage::ObFTParser parser; + if (OB_FAIL(parser.parse_from_str(table_schema.get_parser_name_str().ptr(), table_schema.get_parser_name_str().length()))) { + LOG_WARN("fail to parse name from cstring", K(ret), K(table_schema.get_parser_name_str())); + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "WITH PARSER %.*s ", parser.get_parser_name().len(), + parser.get_parser_name().str()))) { + SHARE_SCHEMA_LOG(WARN, "print parser name failed", K(ret), K(parser)); } } if (OB_SUCCESS == ret && !is_index_tbl && !is_no_table_options(sql_mode) && !table_schema.is_external_table()) { @@ -1718,8 +1726,8 @@ int ObSchemaPrinter::print_table_definition_table_options(const ObTableSchema &t SHARE_SCHEMA_LOG(WARN, "fail to print block size", K(ret), K(table_schema)); } } - if (OB_SUCCESS == ret && !strict_compat_ && is_index_tbl && !table_schema.is_domain_index() - && !is_no_key_options(sql_mode)) { + if (OB_SUCCESS == ret && !strict_compat_ && is_index_tbl && !table_schema.is_fts_index() + && !table_schema.is_multivalue_index() && !is_no_key_options(sql_mode)) { const char* local_flag = table_schema.is_global_index_table() || table_schema.is_global_local_index_table() ? 
"GLOBAL " : "LOCAL "; @@ -2184,7 +2192,7 @@ int ObSchemaPrinter::print_table_definition_table_options( OB_LOG(WARN, "fail to print collate", K(ret), K(table_schema)); } } - if (OB_SUCC(ret) && table_schema.is_domain_index()) { + if (OB_SUCC(ret) && table_schema.is_fts_index()) { if (full_text_columns.count() <= 0 || OB_UNLIKELY(virtual_column_id == OB_INVALID_ID)) { ret = OB_ERR_UNEXPECTED; OB_LOG(WARN, "invalid domain index infos", K(full_text_columns), K(virtual_column_id)); @@ -2231,7 +2239,7 @@ int ObSchemaPrinter::print_table_definition_table_options( OB_LOG(WARN, "fail to print block size", K(ret), K(table_schema)); } } - if (OB_SUCC(ret) && !strict_compat_ && is_index_tbl && !table_schema.is_domain_index()) { + if (OB_SUCC(ret) && !strict_compat_ && is_index_tbl && !table_schema.is_fts_index() && !table_schema.is_multivalue_index()) { const char* local_flag = table_schema.is_global_index_table() || table_schema.is_global_local_index_table() ? "GLOBAL " : "LOCAL "; @@ -2687,7 +2695,7 @@ int ObSchemaPrinter::print_index_table_definition( : "CREATE UNIQUE INDEX "))) { OB_LOG(WARN, "fail to print create table prefix", K(ret), K(table_schema->get_table_name())); } - } else if (index_table_schema->is_domain_index()) { + } else if (index_table_schema->is_fts_index()) { if (OB_FAIL(databuff_printf(buf, buf_len, pos, !is_oracle_mode ? 
"CREATE FULLTEXT INDEX if not exists " : "CREATE FULLTEXT INDEX "))) { diff --git a/src/share/schema/ob_schema_printer.h b/src/share/schema/ob_schema_printer.h index 80fa851dfc..5b044120de 100644 --- a/src/share/schema/ob_schema_printer.h +++ b/src/share/schema/ob_schema_printer.h @@ -182,14 +182,12 @@ public: int64_t& pos) const; int print_index_column(const ObTableSchema &table_schema, const ObColumnSchemaV2 &column, - common::ObIArray &ctxcat_cols, bool is_last, char *buf, int64_t buf_len, int64_t &pos) const; int print_fulltext_index_column(const ObTableSchema &table_schema, const ObColumnSchemaV2 &column, - common::ObIArray &ctxcat_cols, bool is_last, char *buf, int64_t buf_len, diff --git a/src/share/schema/ob_schema_struct.h b/src/share/schema/ob_schema_struct.h index fc6afa7b26..3806c62fb2 100755 --- a/src/share/schema/ob_schema_struct.h +++ b/src/share/schema/ob_schema_struct.h @@ -133,7 +133,7 @@ static const uint64_t OB_MIN_ID = 0;//used for lower_bound //the high 32-bit flag isn't stored in __all_column #define GENERATED_DEPS_CASCADE_FLAG (INT64_C(1) << 32) -#define GENERATED_CTXCAT_CASCADE_FLAG (INT64_C(1) << 33) +#define GENERATED_FTS_WORD_COUNT_COLUMN_FLAG (INT64_C(1) << 33) // word count column for full-text search index #define TABLE_PART_KEY_COLUMN_FLAG (INT64_C(1) << 34) #define TABLE_ALIAS_NAME_FLAG (INT64_C(1) << 35) /* create table t1(c1 int, c2 as (c1+1)) partition by hash(c2) partitions 2 @@ -299,7 +299,7 @@ enum ObIndexType INDEX_TYPE_NORMAL_GLOBAL = 3, INDEX_TYPE_UNIQUE_GLOBAL = 4, INDEX_TYPE_PRIMARY = 5, - INDEX_TYPE_DOMAIN_CTXCAT = 6, + INDEX_TYPE_DOMAIN_CTXCAT_DEPRECATED = 6, /* create table t1(c1 int primary key, c2 int); * create index i1 on t1(c2) * i1 is a global index. 
@@ -314,18 +314,17 @@ enum ObIndexType INDEX_TYPE_SPATIAL_GLOBAL = 11, INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE = 12, // new index types for fts - INDEX_TYPE_FTS_ROWKEY_DOC_LOCAL = 13, - INDEX_TYPE_FTS_DOC_ROWKEY_LOCAL = 14, + INDEX_TYPE_ROWKEY_DOC_ID_LOCAL = 13, + INDEX_TYPE_DOC_ID_ROWKEY_LOCAL = 14, INDEX_TYPE_FTS_INDEX_LOCAL = 15, INDEX_TYPE_FTS_DOC_WORD_LOCAL = 16, - INDEX_TYPE_FTS_DOC_ROWKEY_GLOBAL = 17, + INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL = 17, INDEX_TYPE_FTS_INDEX_GLOBAL = 18, INDEX_TYPE_FTS_DOC_WORD_GLOBAL = 19, - INDEX_TYPE_FTS_DOC_ROWKEY_GLOBAL_LOCAL_STORAGE = 20, + INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL_LOCAL_STORAGE = 20, INDEX_TYPE_FTS_INDEX_GLOBAL_LOCAL_STORAGE = 21, INDEX_TYPE_FTS_DOC_WORD_GLOBAL_LOCAL_STORAGE = 22, - - // new index types for multivalue index + // new index types for json multivalue index INDEX_TYPE_NORMAL_MULTIVALUE_LOCAL = 23, INDEX_TYPE_UNIQUE_MULTIVALUE_LOCAL = 24, /* @@ -597,6 +596,101 @@ inline bool is_available_index_status(const ObIndexStatus index_status) const char *ob_index_status_str(ObIndexStatus status); +inline bool is_local_fts_index(const ObIndexType index_type) +{ + return index_type == INDEX_TYPE_ROWKEY_DOC_ID_LOCAL || + index_type == INDEX_TYPE_DOC_ID_ROWKEY_LOCAL || + index_type == INDEX_TYPE_FTS_INDEX_LOCAL || + index_type == INDEX_TYPE_FTS_DOC_WORD_LOCAL; +} + +inline bool is_global_local_fts_index(const ObIndexType index_type) +{ + return index_type == INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL_LOCAL_STORAGE || + index_type == INDEX_TYPE_FTS_INDEX_GLOBAL_LOCAL_STORAGE || + index_type == INDEX_TYPE_FTS_DOC_WORD_GLOBAL_LOCAL_STORAGE; +} + +inline bool is_global_fts_index(const ObIndexType index_type) +{ + return index_type == INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL || + index_type == INDEX_TYPE_FTS_INDEX_GLOBAL || + index_type == INDEX_TYPE_FTS_DOC_WORD_GLOBAL; +} + +inline bool is_local_multivalue_index(const ObIndexType index_type) +{ + return index_type == INDEX_TYPE_ROWKEY_DOC_ID_LOCAL || + index_type == INDEX_TYPE_DOC_ID_ROWKEY_LOCAL 
|| + index_type == INDEX_TYPE_NORMAL_MULTIVALUE_LOCAL || + index_type == INDEX_TYPE_UNIQUE_MULTIVALUE_LOCAL; +} + +inline bool is_doc_rowkey_aux(const ObIndexType index_type) +{ + return index_type == INDEX_TYPE_DOC_ID_ROWKEY_LOCAL || + index_type == INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL || + index_type == INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL_LOCAL_STORAGE; +} + +inline bool is_rowkey_doc_aux(const ObIndexType index_type) +{ + return index_type == INDEX_TYPE_ROWKEY_DOC_ID_LOCAL; +} + +inline bool is_fts_index_aux(const ObIndexType index_type) +{ + return index_type == INDEX_TYPE_FTS_INDEX_LOCAL || + index_type == INDEX_TYPE_FTS_INDEX_GLOBAL || + index_type == INDEX_TYPE_FTS_INDEX_GLOBAL_LOCAL_STORAGE; +} + +inline bool is_fts_doc_word_aux(const ObIndexType index_type) +{ + return INDEX_TYPE_FTS_DOC_WORD_LOCAL == index_type + || INDEX_TYPE_FTS_DOC_WORD_GLOBAL == index_type + || INDEX_TYPE_FTS_DOC_WORD_GLOBAL_LOCAL_STORAGE == index_type; +} + +inline bool is_multivalue_index_aux(const ObIndexType index_type) +{ + return index_type == INDEX_TYPE_NORMAL_MULTIVALUE_LOCAL || + index_type == INDEX_TYPE_UNIQUE_MULTIVALUE_LOCAL; +} + +inline bool is_built_in_multivalue_index(const ObIndexType index_type) +{ + return is_rowkey_doc_aux(index_type) + || is_doc_rowkey_aux(index_type); +} + +inline bool is_built_in_fts_index(const ObIndexType index_type) +{ + return is_rowkey_doc_aux(index_type) + || is_doc_rowkey_aux(index_type) + || is_fts_doc_word_aux(index_type); +} + +inline bool is_multivalue_index(const ObIndexType index_type) +{ + return is_multivalue_index_aux(index_type) || is_built_in_multivalue_index(index_type); +} + +inline bool is_fts_index(const ObIndexType index_type) +{ + return is_fts_index_aux(index_type) || is_built_in_fts_index(index_type); +} + +inline bool is_fts_or_multivalue_index(ObIndexType index_type) +{ + return is_multivalue_index(index_type) || is_fts_index(index_type); +} + +inline bool is_fts_or_multivalue_index_aux(ObIndexType index_type) +{ + return 
is_multivalue_index_aux(index_type) || is_fts_index_aux(index_type); +} + inline bool is_index_local_storage(ObIndexType index_type) { return INDEX_TYPE_NORMAL_LOCAL == index_type @@ -604,9 +698,12 @@ inline bool is_index_local_storage(ObIndexType index_type) || INDEX_TYPE_NORMAL_GLOBAL_LOCAL_STORAGE == index_type || INDEX_TYPE_UNIQUE_GLOBAL_LOCAL_STORAGE == index_type || INDEX_TYPE_PRIMARY == index_type - || INDEX_TYPE_DOMAIN_CTXCAT == index_type + || INDEX_TYPE_DOMAIN_CTXCAT_DEPRECATED == index_type || INDEX_TYPE_SPATIAL_LOCAL == index_type - || INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == index_type; + || INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == index_type + || is_local_fts_index(index_type) + || is_global_local_fts_index(index_type) + || is_local_multivalue_index(index_type); } inline bool is_related_table( @@ -628,7 +725,9 @@ inline bool index_has_tablet(const ObIndexType &index_type) || INDEX_TYPE_UNIQUE_GLOBAL == index_type || INDEX_TYPE_SPATIAL_LOCAL == index_type || INDEX_TYPE_SPATIAL_GLOBAL == index_type - || INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == index_type; + || INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == index_type + || is_fts_index(index_type) + || is_multivalue_index(index_type); } struct ObTenantTableId diff --git a/src/share/schema/ob_schema_utils.cpp b/src/share/schema/ob_schema_utils.cpp index ed926935c1..17db7ca033 100644 --- a/src/share/schema/ob_schema_utils.cpp +++ b/src/share/schema/ob_schema_utils.cpp @@ -123,16 +123,26 @@ int ObSchemaUtils::cascaded_generated_column(ObTableSchema &table_schema, root_expr_type, columns_names))) { LOG_WARN("get generated column expr failed", K(ret)); } else if (T_FUN_SYS_WORD_SEGMENT == root_expr_type) { - column.add_column_flag(GENERATED_CTXCAT_CASCADE_FLAG); + column.add_column_flag(GENERATED_FTS_WORD_SEGMENT_COLUMN_FLAG); + } else if (T_FUN_SYS_WORD_COUNT == root_expr_type) { + column.add_column_flag(GENERATED_FTS_WORD_COUNT_COLUMN_FLAG); + } else if (T_FUN_SYS_DOC_LENGTH == root_expr_type) { + 
column.add_column_flag(GENERATED_FTS_DOC_LENGTH_COLUMN_FLAG); } else if (T_FUN_SYS_SPATIAL_CELLID == root_expr_type || T_FUN_SYS_SPATIAL_MBR == root_expr_type) { column.add_column_flag(SPATIAL_INDEX_GENERATED_COLUMN_FLAG); + } else if (T_FUN_SYS_JSON_QUERY == root_expr_type) { + if (strstr(col_def.ptr(), "multivalue)")) { + column.add_column_flag(MULTIVALUE_INDEX_GENERATED_ARRAY_COLUMN_FLAG); + } else { + column.add_column_flag(MULTIVALUE_INDEX_GENERATED_COLUMN_FLAG); + } } else { LOG_DEBUG("succ to resolve_generated_column_info", K(col_def), K(root_expr_type), K(columns_names), K(table_schema)); } } // TODO: materialized view - if (OB_SUCC(ret) && resolve_dependencies && (table_schema.is_table() + if (OB_SUCC(ret) && resolve_dependencies && !column.is_doc_id_column() && (table_schema.is_table() || table_schema.is_tmp_table())) { for (int64_t i = 0; OB_SUCC(ret) && i < columns_names.count(); ++i) { if (OB_ISNULL(col_schema = table_schema.get_column_schema(columns_names.at(i)))) { @@ -170,6 +180,16 @@ bool ObSchemaUtils::is_virtual_generated_column(uint64_t flag) return flag & VIRTUAL_GENERATED_COLUMN_FLAG; } +bool ObSchemaUtils::is_multivalue_generated_column(uint64_t flag) +{ + return flag & MULTIVALUE_INDEX_GENERATED_COLUMN_FLAG; +} + +bool ObSchemaUtils::is_multivalue_generated_array_column(uint64_t flag) +{ + return flag & MULTIVALUE_INDEX_GENERATED_ARRAY_COLUMN_FLAG; +} + bool ObSchemaUtils::is_stored_generated_column(uint64_t flag) { return flag & STORED_GENERATED_COLUMN_FLAG; @@ -205,9 +225,32 @@ bool ObSchemaUtils::is_default_expr_v2_column(uint64_t flag) return flag & DEFAULT_EXPR_V2_COLUMN_FLAG; } -bool ObSchemaUtils::is_fulltext_column(uint64_t flag) +bool ObSchemaUtils::is_fulltext_column(const uint64_t flag) { - return flag & GENERATED_CTXCAT_CASCADE_FLAG; + return is_doc_id_column(flag) + || is_word_segment_column(flag) + || is_word_count_column(flag) + || is_doc_length_column(flag); +} + +bool ObSchemaUtils::is_doc_id_column(const uint64_t flag) +{ + 
return flag & GENERATED_DOC_ID_COLUMN_FLAG; +} + +bool ObSchemaUtils::is_word_segment_column(const uint64_t flag) +{ + return flag & GENERATED_FTS_WORD_SEGMENT_COLUMN_FLAG; +} + +bool ObSchemaUtils::is_word_count_column(const uint64_t flag) +{ + return flag & GENERATED_FTS_WORD_COUNT_COLUMN_FLAG; +} + +bool ObSchemaUtils::is_doc_length_column(const uint64_t flag) +{ + return flag & GENERATED_FTS_DOC_LENGTH_COLUMN_FLAG; } bool ObSchemaUtils::is_spatial_generated_column(uint64_t flag) diff --git a/src/share/schema/ob_schema_utils.h b/src/share/schema/ob_schema_utils.h index d4e7b62dea..1449744322 100644 --- a/src/share/schema/ob_schema_utils.h +++ b/src/share/schema/ob_schema_utils.h @@ -75,7 +75,13 @@ public: static bool is_invisible_column(uint64_t flag); static bool is_cte_generated_column(uint64_t flag); static bool is_default_expr_v2_column(uint64_t flag); - static bool is_fulltext_column(uint64_t flag); + static bool is_fulltext_column(const uint64_t flag); + static bool is_doc_id_column(const uint64_t flag); + static bool is_word_segment_column(const uint64_t flag); + static bool is_word_count_column(const uint64_t flag); + static bool is_doc_length_column(const uint64_t flag); + static bool is_multivalue_generated_column(uint64_t flag); + static bool is_multivalue_generated_array_column(uint64_t flag); static bool is_spatial_generated_column(uint64_t flag); static bool is_generated_column(uint64_t flag) { return is_virtual_generated_column(flag) || is_stored_generated_column(flag); } static bool is_identity_column(uint64_t flag) { return is_always_identity_column(flag) || is_default_identity_column(flag) || is_default_on_null_identity_column(flag); } diff --git a/src/share/schema/ob_table_dml_param.cpp b/src/share/schema/ob_table_dml_param.cpp index ec81b805ad..6a6ee818f9 100644 --- a/src/share/schema/ob_table_dml_param.cpp +++ b/src/share/schema/ob_table_dml_param.cpp @@ -28,14 +28,16 @@ ObTableSchemaParam::ObTableSchemaParam(ObIAllocator &allocator) 
table_id_(OB_INVALID_ID), schema_version_(OB_INVALID_VERSION), table_type_(MAX_TABLE_TYPE), - index_type_(INDEX_TYPE_MAX), - index_status_(INDEX_STATUS_MAX), + index_type_(INDEX_TYPE_IS_NOT), + index_status_(INDEX_STATUS_NOT_FOUND), shadow_rowkey_column_num_(0), + doc_id_col_id_(OB_INVALID_ID), fulltext_col_id_(OB_INVALID_ID), spatial_geo_col_id_(OB_INVALID_ID), spatial_cellid_col_id_(OB_INVALID_ID), spatial_mbr_col_id_(OB_INVALID_ID), index_name_(), + fts_parser_name_(), columns_(allocator), col_map_(allocator), pk_name_(), @@ -59,14 +61,16 @@ void ObTableSchemaParam::reset() table_id_ = OB_INVALID_ID; schema_version_ = OB_INVALID_VERSION; table_type_ = MAX_TABLE_TYPE; - index_type_ = INDEX_TYPE_MAX; - index_status_ = INDEX_STATUS_MAX; + index_type_ = INDEX_TYPE_IS_NOT; + index_status_ = INDEX_STATUS_NOT_FOUND; shadow_rowkey_column_num_ = 0; + doc_id_col_id_ = OB_INVALID_ID; fulltext_col_id_ = OB_INVALID_ID; spatial_geo_col_id_ = OB_INVALID_ID; spatial_cellid_col_id_ = OB_INVALID_ID; spatial_mbr_col_id_ = OB_INVALID_ID; index_name_.reset(); + fts_parser_name_.reset(); columns_.reset(); col_map_.clear(); pk_name_.reset(); @@ -129,12 +133,34 @@ int ObTableSchemaParam::convert(const ObTableSchema *schema) } else if (OB_FAIL(schema->get_index_info().get_spatial_mbr_col_id(spatial_mbr_col_id_))) { LOG_WARN("fail to get spatial mbr column id", K(ret), K(schema->get_index_info())); } + } else if (schema->is_fts_index_aux() || schema->is_fts_doc_word_aux()) { + if (OB_FAIL(schema->get_fulltext_column_ids(doc_id_col_id_, fulltext_col_id_))) { + LOG_WARN("fail to get fulltext column ids", K(ret)); + } else if (OB_UNLIKELY(doc_id_col_id_ <= OB_APP_MIN_COLUMN_ID || OB_INVALID_ID == doc_id_col_id_ + || fulltext_col_id_ <= OB_APP_MIN_COLUMN_ID || OB_INVALID_ID == fulltext_col_id_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid doc id or fulltext column id", K(ret), K(doc_id_col_id_), K(fulltext_col_id_)); + } else if (OB_FAIL(ob_write_string(allocator_, 
schema->get_parser_name_str(), fts_parser_name_))) { + LOG_WARN("fail to copy fts parser name", K(ret), K(schema->get_parser_name_str())); + } + } else if (schema->is_multivalue_index_aux()) { + for (int64_t i = 0; OB_SUCC(ret) && i < schema->get_column_count(); ++i) { + const ObColumnSchemaV2 *column_schema = schema->get_column_schema_by_idx(i); + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(i), KPC(schema)); + } else if (column_schema->is_doc_id_column()) { + doc_id_col_id_ = column_schema->get_column_id(); + } else if (column_schema->is_multivalue_generated_column()) { + multivalue_col_id_ = column_schema->get_column_id(); + } else if (column_schema->is_multivalue_generated_array_column()) { + multivalue_arr_col_id_ = column_schema->get_column_id(); + } + } } if (OB_FAIL(ret)) { // do nothing - } else if (OB_FAIL(schema->get_index_info().get_fulltext_column(fulltext_col_id_))) { - LOG_WARN("fail to get fulltext column id", K(ret), K(schema->get_index_info())); } else if (OB_FAIL(schema->get_index_name(tmp_name))) { LOG_WARN("fail to get index name", K(ret), K(schema->get_index_info())); } else if (OB_FAIL(ob_write_string(allocator_, tmp_name, index_name_))) { @@ -304,6 +330,17 @@ const ObColumnParam * ObTableSchemaParam::get_column_by_idx(const int64_t idx) c return ptr; } +ObColumnParam * ObTableSchemaParam::get_column_by_idx(const int64_t idx) +{ + ObColumnParam * ptr = NULL; + if (idx < 0 || idx >= columns_.count()) { + LOG_WARN_RET(OB_INVALID_ARGUMENT, "idx out of range", K(idx), K(columns_.count()), K(lbt())); + } else { + ptr = columns_.at(idx); + } + return ptr; +} + const ObColumnParam * ObTableSchemaParam::get_rowkey_column_by_idx(const int64_t idx) const { const ObColumnParam * ptr = NULL; @@ -402,8 +439,10 @@ int64_t ObTableSchemaParam::to_string(char *buf, const int64_t buf_len) const K_(index_type), K_(index_status), K_(shadow_rowkey_column_num), + K_(doc_id_col_id), 
K_(fulltext_col_id), K_(index_name), + K_(fts_parser_name), K_(pk_name), K_(columns), K_(read_info), @@ -455,6 +494,12 @@ OB_DEF_SERIALIZE(ObTableSchemaParam) OB_UNIS_ENCODE(multivalue_col_id_); OB_UNIS_ENCODE(multivalue_arr_col_id_); OB_UNIS_ENCODE(data_table_rowkey_column_num_); + OB_UNIS_ENCODE(doc_id_col_id_); + if (OB_SUCC(ret)) { + if (OB_FAIL(fts_parser_name_.serialize(buf, buf_len, pos))) { + LOG_WARN("fail to serialize fts parser name", K(ret)); + } + } return ret; } @@ -537,9 +582,44 @@ OB_DEF_DESERIALIZE(ObTableSchemaParam) } } + if (OB_SUCC(ret) && pos == data_len) { + // Here is to solve the compatibility problem and correct the `index_type_` and `index_status_`. + // + // Before version 4.3.1.0, the default value of `index_type_` and `index_sattus_` was max. In + // version 4.3.1.0, the full-text search and json multi-value indexes were introduced. If the + // RPC request from older version observer is received, the `index_type_` will be mistaken for + // a valid json multi-valued index. + // + // Therefore, if there are still unresolved fields here, it means that it is a new version + // observer. It is necessary to re-assign the initial values to the `index_type_` and + // `index_status_` to avoid misjudgment as a valid index. + + + // ATTENTION!!! + // The front-end version is currently only 4.3.0.x, and its value of max index type is 23. + if (23 == index_type_) { + index_type_ = INDEX_TYPE_IS_NOT; + } + // ATTENTION!!! + // The front-end version is currently only 4.3.0.x, and its value of max index status is 8. 
+ if (8 == index_status_) { + index_status_ = INDEX_STATUS_NOT_FOUND; + } + } + OB_UNIS_DECODE(multivalue_col_id_); OB_UNIS_DECODE(multivalue_arr_col_id_); OB_UNIS_DECODE(data_table_rowkey_column_num_); + OB_UNIS_DECODE(doc_id_col_id_) + + if (OB_SUCC(ret) && pos < data_len) { + ObString tmp_name; + if (OB_FAIL(tmp_name.deserialize(buf, data_len, pos))) { + LOG_WARN("fail to deserialize fts parser name", K(ret)); + } else if (OB_FAIL(ob_write_string(allocator_, tmp_name, fts_parser_name_))) { + LOG_WARN("fail to copy fts parser name", K(ret), K(tmp_name)); + } + } return ret; } @@ -585,6 +665,8 @@ OB_DEF_SERIALIZE_SIZE(ObTableSchemaParam) OB_UNIS_ADD_LEN(multivalue_col_id_); OB_UNIS_ADD_LEN(multivalue_arr_col_id_); OB_UNIS_ADD_LEN(data_table_rowkey_column_num_); + OB_UNIS_ADD_LEN(doc_id_col_id_); + len += fts_parser_name_.get_serialize_size(); return len; } diff --git a/src/share/schema/ob_table_dml_param.h b/src/share/schema/ob_table_dml_param.h index da25e4022b..9f5c271f1f 100644 --- a/src/share/schema/ob_table_dml_param.h +++ b/src/share/schema/ob_table_dml_param.h @@ -50,10 +50,16 @@ public: OB_INLINE ObIndexStatus get_index_status() const { return index_status_; } OB_INLINE int64_t get_rowkey_column_num() const { return read_info_.get_schema_rowkey_count(); } OB_INLINE int64_t get_shadow_rowkey_column_num() const { return shadow_rowkey_column_num_; } + OB_INLINE int64_t get_data_table_rowkey_column_num() const { return data_table_rowkey_column_num_; } + OB_INLINE void set_data_table_rowkey_column_num(int64_t cnt) { data_table_rowkey_column_num_ = cnt; } + OB_INLINE int64_t get_doc_id_col_id() const { return doc_id_col_id_; } OB_INLINE int64_t get_fulltext_col_id() const { return fulltext_col_id_; } + OB_INLINE const common::ObString &get_fts_parser_name() const { return fts_parser_name_; } OB_INLINE uint64_t get_spatial_geo_col_id() const { return spatial_geo_col_id_; } OB_INLINE uint64_t get_spatial_cellid_col_id() const { return spatial_cellid_col_id_; } 
OB_INLINE uint64_t get_spatial_mbr_col_id() const { return spatial_mbr_col_id_; } + OB_INLINE int64_t get_multivalue_col_id() const { return multivalue_col_id_; } + OB_INLINE int64_t get_multivalue_array_col_id() const { return multivalue_arr_col_id_; } OB_INLINE int64_t get_lob_inrow_threshold() const { return lob_inrow_threshold_; } OB_INLINE int64_t get_column_count() const { return columns_.count(); } OB_INLINE const Columns &get_columns() const { return columns_; } @@ -70,12 +76,18 @@ public: OB_INLINE bool is_unique_index() const { return ObTableSchema::is_unique_index(index_type_); } OB_INLINE bool is_domain_index() const { return ObTableSchema::is_domain_index(index_type_); } OB_INLINE bool is_spatial_index() const { return ObTableSchema::is_spatial_index(index_type_); } + OB_INLINE bool is_fts_index() const { return share::schema::is_fts_index(index_type_); } + OB_INLINE bool is_doc_rowkey() const { return share::schema::is_doc_rowkey_aux(index_type_); } + OB_INLINE bool is_fts_index_aux() const { return share::schema::is_fts_index_aux(index_type_); } + OB_INLINE bool is_multivalue_index() const { return share::schema::is_multivalue_index(index_type_); } + OB_INLINE bool is_multivalue_index_aux() const { return share::schema::is_multivalue_index_aux(index_type_); } OB_INLINE bool is_index_local_storage() const { return share::schema::is_index_local_storage(index_type_); } int is_rowkey_column(const uint64_t column_id, bool &is_rowkey) const; int is_column_nullable_for_write(const uint64_t column_id, bool &is_nullable_for_write) const; const ObColumnParam * get_column(const uint64_t column_id) const; const ObColumnParam * get_column_by_idx(const int64_t idx) const; + ObColumnParam * get_column_by_idx(const int64_t idx); const ObColumnParam * get_rowkey_column_by_idx(const int64_t idx) const; int get_rowkey_column_ids(common::ObIArray &column_ids) const; int get_rowkey_column_ids(common::ObIArray &column_ids) const; @@ -101,11 +113,13 @@ private: ObIndexType 
index_type_; ObIndexStatus index_status_; int64_t shadow_rowkey_column_num_; + uint64_t doc_id_col_id_; uint64_t fulltext_col_id_; uint64_t spatial_geo_col_id_; // geometry column id in data table_schema. uint64_t spatial_cellid_col_id_; // cellid column id in index table_schema. uint64_t spatial_mbr_col_id_; // mbr column id in index table_schema. common::ObString index_name_; + common::ObString fts_parser_name_; //generated storage param from columns_ids_ in ObTableModify, for performance improvement Columns columns_; ColumnMap col_map_; @@ -138,6 +152,7 @@ public: int prepare_storage_param(const common::ObIArray &column_ids); OB_INLINE bool is_valid() const { return data_table_.is_valid(); } OB_INLINE const ObTableSchemaParam & get_data_table() const { return data_table_; } + OB_INLINE ObTableSchemaParam& get_data_table_ref() { return data_table_; } OB_INLINE const ObColDescArray & get_col_descs() const { return col_descs_; } OB_INLINE const ColumnMap &get_col_map() const { return col_map_; } DECLARE_TO_STRING; diff --git a/src/share/schema/ob_table_param.cpp b/src/share/schema/ob_table_param.cpp index d634e9c5a8..cc08065050 100644 --- a/src/share/schema/ob_table_param.cpp +++ b/src/share/schema/ob_table_param.cpp @@ -1261,6 +1261,8 @@ int ObTableParam::convert(const ObTableSchema &table_schema, rowid_projector_, enable_lob_locator_v2_))) { LOG_WARN("fail to construct rowid dep column projector", K(ret)); + } else if (table_schema.is_fts_index() && OB_FAIL(convert_fulltext_index_info(table_schema))) { + LOG_WARN("fail to convert fulltext index info", K(ret)); } else { LOG_DEBUG("construct columns", K(table_id_), K(access_column_ids), K_(main_read_info)); } @@ -1336,6 +1338,12 @@ int ObTableParam::convert_group_by(const ObTableSchema &table_schema, } } } + + if (OB_SUCC(ret)) { + if (table_schema.is_fts_index() && OB_FAIL(convert_fulltext_index_info(table_schema))) { + LOG_WARN("fail to convert fulltext index info", K(ret)); + } + } LOG_DEBUG("[GROUP BY 
PUSHDOWN]", K(ret), K(output_column_ids), K(aggregate_column_ids), K(group_by_column_ids), K(output_projector_), K(aggregate_projector_), K(group_by_projector_)); return ret; @@ -1498,6 +1506,16 @@ int ObTableParam::convert_column_schema_to_param(const ObColumnSchemaV2 &column_ return ret; } + +int ObTableParam::convert_fulltext_index_info(const ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ob_write_string(allocator_, table_schema.get_parser_name_str(), parser_name_))) { + LOG_WARN("failed to set parser name from table schema", K(ret)); + } + return ret; +} + int64_t ObTableParam::to_string(char *buf, const int64_t buf_len) const { int64_t pos = 0; diff --git a/src/share/schema/ob_table_param.h b/src/share/schema/ob_table_param.h index 1fe05fe943..5b42c39159 100644 --- a/src/share/schema/ob_table_param.h +++ b/src/share/schema/ob_table_param.h @@ -307,6 +307,10 @@ public: inline uint64_t get_table_id() const { return table_id_; } inline int64_t is_spatial_index() const { return is_spatial_index_; } inline void set_is_spatial_index(bool is_spatial_index) { is_spatial_index_ = is_spatial_index; } + inline bool is_fts_index() const { return is_fts_index_; } + inline void set_is_fts_index(const bool is_fts_index) { is_fts_index_ = is_fts_index; } + inline int64_t is_multivalue_index() const { return is_multivalue_index_; } + inline void set_is_multivalue_index(bool is_multivalue_index) { is_multivalue_index_ = is_multivalue_index; } inline bool use_lob_locator() const { return use_lob_locator_; } inline bool enable_lob_locator_v2() const { return enable_lob_locator_v2_; } inline bool &get_enable_lob_locator_v2() { return enable_lob_locator_v2_; } @@ -320,6 +324,7 @@ public: inline const common::ObIArray &get_pad_col_projector() const { return pad_col_projector_; } inline void disable_padding() { pad_col_projector_.reset(); } inline const storage::ObTableReadInfo &get_read_info() const { return main_read_info_; } + inline const ObString 
&get_parser_name() const { return parser_name_; } inline const common::ObIArray *get_cg_read_infos() const { return cg_read_infos_.empty() ? nullptr : &cg_read_infos_; } @@ -362,6 +367,7 @@ private: int64_t &rowid_version, Projector &rowid_projector, bool is_use_lob_locator_v2); + int convert_fulltext_index_info(const ObTableSchema &table_schema); private: const static int64_t DEFAULT_COLUMN_MAP_BUCKET_NUM = 4; diff --git a/src/share/schema/ob_table_schema.cpp b/src/share/schema/ob_table_schema.cpp index e9339c8e45..d4e5a5a225 100644 --- a/src/share/schema/ob_table_schema.cpp +++ b/src/share/schema/ob_table_schema.cpp @@ -572,7 +572,7 @@ bool ObSimpleTableSchemaV2::is_valid() const ret = false; LOG_WARN("invalid data table_id", K(ret), K(data_table_id_)); } else if (is_index_table() && !is_normal_index() && !is_unique_index() - && !is_domain_index() && !is_spatial_index()) { + && !is_domain_index() && !is_fts_index() && !is_multivalue_index()) { ret = false; LOG_WARN("table_type is not consistent with index_type", "table_type", static_cast(table_type_), @@ -6377,6 +6377,7 @@ int64_t ObTableSchema::to_string(char *buf, const int64_t buf_len) const "constraints", ObArrayWrap(cst_array_, cst_cnt_), "column_array", ObArrayWrap(column_array_, column_cnt_), "aux_vp_tid_array", aux_vp_tid_array_, + K_(index_info), K_(define_user_id), K_(aux_lob_meta_tid), K_(aux_lob_piece_tid), @@ -8220,6 +8221,44 @@ int ObTableSchema::get_spatial_geo_column_id(uint64_t &geo_column_id) const return ret; } +int ObTableSchema::get_fulltext_column_ids(uint64_t &doc_id_col_id, uint64_t &ft_col_id) const +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < get_column_count(); ++i) { + const ObColumnSchemaV2 *column_schema = get_column_schema_by_idx(i); + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(i), KPC(this)); + } else if (column_schema->is_doc_id_column()) { + doc_id_col_id = 
column_schema->get_column_id(); + } else if (column_schema->is_word_segment_column()) { + ft_col_id = column_schema->get_column_id(); + } + } + return ret; +} + +int ObTableSchema::get_rowkey_doc_tid(uint64_t &index_table_id) const +{ + int ret = OB_SUCCESS; + ObSEArray simple_index_infos; + index_table_id = OB_INVALID_ID; + if (OB_FAIL(get_simple_index_infos(simple_index_infos))) { + LOG_WARN("get simple_index_infos failed", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < simple_index_infos.count(); ++i) { + if (is_rowkey_doc_aux(simple_index_infos.at(i).index_type_)) { + index_table_id = simple_index_infos.at(i).table_id_; + break; + } + } + if (OB_SUCC(ret) && OB_UNLIKELY(OB_INVALID_ID == index_table_id)) { + ret = OB_ERR_INDEX_KEY_NOT_FOUND; + LOG_DEBUG("not found rowkey doc index", K(ret), K(simple_index_infos)); + } + return ret; +} + int ObTableSchema::check_has_local_index(ObSchemaGetterGuard &schema_guard, bool &has_local_index) const { int ret = OB_SUCCESS; @@ -8246,6 +8285,54 @@ int ObTableSchema::check_has_local_index(ObSchemaGetterGuard &schema_guard, bool return ret; } +int ObTableSchema::check_has_fts_index(ObSchemaGetterGuard &schema_guard, bool &has_fts_index) const +{ + int ret = OB_SUCCESS; + ObSEArray simple_index_infos; + const ObSimpleTableSchemaV2 *index_schema = NULL; + const uint64_t tenant_id = get_tenant_id(); + has_fts_index = false; + if (OB_FAIL(get_simple_index_infos(simple_index_infos))) { + LOG_WARN("get simple_index_infos failed", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < simple_index_infos.count(); ++i) { + if (OB_FAIL(schema_guard.get_simple_table_schema(tenant_id, simple_index_infos.at(i).table_id_, index_schema))) { + LOG_WARN("failed to get table schema", K(ret), K(tenant_id), K(simple_index_infos.at(i).table_id_)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cannot get index table schema for table ", K(simple_index_infos.at(i).table_id_)); + } else if 
(index_schema->is_fts_index_aux() || index_schema->is_fts_doc_word_aux()) { + has_fts_index = true; + break; + } + } + return ret; +} + +int ObTableSchema::check_has_multivalue_index(ObSchemaGetterGuard &schema_guard, bool &has_multivalue_index) const +{ + int ret = OB_SUCCESS; + ObSEArray simple_index_infos; + const ObSimpleTableSchemaV2 *index_schema = NULL; + const uint64_t tenant_id = get_tenant_id(); + has_multivalue_index = false; + if (OB_FAIL(get_simple_index_infos(simple_index_infos))) { + LOG_WARN("get simple_index_infos failed", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < simple_index_infos.count(); ++i) { + if (OB_FAIL(schema_guard.get_simple_table_schema(tenant_id, simple_index_infos.at(i).table_id_, index_schema))) { + LOG_WARN("failed to get table schema", K(ret), K(tenant_id), K(simple_index_infos.at(i).table_id_)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cannot get index table schema for table ", K(simple_index_infos.at(i).table_id_)); + } else if (index_schema->is_multivalue_index_aux()) { + has_multivalue_index = true; + break; + } + } + return ret; +} + int ObTableSchema::get_spatial_index_column_ids(common::ObIArray &column_ids) const { // spatial index is a kind of domain index @@ -8923,6 +9010,22 @@ int ObTableSchema::check_is_stored_generated_column_base_column(uint64_t column_ return ret; } +int ObTableSchema::get_doc_id_rowkey_tid(uint64_t &doc_id_rowkey_tid) const +{ + int ret = OB_SUCCESS; + doc_id_rowkey_tid = OB_INVALID_ID; + for (int64_t i = 0; OB_SUCC(ret) && i < simple_index_infos_.count(); ++i) { + if (is_doc_rowkey_aux(simple_index_infos_.at(i).index_type_)) { + doc_id_rowkey_tid = simple_index_infos_.at(i).table_id_; + break; + } + } + if (OB_INVALID_ID == doc_id_rowkey_tid) { + ret = OB_ERR_FT_COLUMN_NOT_INDEXED; + } + return ret; +} + int64_t ObPrintableTableSchema::to_string(char *buf, const int64_t buf_len) const { int64_t pos = 0; diff --git 
a/src/share/schema/ob_table_schema.h b/src/share/schema/ob_table_schema.h index 591aa36b2b..1da1f321e0 100644 --- a/src/share/schema/ob_table_schema.h +++ b/src/share/schema/ob_table_schema.h @@ -906,8 +906,17 @@ public: { return ObTenantTableId(tenant_id_, data_table_id_); } inline bool should_not_validate_data_index_ckm() const; inline bool should_check_major_merge_progress() const; + inline bool is_multivalue_index() const; + inline bool is_multivalue_index_aux() const; inline bool is_spatial_index() const; - inline static bool is_spatial_index(ObIndexType index_type); + inline static bool is_spatial_index(const ObIndexType index_type); + inline bool is_fts_index() const; + inline bool is_built_in_fts_index() const; + inline bool is_rowkey_doc_id() const; + inline bool is_doc_id_rowkey() const; + inline bool is_fts_index_aux() const; + inline bool is_fts_doc_word_aux() const; + inline bool is_fts_or_multivalue_index() const; inline bool is_normal_index() const; inline bool is_unique_index() const; inline static bool is_unique_index(ObIndexType index_type); @@ -919,7 +928,7 @@ public: inline static bool is_global_unique_index_table(const ObIndexType index_type); inline bool is_local_unique_index_table() const; inline bool is_domain_index() const; - inline static bool is_domain_index(ObIndexType index_type); + inline static bool is_domain_index(const ObIndexType index_type); inline bool is_index_local_storage() const; virtual bool has_tablet() const override; inline bool has_partition() const @@ -1381,6 +1390,7 @@ public: const bool no_virtual = false) const; int get_spatial_geo_column_id(uint64_t &geo_column_id) const; int get_spatial_index_column_ids(common::ObIArray &column_ids) const; + int get_fulltext_column_ids(uint64_t &doc_id_col_id, uint64_t &ft_col_id) const; // get columns for building rowid int get_column_ids_serialize_to_rowid(common::ObIArray &col_ids, @@ -1638,6 +1648,8 @@ public: int sort_column_array_by_column_id(); int 
check_column_array_sorted_by_column_id(const bool skip_rowkey) const; int check_has_local_index(ObSchemaGetterGuard &schema_guard, bool &has_local_index) const; + int check_has_fts_index(ObSchemaGetterGuard &schema_guard, bool &has_fts_index) const; + int check_has_multivalue_index(ObSchemaGetterGuard &schema_guard, bool &has_multivalue_index) const; int is_real_unique_index_column(ObSchemaGetterGuard &schema_guard, uint64_t column_id, bool &is_uni) const; @@ -1647,8 +1659,10 @@ public: int is_multiple_key_column(ObSchemaGetterGuard &schema_guard, uint64_t column_id, bool &is_mul) const; + int get_doc_id_rowkey_tid(uint64_t &doc_id_rowkey_tid) const; void set_aux_lob_meta_tid(const uint64_t& table_id) { aux_lob_meta_tid_ = table_id; } void set_aux_lob_piece_tid(const uint64_t& table_id) { aux_lob_piece_tid_ = table_id; } + int get_rowkey_doc_tid(uint64_t &index_table_id) const; uint64_t get_aux_lob_meta_tid() const { return aux_lob_meta_tid_; } uint64_t get_aux_lob_piece_tid() const { return aux_lob_piece_tid_; } bool has_lob_column() const; @@ -1895,15 +1909,7 @@ private: inline bool ObSimpleTableSchemaV2::is_index_local_storage() const { return USER_INDEX == table_type_ - // && schema::is_index_local_storage(index_type_); TODO(wangzhennan.wzn): use is_index_local_storage later - && (INDEX_TYPE_NORMAL_LOCAL == index_type_ - || INDEX_TYPE_UNIQUE_LOCAL == index_type_ - || INDEX_TYPE_NORMAL_GLOBAL_LOCAL_STORAGE == index_type_ - || INDEX_TYPE_UNIQUE_GLOBAL_LOCAL_STORAGE == index_type_ - || INDEX_TYPE_PRIMARY == index_type_ - || INDEX_TYPE_DOMAIN_CTXCAT == index_type_ - || INDEX_TYPE_SPATIAL_LOCAL == index_type_ - || INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == index_type_); + && schema::is_index_local_storage(index_type_); } inline bool ObSimpleTableSchemaV2::is_global_index_table() const @@ -1915,7 +1921,8 @@ inline bool ObSimpleTableSchemaV2::is_global_index_table(const ObIndexType index { return INDEX_TYPE_NORMAL_GLOBAL == index_type || INDEX_TYPE_UNIQUE_GLOBAL == 
index_type - || INDEX_TYPE_SPATIAL_GLOBAL == index_type; + || INDEX_TYPE_SPATIAL_GLOBAL == index_type + || is_global_fts_index(index_type); } inline bool ObSimpleTableSchemaV2::is_global_normal_index_table() const @@ -1944,10 +1951,11 @@ inline bool ObSimpleTableSchemaV2::is_global_local_index_table() const { return INDEX_TYPE_NORMAL_GLOBAL_LOCAL_STORAGE == index_type_ || INDEX_TYPE_UNIQUE_GLOBAL_LOCAL_STORAGE == index_type_ - || INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == index_type_; + || INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == index_type_ + || is_global_local_fts_index(index_type_); } -inline bool ObSimpleTableSchemaV2::is_spatial_index(ObIndexType index_type) +inline bool ObSimpleTableSchemaV2::is_spatial_index(const ObIndexType index_type) { return INDEX_TYPE_SPATIAL_LOCAL == index_type || INDEX_TYPE_SPATIAL_GLOBAL == index_type @@ -1959,6 +1967,46 @@ inline bool ObSimpleTableSchemaV2::is_spatial_index() const return is_spatial_index(index_type_); } +inline bool ObSimpleTableSchemaV2::is_multivalue_index() const +{ + return share::schema::is_multivalue_index(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_multivalue_index_aux() const +{ + return share::schema::is_multivalue_index_aux(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_fts_index() const +{ + return share::schema::is_fts_index(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_built_in_fts_index() const +{ + return share::schema::is_built_in_fts_index(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_rowkey_doc_id() const +{ + return share::schema::is_rowkey_doc_aux(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_doc_id_rowkey() const +{ + return share::schema::is_doc_rowkey_aux(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_fts_index_aux() const +{ + return share::schema::is_fts_index_aux(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_fts_doc_word_aux() const +{ + return share::schema::is_fts_doc_word_aux(index_type_); +} + inline 
bool ObSimpleTableSchemaV2::is_normal_index() const { return INDEX_TYPE_NORMAL_LOCAL == index_type_ @@ -1975,7 +2023,8 @@ inline bool ObSimpleTableSchemaV2::is_unique_index(ObIndexType index_type) { return INDEX_TYPE_UNIQUE_LOCAL == index_type || INDEX_TYPE_UNIQUE_GLOBAL == index_type - || INDEX_TYPE_UNIQUE_GLOBAL_LOCAL_STORAGE == index_type; + || INDEX_TYPE_UNIQUE_GLOBAL_LOCAL_STORAGE == index_type + || INDEX_TYPE_UNIQUE_MULTIVALUE_LOCAL == index_type; } inline bool ObSimpleTableSchemaV2::is_domain_index() const @@ -1983,15 +2032,25 @@ inline bool ObSimpleTableSchemaV2::is_domain_index() const return is_domain_index(index_type_); } -inline bool ObSimpleTableSchemaV2::is_domain_index(ObIndexType index_type) +inline bool ObSimpleTableSchemaV2::is_domain_index(const ObIndexType index_type) { - return INDEX_TYPE_DOMAIN_CTXCAT == index_type; + return is_spatial_index(index_type) || + share::schema::is_fts_index_aux(index_type) || + share::schema::is_fts_doc_word_aux(index_type) || + share::schema::is_multivalue_index_aux(index_type); +} + +inline bool ObSimpleTableSchemaV2::is_fts_or_multivalue_index() const +{ + return share::schema::is_fts_or_multivalue_index(index_type_); } inline bool ObSimpleTableSchemaV2::should_not_validate_data_index_ckm() const { // spatial index column is different from data table column, should not validate data & index column checksum - return is_spatial_index(); + // fulltext index cannot validate data by simply column checksum comparision + // multi-value index column is different from data table column, should not validate data & index column checksum + return is_domain_index(); } inline bool ObSimpleTableSchemaV2::should_check_major_merge_progress() const @@ -2194,6 +2253,7 @@ int ObTableSchema::add_column(const ColumnType &column) index_column.type_ = column.get_meta_type(); index_column.fulltext_flag_ = column.is_fulltext_column(); index_column.spatial_flag_ = column.is_spatial_generated_column(); + index_column.multivalue_flag_ = 
column.is_multivalue_generated_column(); if (index_column.type_.is_decimal_int()) { index_column.type_.set_scale(column.get_accuracy().get_scale()); } diff --git a/src/share/schema/ob_table_sql_service.cpp b/src/share/schema/ob_table_sql_service.cpp index f63ceab74b..81529e3ceb 100644 --- a/src/share/schema/ob_table_sql_service.cpp +++ b/src/share/schema/ob_table_sql_service.cpp @@ -1965,6 +1965,15 @@ int ObTableSqlService::add_table( } else if (tenant_data_version < DATA_VERSION_4_3_0_0 && table.is_materialized_view()) { ret = OB_NOT_SUPPORTED; LOG_WARN("mview is not support before 4.3", KR(ret), K(table)); + } else if (tenant_data_version < DATA_VERSION_4_3_1_0 && (table.is_fts_index() || table.is_multivalue_index())) { + ret = OB_NOT_SUPPORTED; + if (table.is_multivalue_index()) { + LOG_WARN("tenant data version is less than 4.3.1, multivalue index is not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, multivalue index"); + } else { + LOG_WARN("tenant data version is less than 4.3.1, fulltext index is not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, fulltext index"); + } } else if (OB_FAIL(check_ddl_allowed(table))) { LOG_WARN("check ddl allowd failed", K(ret), K(table)); } else if (OB_FAIL(gen_table_dml(exec_tenant_id, table, update_object_status_ignore_version, dml))) { diff --git a/src/share/text_analysis/ob_text_analyzer.cpp b/src/share/text_analysis/ob_text_analyzer.cpp new file mode 100644 index 0000000000..337052448c --- /dev/null +++ b/src/share/text_analysis/ob_text_analyzer.cpp @@ -0,0 +1,193 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SHARE + +#include "share/text_analysis/ob_text_analyzer.h" + +namespace oceanbase +{ +namespace share +{ + +void ObITextAnalyzer::reset() +{ + for (int64_t i = analyze_pipeline_.count() - 1; i >= 0; --i) { + ObITokenStream *cur_ts = analyze_pipeline_.at(i); + cur_ts->~ObITokenStream(); + if (nullptr != allocator_) { + allocator_->free(cur_ts); + } + } + allocator_ = nullptr; + ctx_ = nullptr; + analyze_pipeline_.reset(); + is_inited_ = false; +} + +int ObITextAnalyzer::init(const ObTextAnalysisCtx &ctx, ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("double init", K(ret)); + } else if (OB_UNLIKELY(!ctx.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid text analysis ctx", K(ret), K(ctx)); + } else if (FALSE_IT(allocator_ = &allocator)) { + } else if (OB_FAIL(inner_init(ctx, allocator))) { + LOG_WARN("failed to inner init analyzer", K(ret), K(ctx)); + } else { + ctx_ = &ctx; + is_inited_ = true; + } + + if (OB_FAIL(ret)) { + reset(); + } + return ret; +} + +int ObITextAnalyzer::add_tokenizer(const ObTextTokenizer::TokenizerType &type) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!analyze_pipeline_.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("there is already an tokenizer in analyse pipeline", K(ret)); + } else { + ObITokenStream *token_stream = nullptr; + switch (type) { + case ObTextTokenizer::WHITESPACE: { + if (OB_FAIL(add_token_stream(token_stream))) { + LOG_WARN("failed to add token stread", K(ret)); + } + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported tokenizer type", K(ret)); + } 
+ } + if (FAILEDx(analyze_pipeline_.push_back(token_stream))) { + LOG_WARN("failed to add tokenizer to analyse pipeline", K(ret), + K(type), KPC(token_stream), K_(analyze_pipeline)); + } + } + return ret; +} + +int ObITextAnalyzer::add_normalizer( + const ObTokenNormalizer::TokenNormalizerType &type, + const ObTextAnalysisCtx &ctx) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(analyze_pipeline_.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cannot add a normalizer to an empty analyse pipeline", K(ret), K(type)); + } else { + ObITokenStream *token_stream = nullptr; + switch (type) { + case ObTokenNormalizer::STOPWORD_FILTER: { + if (OB_FAIL(add_token_stream(token_stream))) { + LOG_WARN("failed to add token stop word filter", K(ret)); + } + break; + } + case ObTokenNormalizer::TEXT_GROUPING_FILTER: { + if (OB_FAIL(add_token_stream(token_stream))) { + LOG_WARN("failed to add text grouping filter", K(ret)); + } + break; + } + case ObTokenNormalizer::ENG_BASIC_NORM: { + if (OB_FAIL(add_token_stream(token_stream))) { + LOG_WARN("failed to add basic english normalizer", K(ret)); + } + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported tokenizer type", K(ret)); + } + } + + if (OB_SUCC(ret)) { + ObTokenNormalizer *normalizer = static_cast(token_stream); + if (OB_FAIL(normalizer->init(ctx.cs_, *get_tail_token_stream()))) { + LOG_WARN("failed to init normalizer", K(ret), K(ctx), K(type), KPC(normalizer)); + } else if (OB_FAIL(analyze_pipeline_.push_back(normalizer))) { + LOG_WARN("failed to add normalizer to analyse pipeline", K(ret), + K(type), K(ctx), KPC(normalizer), K_(analyze_pipeline)); + } + } + } + return ret; +} + +template +int ObITextAnalyzer::add_token_stream(ObITokenStream *&token_stream) +{ + int ret = OB_SUCCESS; + char *buf = nullptr; + if (OB_ISNULL(buf = static_cast(allocator_->alloc(sizeof(T))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory", K(ret), K(sizeof(T))); + } else { + token_stream = new 
(buf) T(); + } + return ret; +} + +ObITokenStream *ObITextAnalyzer::get_tail_token_stream() +{ + OB_ASSERT(!analyze_pipeline_.empty()); + ObITokenStream *tail_stream = analyze_pipeline_.at(analyze_pipeline_.count() - 1); + OB_ASSERT(nullptr != tail_stream); + return tail_stream; +} + +int ObEnglishTextAnalyzer::inner_init(const ObTextAnalysisCtx &ctx, ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + UNUSEDx(ctx); // TODO: generate specific analyse pipeline by ctx + if (OB_FAIL(add_tokenizer(ObTextTokenizer::WHITESPACE))) { + LOG_WARN("failed to add white space tokenizer", K(ret)); + } else if (ctx.filter_stopword_ && OB_FAIL(add_normalizer(ObTokenNormalizer::STOPWORD_FILTER, ctx))) { + LOG_WARN("failed to add stop word filter", K(ret)); + } else if (OB_FAIL(add_normalizer(ObTokenNormalizer::ENG_BASIC_NORM, ctx))) { + LOG_WARN("failed to add basic english normalizer", K(ret)); + } else if (ctx.need_grouping_ && OB_FAIL(add_normalizer(ObTokenNormalizer::TEXT_GROUPING_FILTER, ctx))) { + LOG_WARN("failed to add text grouping filter", K(ret)); + } + return ret; +} + +int ObEnglishTextAnalyzer::analyze(const ObDatum &document, ObITokenStream *&token_stream) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else { + get_tail_token_stream()->reuse(); + ObTextTokenizer *tokenizer = static_cast(analyze_pipeline_.at(0)); + if (OB_FAIL(tokenizer->open(document, ctx_->cs_))) { + LOG_WARN("failed to open tokenizer", K(ret), KPC_(ctx)); + } else { + token_stream = get_tail_token_stream(); + } + } + return ret; +} + +} // namespace share +} // namespace oceanbase diff --git a/src/share/text_analysis/ob_text_analyzer.h b/src/share/text_analysis/ob_text_analyzer.h new file mode 100644 index 0000000000..76f5f689c8 --- /dev/null +++ b/src/share/text_analysis/ob_text_analyzer.h @@ -0,0 +1,89 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_SHARE_OB_TEXT_ANALYZER_H_ +#define OCEANBASE_SHARE_OB_TEXT_ANALYZER_H_ + +#include "share/text_analysis/ob_token_stream.h" +namespace oceanbase +{ +namespace share +{ + +struct ObTextAnalysisCtx final +{ +public: + ObTextAnalysisCtx() + : cs_(nullptr), + filter_stopword_(true), + need_grouping_(false) + {} + ~ObTextAnalysisCtx() = default; + bool is_valid() const { return nullptr != cs_; } + void reset() + { + cs_ = nullptr; + filter_stopword_ = true; + need_grouping_ = false; + } + TO_STRING_KV(KP_(cs), K_(filter_stopword), K_(need_grouping)); +public: + const ObCharsetInfo *cs_; + bool filter_stopword_; + bool need_grouping_; + // language type + // word segment plugin type + // specified normalization tricks +}; + +class ObITextAnalyzer +{ +public: + ObITextAnalyzer() : allocator_(nullptr), ctx_(nullptr), analyze_pipeline_(), is_inited_(false) {} + virtual ~ObITextAnalyzer() { reset(); } + + virtual void reset(); + virtual int init(const ObTextAnalysisCtx &ctx, ObIAllocator &allocator); + virtual int analyze(const ObDatum &document, ObITokenStream *&token_stream) = 0; + VIRTUAL_TO_STRING_KV(KPC_(ctx), K_(analyze_pipeline), K_(is_inited)); +protected: + virtual int inner_init(const ObTextAnalysisCtx &ctx, ObIAllocator &allocator) = 0; + int add_tokenizer(const ObTextTokenizer::TokenizerType &type); + int add_normalizer( + const ObTokenNormalizer::TokenNormalizerType &type, + const ObTextAnalysisCtx &ctx); + ObITokenStream *get_tail_token_stream(); +private: + template + int add_token_stream(ObITokenStream 
*&token_stream); +protected: + ObIAllocator *allocator_; + const ObTextAnalysisCtx *ctx_; + ObSEArray analyze_pipeline_; + bool is_inited_; +}; + +class ObEnglishTextAnalyzer final : public ObITextAnalyzer +{ +public: + ObEnglishTextAnalyzer() : ObITextAnalyzer() {} + virtual ~ObEnglishTextAnalyzer() {} + + virtual int analyze(const ObDatum &document, ObITokenStream *&token_stream) override; +protected: + virtual int inner_init(const ObTextAnalysisCtx &ctx, ObIAllocator &allocator) override; +}; + +} // namespace share +} // namespace oceanbase + +#endif diff --git a/src/share/text_analysis/ob_token_stream.cpp b/src/share/text_analysis/ob_token_stream.cpp new file mode 100644 index 0000000000..48cb719804 --- /dev/null +++ b/src/share/text_analysis/ob_token_stream.cpp @@ -0,0 +1,430 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX SHARE + +#include "share/text_analysis/ob_token_stream.h" +#include "share/rc/ob_tenant_base.h" + +namespace oceanbase +{ +namespace share +{ + +ObTextTokenizer::ObTextTokenizer() + : ObITokenStream(), + input_doc_(nullptr), + cs_(nullptr), + iter_end_(false), + is_inited_(false) +{ +} + +void ObTextTokenizer::reset() +{ + input_doc_ = nullptr; + cs_ = nullptr; + iter_end_ = false; + is_inited_ = false; +} + +int ObTextTokenizer::open(const ObDatum &document, const ObCharsetInfo *cs) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("double init", K(ret), K_(is_inited), K_(iter_end), K_(cs), KPC_(input_doc)); + } else if (OB_ISNULL(cs)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Charset info is nullptr", K(ret), K(cs)); + } else if (OB_UNLIKELY(document.is_outrow())) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("out row document not supported for tokenizer yet", K(ret), K(document)); + } else { + input_doc_ = &document; + cs_ = cs; + if (document.is_null() || 0 == document.len_) { + iter_end_ = true; + } else if (OB_FAIL(inner_open(document, cs_))) { + LOG_WARN("failed to open document for tokenization", K(ret)); + } else { + is_inited_ = true; + } + } + return ret; +} + +ObTextWhitespaceTokenizer::ObTextWhitespaceTokenizer() + : ObTextTokenizer(), + curr_token_ptr_(nullptr), + trav_pos_(0) +{ +} + +void ObTextWhitespaceTokenizer::reset() +{ + curr_token_ptr_ = nullptr; + trav_pos_ = 0; + ObTextTokenizer::reset(); +} + +int ObTextWhitespaceTokenizer::inner_open(const ObDatum &document, const ObCharsetInfo *cs) +{ + int ret = OB_SUCCESS; + curr_token_ptr_ = nullptr; + trav_pos_ = 0; + return ret; +} + +int ObTextWhitespaceTokenizer::get_next(ObDatum &next_token, int64_t &token_freq) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (iter_end_) { + ret = OB_ITER_END; + } else { + const char *doc = input_doc_->ptr_; + const uint32_t doc_len = 
get_input_buf_len(); + int64_t token_len = 0; + // to next non-whitespace pos + while (OB_SUCC(ret) && found_delimiter()) { + const int64_t c_len = ob_mbcharlen_ptr(cs_, doc + trav_pos_, doc + doc_len); + trav_pos_ += c_len; + if (trav_pos_ >= doc_len || 0 == c_len) {// if char is invalid, just skip the rest of document + iter_end_ = true; + ret = OB_ITER_END; + } + } + + if (OB_SUCC(ret)) { + curr_token_ptr_ = get_trav_ptr(); + } + + // to next whitespace pos + while (OB_SUCC(ret) && !found_delimiter()) { + const int64_t c_len = ob_mbcharlen_ptr(cs_, doc + trav_pos_, doc + doc_len); + trav_pos_ += c_len; + token_len += c_len; + if (trav_pos_ >= doc_len) { + iter_end_ = true; + break; + } + } + + if (OB_SUCC(ret)) { + next_token.set_string(curr_token_ptr_, static_cast(token_len)); + token_freq = 1; + LOG_DEBUG("[TEXT ANALYSIS] got next token", K(ret), K(next_token), K(next_token.get_string()), + K(input_doc_->len_), K_(trav_pos)); + } + } + return ret; +} + +bool ObTextWhitespaceTokenizer::found_delimiter() +{ + bool found = false; + const char *curr_char = input_doc_->ptr_ + trav_pos_; + found = ob_isspace(cs_, *curr_char) || ob_iscntrl(cs_, *curr_char) || ob_ispunct(cs_, *curr_char); + return found; +} + +ObTokenNormalizer::ObTokenNormalizer() + : in_stream_(nullptr), + cs_(nullptr), + is_inited_(false) +{ +} + +void ObTokenNormalizer::reset() +{ + in_stream_ = nullptr; + cs_ = nullptr; + is_inited_ = false; +} + +void ObTokenNormalizer::reuse() +{ + if (nullptr != in_stream_) { + in_stream_->reuse(); + } +} + +int ObTokenNormalizer::init(const ObCharsetInfo *cs, ObITokenStream &in_stream) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("double initialization", K(ret)); + } else if (OB_ISNULL(cs)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argment, charset info is nullptr", K(ret), KP(cs)); + } else { + in_stream_ = &in_stream; + cs_ = cs; + if (OB_FAIL(inner_init(cs_, in_stream))) { + LOG_WARN("failed to inner init token 
normalizer", K(ret)); + } + is_inited_ = true; + } + return ret; +} + +int ObTokenStopWordNormalizer::get_next(ObDatum &next_token, int64_t &token_freq) +{ + int ret = OB_SUCCESS; + bool found_next_valid_token = false; + token_freq = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } + + while (OB_SUCC(ret) && !found_next_valid_token) { + // Only filter out pure punctuation / control mark tokens for now + if (OB_FAIL(in_stream_->get_next(next_token, token_freq))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next token from in stream", K(ret), KPC_(in_stream)); + } + } else if (OB_FAIL(filter_special_marks(next_token, found_next_valid_token))) { + LOG_WARN("failed to filter special marks", K(ret), K(next_token), KP_(cs), KPC_(in_stream)); + } else if (!found_next_valid_token) { + next_token.reset(); + } + } + return ret; +} + +int ObTokenStopWordNormalizer::filter_special_marks(const ObDatum &check_token, bool &is_valid) +{ + int ret = OB_SUCCESS; + const int64_t token_len = check_token.len_; + const char *token = check_token.ptr_; + int64_t special_mark_cnt = 0; + for (int64_t i = 0; i < token_len; ++i) { + const char *character = token + i; + if (ob_ispunct(cs_, *character) || ob_iscntrl(cs_, *character)) { + special_mark_cnt++; + } + } + is_valid = !(special_mark_cnt == token_len); + return ret; +} + +ObBasicEnglishNormalizer::ObBasicEnglishNormalizer() + : ObTokenNormalizer(), + norm_allocator_("TxtTokGrpFilter", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()) +{ +} + +void ObBasicEnglishNormalizer::reset() +{ + norm_allocator_.reset(); + ObTokenNormalizer::reset(); +} + +void ObBasicEnglishNormalizer::reuse() +{ + norm_allocator_.reuse(); + ObTokenNormalizer::reuse(); +} + +int ObBasicEnglishNormalizer::get_next(ObDatum &next_token, int64_t &token_freq) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else { + next_token.reset(); + token_freq = 0; + 
norm_allocator_.reuse(); + ObDatum tmp_datum; + bool found_alnum = false; + uint32_t norm_token_len = 0; + const char *norm_token_ptr = nullptr; + while (OB_SUCC(ret) && !found_alnum) { + tmp_datum.reset(); + if (OB_FAIL(in_stream_->get_next(tmp_datum, token_freq))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next datum", K(ret)); + } + } else { + // trim leading and trailing non-alnum characters + const char *token = tmp_datum.ptr_; + const uint32_t raw_token_len = tmp_datum.len_; + uint32_t first_alnum_pos = 0; + uint32_t last_alnum_pos = raw_token_len - 1; + for (uint32_t i = 0; i < raw_token_len; ++i) { + const char *character = token + i; + if (ob_isalnum(cs_, *character)) { + first_alnum_pos = i; + found_alnum = true; + break; + } + } + + for (uint32_t i = raw_token_len - 1; i >= first_alnum_pos; --i) { + const char *character = token + i; + if (ob_isalnum(cs_, *character)) { + last_alnum_pos = i; + found_alnum = true; + break; + } + } + + if (!found_alnum) { + // skip + } else if (OB_UNLIKELY(last_alnum_pos < first_alnum_pos)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected alnum char pos", K(ret), K(last_alnum_pos), K(first_alnum_pos)); + } else { + norm_token_len = last_alnum_pos - first_alnum_pos + 1; + norm_token_ptr = token + first_alnum_pos; + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_UNLIKELY(!found_alnum)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected unfounded alnum in token", K(ret), K(tmp_datum)); + } else { + ObString norm_alnum_token(norm_token_len, norm_token_ptr); + ObString norm_lower_token; + if (OB_FAIL(ObCharset::tolower(cs_, norm_alnum_token, norm_lower_token, norm_allocator_))) { + LOG_WARN("norm token to lower case failed", K(ret), K_(cs), K(norm_alnum_token)); + } else { + next_token.set_string(norm_lower_token); + } + } + } + return ret; +} + + +ObTextTokenGroupNormalizer::ObTextTokenGroupNormalizer() + : ObTokenNormalizer(), + token_allocator_("TxtTokGrpFilter", OB_MALLOC_MIDDLE_BLOCK_SIZE, 
MTL_ID()), + grouping_map_(), + map_iter_(), + map_end_iter_(), + in_stream_iter_end_(false) +{ +} + +// Full teardown: destroys the grouping map, clears the "input drained" flag, resets the token arena and then the base normalizer. +void ObTextTokenGroupNormalizer::reset() +{ + grouping_map_.destroy(); + in_stream_iter_end_ = false; + token_allocator_.reset(); + ObTokenNormalizer::reset(); +} + +// Same sequence as reset(), but through the reuse() entry points of the map, the arena and the base class. +void ObTextTokenGroupNormalizer::reuse() +{ + grouping_map_.reuse(); + in_stream_iter_end_ = false; + token_allocator_.reuse(); + ObTokenNormalizer::reuse(); +} + +// Validates the charset pointer and creates the grouping hash map; in_stream is not touched here. +int ObTextTokenGroupNormalizer::inner_init(const ObCharsetInfo *cs, ObITokenStream &in_stream) +{ + // TODO: use reasonable initial bucket cnt + int ret = OB_SUCCESS; + if (OB_ISNULL(cs)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments, charset info is nullptr", K(ret), KP(cs)); + } else if (OB_FAIL(grouping_map_.create( + DEFAULT_HASH_MAP_BUCKET_CNT, + "TxtTokGrpHash", + "TxtTokGrpHash", + MTL_ID()))) { + LOG_WARN("failed to create grouping hash map", K(ret), K(MTL_ID())); + } + return ret; +} + +// While the input has not been drained yet, builds the grouping map first; then emits one (token, summed frequency) entry per call and returns OB_ITER_END once the map iterator reaches the end. +int ObTextTokenGroupNormalizer::get_next(ObDatum &next_token, int64_t &token_freq) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (!in_stream_iter_end_ && OB_FAIL(build_grouping_map())) { + LOG_WARN("failed to build text token grouping map", K(ret)); + } + + if (OB_SUCC(ret)) { + if (map_iter_ == map_end_iter_) { + ret = OB_ITER_END; + } else { + next_token.set_string(map_iter_->first); + token_freq = map_iter_->second; + ++map_iter_; + } + } + return ret; +} + +// Drains in_stream_, accumulating the frequency of every distinct token. The first occurrence of a token is deep-copied into token_allocator_ before it is used as the map key; later occurrences only add to the stored counter. On OB_ITER_END from the input the map iterators are positioned and the stream is marked as drained. +int ObTextTokenGroupNormalizer::build_grouping_map() +{ + int ret = OB_SUCCESS; + ObDatum token; + if (OB_UNLIKELY(in_stream_iter_end_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("already iterated in stream", K(ret), K_(in_stream_iter_end), KPC_(in_stream)); + } + + while (OB_SUCC(ret)) { + int64_t grouped_token_freq = 0; + int64_t curr_token_freq = 0; + if (OB_FAIL(in_stream_->get_next(token, curr_token_freq))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next token from in stream", 
K(ret), KPC_(in_stream)); + } + } else { + ObString token_string = token.get_string(); + int hash_ret = grouping_map_.get_refactored(token_string, grouped_token_freq); + if (OB_HASH_NOT_EXIST == hash_ret) { + ObString copied_token_string; + if (OB_FAIL(ob_write_string(token_allocator_, token_string, copied_token_string))) { + LOG_WARN("failed to copy token string", K(ret), K(token_string)); + } else if (OB_FAIL(grouping_map_.set_refactored(copied_token_string, curr_token_freq))) { + LOG_WARN("failed to put first token in grouping map", K(ret), K(token), K(copied_token_string)); + } + } else if (OB_SUCCESS == hash_ret) { + // add token_freq in hash map directly, to avoid deep copy token string + // the value pointer is checked before use instead of being dereferenced blindly + int64_t *grouped_freq_ptr = grouping_map_.get(token_string); + if (OB_ISNULL(grouped_freq_ptr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null value ptr for existing token in grouping map", K(ret), K(token_string)); + } else { + *grouped_freq_ptr += curr_token_freq; + } + } else { + ret = hash_ret; + LOG_WARN("failed to get value from grouping map", K(ret), K(token), K(token_string)); + } + } + } + + if (OB_UNLIKELY(ret != OB_ITER_END)) { + LOG_WARN("failed to iterate in token stream", K(ret), KPC_(in_stream)); + } else { + ret = OB_SUCCESS; + map_end_iter_ = grouping_map_.end(); + map_iter_ = grouping_map_.begin(); + in_stream_iter_end_ = true; + } + return ret; +} + +} // namespace share +} // namespace oceanbase diff --git a/src/share/text_analysis/ob_token_stream.h b/src/share/text_analysis/ob_token_stream.h new file mode 100644 index 0000000000..a73a2bd893 --- /dev/null +++ b/src/share/text_analysis/ob_token_stream.h @@ -0,0 +1,158 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_SHARE_OB_TOKEN_STREAM_H_ +#define OCEANBASE_SHARE_OB_TOKEN_STREAM_H_ + +#include "share/datum/ob_datum.h" +namespace oceanbase +{ +namespace share +{ + +class ObITokenStream +{ +public: + ObITokenStream() {} + virtual ~ObITokenStream() {} + virtual void reset() = 0; + virtual void reuse() = 0; + virtual int get_next(ObDatum &next_token, int64_t &token_freq) = 0; + virtual int get_next(ObDatum &next_token) + { + int64_t token_freq = 0; + return get_next(next_token, token_freq); + } + + DECLARE_PURE_VIRTUAL_TO_STRING; +}; + +class ObTextTokenizer : public ObITokenStream +{ +public: + enum TokenizerType : uint8_t + { + WHITESPACE = 0, + MAX + }; + ObTextTokenizer(); + virtual ~ObTextTokenizer() {} + int open(const ObDatum &document, const ObCharsetInfo *cs); + virtual void reset() override; + virtual void reuse() override { reset(); } +protected: + virtual int inner_open(const ObDatum &doc, const ObCharsetInfo *cs) { return OB_NOT_IMPLEMENT; } + VIRTUAL_TO_STRING_KV(KPC_(input_doc), KP_(cs), K_(iter_end), K_(is_inited)); +protected: + const ObDatum *input_doc_; + const ObCharsetInfo *cs_; + bool iter_end_; + bool is_inited_; +}; + +// tokenize by whitespace and special marks +class ObTextWhitespaceTokenizer final : public ObTextTokenizer +{ +public: + ObTextWhitespaceTokenizer(); + virtual ~ObTextWhitespaceTokenizer() {} + virtual int get_next(ObDatum &next_token, int64_t &token_freq) override; + virtual void reset() override; +protected: + virtual int inner_open(const ObDatum &document, const ObCharsetInfo *cs) override; + INHERIT_TO_STRING_KV("ObTextTokenizer", ObTextTokenizer, K_(trav_pos), KP_(curr_token_ptr)); +private: + bool found_delimiter(); + const char *get_trav_ptr() { return input_doc_->ptr_ + trav_pos_; } + uint32_t get_input_buf_len() { return input_doc_->len_; } +private: + const char *curr_token_ptr_; + int64_t trav_pos_; +}; + +class ObTokenNormalizer : public ObITokenStream +{ +public: + enum TokenNormalizerType : uint8_t + 
{ + STOPWORD_FILTER = 0, + TEXT_GROUPING_FILTER = 1, + ENG_BASIC_NORM = 2, + MAX + }; + ObTokenNormalizer(); + virtual ~ObTokenNormalizer() {} + virtual void reset(); + virtual void reuse() override; + virtual int init(const ObCharsetInfo *cs, ObITokenStream &in_stream); + VIRTUAL_TO_STRING_KV(KPC_(in_stream), KP_(cs), K_(is_inited)); +protected: + virtual int inner_init(const ObCharsetInfo *cs, ObITokenStream &in_stream) { return OB_SUCCESS; } +protected: + ObITokenStream *in_stream_; + const ObCharsetInfo *cs_; + bool is_inited_; +}; + +// filter by punctuation mark, control mark or stop word dictionary +class ObTokenStopWordNormalizer final : public ObTokenNormalizer +{ +public: + ObTokenStopWordNormalizer() : ObTokenNormalizer() {} + virtual ~ObTokenStopWordNormalizer() {} + + virtual int get_next(ObDatum &next_token, int64_t &token_freq) override; +private: + int filter_special_marks(const ObDatum &check_token, bool &is_valid); + // int check_stop_words(const ObDatum &check_token, bool &is_valid); +}; + +// remove leading / trailing punctuations and to_lower case alphabetic characters +class ObBasicEnglishNormalizer final : public ObTokenNormalizer +{ +public: + ObBasicEnglishNormalizer(); + virtual ~ObBasicEnglishNormalizer() {}; + + virtual void reset() override; + virtual void reuse() override; + virtual int get_next(ObDatum &next_token, int64_t &token_freq) override; +private: + ObArenaAllocator norm_allocator_; +}; + +// group (deduplicate) and count +class ObTextTokenGroupNormalizer final : public ObTokenNormalizer +{ +public: + ObTextTokenGroupNormalizer(); + virtual ~ObTextTokenGroupNormalizer() { reset(); } + virtual void reset() override; + virtual void reuse() override; + // Do we need to keep the order of tokens after grouping? 
+ virtual int get_next(ObDatum &next_token, int64_t &token_freq) override; +private: + virtual int inner_init(const ObCharsetInfo *cs, ObITokenStream &in_stream) override; + int build_grouping_map(); +private: + static const int64_t DEFAULT_HASH_MAP_BUCKET_CNT = 128; + ObArenaAllocator token_allocator_; + common::hash::ObHashMap grouping_map_; + common::hash::ObHashMap::const_iterator map_iter_; + common::hash::ObHashMap::const_iterator map_end_iter_; + bool in_stream_iter_end_; +}; + +}; // namespace share +}; // namespace oceanbase + +#endif // OCEANBASE_SHARE_OB_TOKEN_STREAM_H_ diff --git a/src/sql/CMakeLists.txt b/src/sql/CMakeLists.txt index ba38e479e5..75598e2cb6 100644 --- a/src/sql/CMakeLists.txt +++ b/src/sql/CMakeLists.txt @@ -37,12 +37,11 @@ ob_set_subtarget(ob_sql common_mixed ) ob_set_subtarget(ob_sql das - das/ob_group_scan_iter.cpp - das/ob_das_group_scan_op.cpp das/ob_das_context.cpp das/ob_das_define.cpp das/ob_das_delete_op.cpp das/ob_das_dml_ctx_define.cpp + das/ob_domain_index_lookup_op.cpp das/ob_das_extra_data.cpp das/ob_das_factory.cpp das/ob_das_insert_op.cpp @@ -62,8 +61,14 @@ ob_set_subtarget(ob_sql das das/ob_das_spatial_index_lookup_op.cpp das/ob_das_retry_ctrl.cpp das/ob_das_simple_op.cpp + das/ob_das_domain_utils.cpp das/ob_text_retrieval_op.cpp das/ob_das_attach_define.cpp + das/iter/ob_das_iter.cpp + das/iter/ob_das_merge_iter.cpp + das/iter/ob_das_lookup_iter.cpp + das/iter/ob_das_group_fold_iter.cpp + das/iter/ob_das_iter_utils.cpp ) ob_set_subtarget(ob_sql dtl @@ -303,6 +308,7 @@ ob_set_subtarget(ob_sql engine_expr engine/expr/ob_expr_bit_or.cpp engine/expr/ob_expr_bit_right_shift.cpp engine/expr/ob_expr_bit_xor.cpp + engine/expr/ob_expr_bm25.cpp engine/expr/ob_expr_bool.cpp engine/expr/ob_expr_calc_partition_id.cpp engine/expr/ob_expr_calc_urowid.cpp @@ -346,6 +352,8 @@ ob_set_subtarget(ob_sql engine_expr engine/expr/ob_expr_div.cpp engine/expr/ob_expr_dll_udf.cpp engine/expr/ob_expr_dml_event.cpp + engine/expr/ob_expr_doc_id.cpp 
+ engine/expr/ob_expr_doc_length.cpp engine/expr/ob_expr_effective_tenant.cpp engine/expr/ob_expr_effective_tenant_id.cpp engine/expr/ob_expr_elt.cpp @@ -430,6 +438,7 @@ ob_set_subtarget(ob_sql engine_expr engine/expr/ob_expr_json_unquote.cpp engine/expr/ob_expr_json_valid.cpp engine/expr/ob_expr_json_value.cpp + engine/expr/ob_expr_json_utils.cpp engine/expr/ob_expr_json_query.cpp engine/expr/ob_expr_json_exists.cpp engine/expr/ob_expr_json_type.cpp @@ -625,6 +634,8 @@ ob_set_subtarget(ob_sql engine_expr engine/expr/ob_expr_week_of_func.cpp engine/expr/ob_expr_weight_string.cpp engine/expr/ob_expr_width_bucket.cpp + engine/expr/ob_expr_word_count.cpp + engine/expr/ob_expr_word_segment.cpp engine/expr/ob_expr_wrapper_inner.cpp engine/expr/ob_expr_xor.cpp engine/expr/ob_infix_expression.cpp diff --git a/src/sql/code_generator/ob_dml_cg_service.cpp b/src/sql/code_generator/ob_dml_cg_service.cpp index 500972c48b..98f3f2412f 100644 --- a/src/sql/code_generator/ob_dml_cg_service.cpp +++ b/src/sql/code_generator/ob_dml_cg_service.cpp @@ -1137,7 +1137,7 @@ int ObDmlCgService::convert_dml_column_info(ObTableID index_tid, for (; OB_SUCC(ret) && iter != index_schema->column_end(); ++iter) { const ObColumnSchemaV2 *column = *iter; ObObjMeta column_type; - if (!column->is_rowkey_column() && !column->is_virtual_generated_column()) { + if (!column->is_rowkey_column() && (!column->is_virtual_generated_column())) { //skip virtual generated column or rowkey column_type = column->get_meta_type(); column_type.set_scale(column->get_accuracy().get_scale()); @@ -1798,7 +1798,7 @@ int ObDmlCgService::generate_das_projector(const ObIArray &dml_column_ IntFixedArray &new_row_projector = das_ctdef.new_row_projector_; bool is_spatial_index = das_ctdef.table_param_.get_data_table().is_spatial_index() && das_ctdef.op_type_ == DAS_OP_TABLE_UPDATE; - uint8_t extra_geo = is_spatial_index ? 1 : 0; + uint8_t extra_geo = (is_spatial_index) ? 
1 : 0; //generate old row projector if (!old_row.empty()) { //generate storage row projector @@ -1833,10 +1833,11 @@ int ObDmlCgService::generate_das_projector(const ObIArray &dml_column_ } } } + if (OB_SUCC(ret) && is_spatial_index && OB_FAIL(add_geo_col_projector(old_row, full_row, dml_column_ids, storage_column_ids.count(), das_ctdef, old_row_projector))) { - LOG_WARN("add geo column projector failed", K(ret)); + LOG_WARN("add geo column projector failed", K(ret)); } } //generate new row projector @@ -2274,6 +2275,41 @@ int ObDmlCgService::convert_table_dml_param(ObLogicalOperator &op, ObDASDMLBaseC return ret; } +int ObDmlCgService::fill_multivalue_extra_info_on_table_param( + share::schema::ObSchemaGetterGuard *guard, + const ObTableSchema *index_schema, + uint64_t tenant_id, + ObDASDMLBaseCtDef &das_dml_ctdef) +{ + int ret = OB_SUCCESS; + int64_t t_version = OB_INVALID_VERSION; + const ObTableSchema *table_schema = NULL; + + if (OB_ISNULL(guard) || OB_ISNULL(index_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(guard), K(index_schema->get_data_table_id())); + } else if (OB_FAIL(guard->get_table_schema(tenant_id, index_schema->get_data_table_id(), table_schema))) { + LOG_WARN("fail to get schema", K(ret), K(index_schema->get_data_table_id())); + } else if (OB_ISNULL(table_schema)) { + ret = OB_SCHEMA_ERROR; + LOG_WARN("table schema is NULL", K(ret)); + } else { + ObTableSchemaParam& table_param = das_dml_ctdef.table_param_.get_data_table_ref(); + table_param.set_data_table_rowkey_column_num(table_schema->get_rowkey_column_num()); + uint64_t max_idx = table_param.get_column_count(); + for (int64_t i = max_idx - 2; i >= 0; --i) { + if (OB_ISNULL(table_param.get_column_by_idx(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table column is NULL", K(ret)); + } else { + table_param.get_column_by_idx(i)->set_nullable_for_write(true); + } + } + } + + return ret; +} + int 
ObDmlCgService::fill_table_dml_param(share::schema::ObSchemaGetterGuard *guard, uint64_t table_id, ObDASDMLBaseCtDef &das_dml_ctdef) @@ -2296,6 +2332,9 @@ int ObDmlCgService::fill_table_dml_param(share::schema::ObSchemaGetterGuard *gua t_version, das_dml_ctdef.column_ids_))) { LOG_WARN("fail to convert table param", K(ret), K(das_dml_ctdef)); + } else if (table_schema->is_multivalue_index_aux() && + OB_FAIL(fill_multivalue_extra_info_on_table_param(guard, table_schema, tenant_id, das_dml_ctdef))) { + LOG_WARN("fail to set multivalue index extra info on table param", K(ret), K(das_dml_ctdef)); } return ret; } diff --git a/src/sql/code_generator/ob_dml_cg_service.h b/src/sql/code_generator/ob_dml_cg_service.h index 7187fb1903..b3c7bfe664 100644 --- a/src/sql/code_generator/ob_dml_cg_service.h +++ b/src/sql/code_generator/ob_dml_cg_service.h @@ -226,6 +226,12 @@ private: uint32_t proj_idx, ObDASDMLBaseCtDef &das_ctdef, IntFixedArray &row_projector); + int fill_multivalue_extra_info_on_table_param( + share::schema::ObSchemaGetterGuard *guard, + const ObTableSchema *index_schema, + uint64_t tenant_id, + ObDASDMLBaseCtDef &das_dml_ctdef); + int get_column_ref_base_cid(const ObLogicalOperator &op, const ObColumnRefRawExpr *col, uint64_t &base_cid); int get_table_schema_version(const ObLogicalOperator &op, uint64_t table_id, int64_t &schema_version); int generate_das_dml_ctdef(ObLogDelUpd &op, diff --git a/src/sql/code_generator/ob_expr_generator_impl.cpp b/src/sql/code_generator/ob_expr_generator_impl.cpp index 51d724b9ec..0ee99c83c4 100644 --- a/src/sql/code_generator/ob_expr_generator_impl.cpp +++ b/src/sql/code_generator/ob_expr_generator_impl.cpp @@ -2017,6 +2017,31 @@ int ObExprGeneratorImpl::visit(ObSetOpRawExpr &expr) return ret; } +/* Emits the post-expr item for a match expr: a columnlized match expr becomes a column item that refers to the index obtained from column_idx_provider_; a match expr that is not columnlized is reported as OB_ERR_UNEXPECTED. */ +int ObExprGeneratorImpl::visit(ObMatchFunRawExpr &expr) +{ + int ret = OB_SUCCESS; + // no expr operator is generated for a match expr + ObPostExprItem item; + item.set_accuracy(expr.get_accuracy()); + if (OB_ISNULL(sql_expr_)) { + ret = 
OB_ERR_UNEXPECTED; + LOG_ERROR("sql_expr_ is NULL"); + } else if (expr.has_flag(IS_COLUMNLIZED)) { + int64_t idx = OB_INVALID_INDEX; + if (OB_FAIL(column_idx_provider_.get_idx(&expr, idx))) { + LOG_WARN("get index failed", K(ret)); + } else if (OB_FAIL(item.set_column(idx))) { + LOG_WARN("failed to set column", K(ret), K(expr)); + } else if (OB_FAIL(sql_expr_->add_expr_item(item, &expr))) { + LOG_WARN("failed to add expr item", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("all match expr should have been generated", K(expr), K(&expr)); + } + return ret; +} + bool ObExprGeneratorImpl::skip_child(ObRawExpr &expr) { return expr.has_flag(IS_COLUMNLIZED) || expr.is_query_ref_expr(); diff --git a/src/sql/code_generator/ob_expr_generator_impl.h b/src/sql/code_generator/ob_expr_generator_impl.h index 10196bfe05..f0617bc859 100644 --- a/src/sql/code_generator/ob_expr_generator_impl.h +++ b/src/sql/code_generator/ob_expr_generator_impl.h @@ -97,6 +97,7 @@ private: virtual int visit(ObWinFunRawExpr &expr); virtual int visit(ObPseudoColumnRawExpr &expr); virtual int visit(ObPlQueryRefRawExpr &expr); + virtual int visit(ObMatchFunRawExpr &expr); virtual bool skip_child(ObRawExpr &expr); private: // types and constants diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index cd03bd22ae..0d72475777 100644 --- a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -399,10 +399,10 @@ int ObStaticEngineCG::check_expr_columnlized(const ObRawExpr *expr) || (expr->is_column_ref_expr() && static_cast(expr)->is_virtual_generated_column()) || (expr->is_column_ref_expr() && is_shadow_column(static_cast(expr)->get_column_id()))) { // skip - } else if ((expr->is_aggr_expr() || (expr->is_win_func_expr())) + } else if ((expr->is_aggr_expr() || (expr->is_win_func_expr()) || expr->is_match_against_expr()) && !expr->has_flag(IS_COLUMNLIZED)) { ret = OB_ERR_UNEXPECTED; - 
LOG_WARN("aggr_expr or win_func_expr should be columnlized", K(ret), KPC(expr)); + LOG_WARN("aggr, win_func, match_against exprs should be columnlized", K(ret), KPC(expr)); } else if (!expr->has_flag(IS_COLUMNLIZED)) { if (0 == expr->get_param_count()) { ret = OB_ERR_UNEXPECTED; @@ -1016,6 +1016,8 @@ int ObStaticEngineCG::generate_spec_final(ObLogicalOperator &op, ObOpSpec &spec) } else if (lookup_ctdef != nullptr && OB_FAIL(lookup_ctdef->pd_expr_spec_.set_calc_exprs(spec.calc_exprs_, tsc_spec.max_batch_size_))) { LOG_WARN("assign all pushdown exprs failed", K(ret)); + } else if (OB_FAIL(tsc_spec.tsc_ctdef_.attach_spec_.set_calc_exprs(spec.calc_exprs_, tsc_spec.max_batch_size_))) { + LOG_WARN("set max batch size to attach spec failed", K(ret)); } } @@ -9003,5 +9005,6 @@ int ObStaticEngineCG::check_window_functions_order(const ObIArrayget_optimizer_context().get_phy_plan_type() == OB_PHY_PLAN_LOCAL || @@ -52,10 +53,13 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc ret = OB_ERR_UNEXPECTED; LOG_WARN("schema guard is null", K(ret)); } else if (OB_FAIL(schema_guard->get_table_schema(op.get_table_id(), - op.get_ref_table_id(), - op.get_stmt(), - table_schema))) { + op.get_ref_table_id(), + op.get_stmt(), + table_schema))) { LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id())); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null table scahem ptr", K(ret)); } else if (OB_FAIL(ObSQLUtils::check_location_access_priv(table_schema->get_external_file_location(), cg_.opt_ctx_->get_session_info()))) { LOG_WARN("fail to check location access priv", K(ret)); @@ -91,10 +95,14 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc if (OB_SUCC(ret)) { bool has_rowscn = false; scan_ctdef.ref_table_id_ = op.get_real_index_table_id(); + if (op.is_text_retrieval_scan()) { + scan_ctdef.ir_scan_type_ = ObTSCIRScanType::OB_IR_INV_IDX_SCAN; + } if 
(OB_FAIL(generate_das_scan_ctdef(op, scan_ctdef, has_rowscn))) { LOG_WARN("generate das scan ctdef failed", K(ret), K(scan_ctdef.ref_table_id_)); } else { tsc_ctdef.flashback_item_.need_scn_ |= has_rowscn; + root_ctdef = &scan_ctdef; } } @@ -117,6 +125,9 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc op.get_stmt(), table_schema))) { LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id())); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to table schema", K(ret)); } else if (OB_FAIL(generate_table_loc_meta(op.get_table_id(), *op.get_stmt(), *table_schema, @@ -126,51 +137,6 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc } } } - if (OB_SUCC(ret) && op.get_index_back()) { - void *lookup_buf = cg_.phy_plan_->get_allocator().alloc(sizeof(ObDASScanCtDef)); - void *loc_meta_buf = cg_.phy_plan_->get_allocator().alloc(sizeof(ObDASTableLocMeta)); - if (OB_ISNULL(lookup_buf) || OB_ISNULL(loc_meta_buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate lookup ctdef buffer failed", K(ret), K(lookup_buf), K(loc_meta_buf)); - } else { - bool has_rowscn = false; - const ObTableSchema *table_schema = nullptr; - ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); - tsc_ctdef.lookup_ctdef_ = new(lookup_buf) ObDASScanCtDef(cg_.phy_plan_->get_allocator()); - tsc_ctdef.lookup_ctdef_->ref_table_id_ = op.get_real_ref_table_id(); - tsc_ctdef.lookup_loc_meta_ = new(loc_meta_buf) ObDASTableLocMeta(cg_.phy_plan_->get_allocator()); - - if (OB_FAIL(generate_das_scan_ctdef(op, *tsc_ctdef.lookup_ctdef_, has_rowscn))) { - LOG_WARN("generate das lookup scan ctdef failed", K(ret)); - } else if (OB_FAIL(schema_guard->get_table_schema(op.get_table_id(), - op.get_ref_table_id(), - op.get_stmt(), - table_schema))) { - LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id())); - } else if 
(OB_FAIL(generate_table_loc_meta(op.get_table_id(), - *op.get_stmt(), - *table_schema, - *cg_.opt_ctx_->get_session_info(), - *tsc_ctdef.lookup_loc_meta_))) { - LOG_WARN("generate table loc meta failed", K(ret)); - } else { - tsc_ctdef.flashback_item_.need_scn_ |= has_rowscn; - } - - if (OB_SUCC(ret) && op.get_index_back() && op.get_is_index_global()) { - if (OB_ISNULL(op.get_calc_part_id_expr()) || op.get_rowkey_exprs().empty()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("calc_part_id_expr is null or rowkeys` count is zero", K(ret)); - } else if (OB_FAIL(cg_.generate_calc_part_id_expr(*op.get_calc_part_id_expr(), - tsc_ctdef.lookup_loc_meta_, - tsc_ctdef.calc_part_id_expr_))) { - LOG_WARN("fail to generate calc part id expr", K(ret), KP(op.get_calc_part_id_expr())); - } else if (OB_FAIL(cg_.generate_rt_exprs(op.get_rowkey_exprs(), tsc_ctdef.global_index_rowkey_exprs_))) { - LOG_WARN("fail to generate rowkey exprs", K(ret)); - } - } - } - } if (OB_SUCC(ret)) { ObArray bnlj_params; @@ -189,6 +155,40 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc } } + bool need_attach = false; + if (OB_SUCC(ret) && op.is_text_retrieval_scan()) { + if (OB_FAIL(generate_text_ir_ctdef(op, tsc_ctdef, root_ctdef))) { + LOG_WARN("failed to generate text ir ctdef", K(ret)); + } else { + need_attach = true; + } + } + + if (OB_SUCC(ret) && op.is_multivalue_index_scan()) { + ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr; + if (OB_FAIL(generate_doc_id_lookup_ctdef(op, tsc_ctdef, root_ctdef, aux_lookup_ctdef))) { + LOG_WARN("failed to generate text ir ctdef", K(ret)); + } else { + root_ctdef = aux_lookup_ctdef; + need_attach = true; + } + } + + if (OB_SUCC(ret) && op.get_index_back()) { + ObDASTableLookupCtDef *lookup_ctdef = nullptr; + if (OB_FAIL(generate_table_lookup_ctdef(op, tsc_ctdef, root_ctdef, lookup_ctdef))) { + LOG_WARN("generate table lookup ctdef failed", K(ret)); + } else { + root_ctdef = lookup_ctdef; + } + } + + if (OB_SUCC(ret) && 
need_attach) { + tsc_ctdef.lookup_ctdef_ = nullptr; + tsc_ctdef.lookup_loc_meta_ = nullptr; + tsc_ctdef.attach_spec_.attach_ctdef_ = root_ctdef; + } + LOG_DEBUG("generate tsc ctdef finish", K(ret), K(op), K(tsc_ctdef), K(tsc_ctdef.scan_ctdef_.pd_expr_spec_.ext_file_column_exprs_)); return ret; @@ -204,8 +204,8 @@ int ObTscCgService::generate_table_param(const ObLogTableScan &op, ObDASScanCtDe ObArray tsc_out_cols; ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); CK(OB_NOT_NULL(schema_guard)); - if (OB_UNLIKELY((pd_agg && 0 == scan_ctdef.aggregate_column_ids_.count()) || - pd_group_by && 0 == scan_ctdef.group_by_column_ids_.count())) { + if (OB_UNLIKELY(pd_agg && 0 == scan_ctdef.aggregate_column_ids_.count()) || + OB_UNLIKELY(pd_group_by && 0 == scan_ctdef.group_by_column_ids_.count())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid argument", K(ret), K(pd_agg), K(scan_ctdef.aggregate_column_ids_.count()), K(pd_group_by), K(scan_ctdef.group_by_column_ids_.count())); @@ -218,8 +218,13 @@ int ObTscCgService::generate_table_param(const ObLogTableScan &op, ObDASScanCtDe ret = OB_ERR_UNEXPECTED; LOG_WARN("NULL ptr", K(ret), K(table_schema)); } else if (table_schema->is_spatial_index() && FALSE_IT(scan_ctdef.table_param_.set_is_spatial_index(true))) { - } else if (OB_FAIL(extract_das_output_column_ids(op, index_id, *table_schema, tsc_out_cols))) { + } else if (table_schema->is_fts_index() && FALSE_IT(scan_ctdef.table_param_.set_is_fts_index(true))) { + } else if (table_schema->is_multivalue_index_aux() && FALSE_IT(scan_ctdef.table_param_.set_is_multivalue_index(true))) { + } else if (OB_FAIL(extract_das_output_column_ids(op, scan_ctdef, *table_schema, tsc_out_cols))) { LOG_WARN("extract tsc output column ids failed", K(ret)); + } + + if (OB_FAIL(ret)) { } else if (FALSE_IT(scan_ctdef.table_param_.get_enable_lob_locator_v2() = (cg_.get_cur_cluster_version() >= CLUSTER_VERSION_4_1_0_0))) { } else if 
(OB_FAIL(scan_ctdef.table_param_.convert(*table_schema, @@ -356,11 +361,17 @@ int ObTscCgService::generate_agent_vt_access_meta(const ObLogTableScan &op, ObTa op.get_stmt(), table_schema))) { LOG_WARN("get table schema failed", K(ret), K(op.get_real_ref_table_id())); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to table schema", K(ret)); } else if (OB_FAIL(cg_.opt_ctx_->get_sql_schema_guard()->get_table_schema(op.get_table_id(), agent_vt_meta.vt_table_id_, op.get_stmt(), vt_table_schema))) { LOG_WARN("get table schema failed", K(agent_vt_meta.vt_table_id_), K(ret)); + } else if (OB_ISNULL(vt_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to virtual table schema", K(ret)); } else { // set vt has tenant_id column for (int64_t nth_col = 0; OB_SUCC(ret) && nth_col < range_columns.count(); ++nth_col) { @@ -416,10 +427,10 @@ int ObTscCgService::generate_tsc_filter(const ObLogTableScan &op, ObTableScanSpe ObArray scan_pushdown_filters; ObArray lookup_pushdown_filters; ObDASScanCtDef &scan_ctdef = spec.tsc_ctdef_.scan_ctdef_; - ObDASScanCtDef *lookup_ctdef = spec.tsc_ctdef_.lookup_ctdef_; - if (OB_FAIL(const_cast(op).extract_pushdown_filters(nonpushdown_filters, - scan_pushdown_filters, - lookup_pushdown_filters))) { + ObDASScanCtDef *lookup_ctdef = spec.tsc_ctdef_.get_lookup_ctdef(); + if (OB_FAIL(op.extract_pushdown_filters(nonpushdown_filters, + scan_pushdown_filters, + lookup_pushdown_filters))) { LOG_WARN("extract pushdown filters failed", K(ret)); } else if (op.get_contains_fake_cte()) { // do nothing @@ -439,7 +450,6 @@ int ObTscCgService::generate_tsc_filter(const ObLogTableScan &op, ObTableScanSpe lookup_ctdef->pd_expr_spec_))) { LOG_WARN("generate pd storage flag for lookup ctdef failed", K(ret)); } - if (OB_SUCC(ret) && !scan_pushdown_filters.empty()) { bool pd_filter = scan_ctdef.pd_expr_spec_.pd_storage_flag_.is_filter_pushdown(); if 
(OB_FAIL(cg_.generate_rt_exprs(scan_pushdown_filters, scan_ctdef.pd_expr_spec_.pushdown_filters_))) { @@ -542,11 +552,18 @@ int ObTscCgService::generate_pd_storage_flag(const ObLogPlan *log_plan, //2. all columns required by TSC operator filters //3. all columns required by pushdown aggr expr int ObTscCgService::extract_das_access_exprs(const ObLogTableScan &op, - ObTableID scan_table_id, + ObDASScanCtDef &scan_ctdef, ObIArray &access_exprs) { int ret = OB_SUCCESS; - if (op.get_index_back() && scan_table_id == op.get_real_index_table_id()) { + const ObTableID &scan_table_id = scan_ctdef.ref_table_id_; + if ((op.is_text_retrieval_scan() && scan_table_id != op.get_ref_table_id()) + || (op.is_multivalue_index_scan() && scan_table_id == op.get_doc_id_index_table_id())) { + // non main table scan in text retrieval + if (OB_FAIL(extract_text_ir_access_columns(op, scan_ctdef, access_exprs))) { + LOG_WARN("failed to extract text ir access columns", K(ret)); + } + } else if (op.get_index_back() && scan_table_id == op.get_real_index_table_id()) { //this das scan is index scan and will lookup the data table later //index scan + lookup data table: the index scan only need access //range condition columns + index filter columns + the data table rowkeys @@ -648,6 +665,9 @@ int ObTscCgService::extract_tsc_access_columns(const ObLogTableScan &op, LOG_WARN("extract pushdown filters failed", K(ret)); } else if (OB_FAIL(append_array_no_dup(tsc_exprs, op.get_output_exprs()))) { LOG_WARN("append output exprs failed", K(ret)); + } else if (op.is_text_retrieval_scan() && OB_FAIL(filter_out_match_exprs(tsc_exprs))) { + // the matching columns of match expr are only used as semantic identifiers and are not actually accessed + LOG_WARN("failed to filter out fts exprs", K(ret)); } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(tsc_exprs, access_exprs, true))) { LOG_WARN("extract column exprs failed", K(ret)); } @@ -697,12 +717,19 @@ int ObTscCgService::generate_access_ctdef(const 
ObLogTableScan &op, ObArray access_exprs; if (OB_FAIL(cg_.opt_ctx_->get_schema_guard()->get_table_schema(MTL_ID(), table_id, table_schema))) { LOG_WARN("get table schema failed", K(ret), K(table_id)); - } else if (OB_FAIL(extract_das_access_exprs(op, scan_ctdef.ref_table_id_, access_exprs))) { + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to table schema", K(ret)); + } else if (OB_FAIL(extract_das_access_exprs(op, scan_ctdef, access_exprs))) { LOG_WARN("extract das access exprs failed", K(ret)); } else if (table_schema->is_spatial_index() && OB_FAIL(generate_geo_access_ctdef(op, *table_schema, access_exprs))) { LOG_WARN("extract das geo access exprs failed", K(ret)); + } else if (table_schema->is_multivalue_index_aux() + && OB_FAIL(extract_doc_id_index_back_access_columns(op, access_exprs))) { + LOG_WARN("append das multivlaue doc id access exprs failed", K(ret)); } + ARRAY_FOREACH(access_exprs, i) { ObRawExpr *expr = access_exprs.at(i); if (OB_UNLIKELY(OB_ISNULL(expr))) { @@ -731,7 +758,7 @@ int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op, ObColumnRefRawExpr* col_expr = static_cast(expr); bool is_mapping_vt_table = op.get_real_ref_table_id() != op.get_ref_table_id(); ObTableID real_table_id = is_mapping_vt_table ? 
op.get_real_ref_table_id() : op.get_table_id(); - if (!col_expr->has_flag(IS_COLUMN) || col_expr->get_table_id() != real_table_id) { + if (!col_expr->has_flag(IS_COLUMN) || (col_expr->get_table_id() != real_table_id && !col_expr->is_doc_id_column())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Expected basic column", K(ret), K(*col_expr), K(col_expr->has_flag(IS_COLUMN)), @@ -763,7 +790,12 @@ int ObTscCgService::generate_pushdown_aggr_ctdef(const ObLogTableScan &op, const uint64_t aggregate_output_count = pushdown_aggr_exprs.count(); const ObIArray &group_by_columns = op.get_pushdown_groupby_columns(); const uint64_t group_by_column_count = group_by_columns.count(); - if (op.get_index_back() && aggregate_output_count > 0) { + if (op.is_text_retrieval_scan()) { + // text retrieval scan on fulltext index + if (OB_FAIL(generate_text_ir_pushdown_expr_ctdef(op, scan_ctdef))) { + LOG_WARN("failed to generate text ir pushdown aggregate ctdef", K(ret), K(op)); + } + } else if (op.get_index_back() && aggregate_output_count > 0) { ret = OB_NOT_SUPPORTED; LOG_WARN("pushdown aggr to table scan not supported in index lookup", K(op.get_table_id()), K(op.get_ref_table_id()), K(op.get_index_table_id())); @@ -888,19 +920,29 @@ int ObTscCgService::generate_das_scan_ctdef(const ObLogTableScan &op, } int ObTscCgService::extract_das_output_column_ids(const ObLogTableScan &op, - ObTableID table_id, + ObDASScanCtDef &scan_ctdef, const ObTableSchema &index_schema, ObIArray &output_cids) { int ret = OB_SUCCESS; ObArray das_output_cols; + const ObTableID &table_id = scan_ctdef.ref_table_id_; - if (op.get_index_back() && op.get_real_index_table_id() == table_id) { + if ((op.is_text_retrieval_scan() && table_id != op.get_ref_table_id()) || + (op.is_multivalue_index_scan() && table_id == op.get_doc_id_index_table_id())) { + // non main table scan in text retrieval + if (OB_FAIL(extract_text_ir_das_output_column_ids(op, scan_ctdef, output_cids))) { + LOG_WARN("failed to extract text retrieval das 
output column ids", K(ret)); + } + } else if (op.get_index_back() && op.get_real_index_table_id() == table_id) { //this situation is index lookup, and the index table scan is being processed //the output column id of index lookup is the rowkey of the data table const ObTableSchema *table_schema = nullptr; if (OB_FAIL(cg_.opt_ctx_->get_schema_guard()->get_table_schema(MTL_ID(), op.get_real_ref_table_id(), table_schema))) { LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id())); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to table schema", K(ret)); } else if (OB_FAIL(table_schema->get_rowkey_column_ids(output_cids))) { LOG_WARN("get rowkey column ids failed", K(ret)); } else if (nullptr != op.get_group_id_expr() && op.use_batch()) { @@ -930,6 +972,20 @@ int ObTscCgService::extract_das_output_column_ids(const ObLogTableScan &op, LOG_WARN("store cell colum id failed", K(ret), K(mbr_col_id)); } } + + if (OB_SUCC(ret) && index_schema.is_multivalue_index()) { + uint64_t doc_id_col_id = OB_INVALID_ID; + uint64_t ft_col_id = OB_INVALID_ID; + const ObColumnSchemaV2 *doc_id_col_schema = nullptr; + if (OB_FAIL(index_schema.get_fulltext_column_ids(doc_id_col_id, ft_col_id))) { + LOG_WARN("fail to get fulltext column ids", K(ret)); + } else if (OB_INVALID_ID == doc_id_col_id) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get doc id column.", K(ret)); + } else if (OB_FAIL(output_cids.push_back(doc_id_col_id))) { + LOG_WARN("store colum id failed", K(ret)); + } + } //column expr in non-pushdown filter need to be output, //because filter_row will use it in TSC operator } else if (OB_FAIL(extract_tsc_access_columns(op, das_output_cols))) { @@ -1048,7 +1104,722 @@ int ObTscCgService::generate_table_loc_meta(uint64_t table_loc_id, return ret; } +int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASBaseCtDef *&root_ctdef) +{ + int ret = OB_SUCCESS; + 
ObMatchFunRawExpr *match_against = op.get_text_retrieval_info().match_expr_; + ObIAllocator &ctdef_alloc = cg_.phy_plan_->get_allocator(); + ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); + ObDASIRScanCtDef *ir_scan_ctdef = nullptr; + ObDASSortCtDef *sort_ctdef = nullptr; + const bool use_approx_pre_agg = true; // TODO: support differentiate use approx agg or not + if (OB_ISNULL(match_against) || OB_ISNULL(schema_guard)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer", K(ret), KP(match_against), KP(schema_guard)); + } else if (OB_UNLIKELY(OB_INVALID_ID == op.get_text_retrieval_info().inv_idx_tid_ + || (op.need_text_retrieval_calc_relevance() && OB_INVALID_ID == op.get_text_retrieval_info().fwd_idx_tid_))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid fulltext index table id", K(ret), KPC(match_against)); + } else if (OB_UNLIKELY(ObTSCIRScanType::OB_IR_INV_IDX_SCAN != tsc_ctdef.scan_ctdef_.ir_scan_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected ir scan type for inverted index scan", K(ret), K(tsc_ctdef.scan_ctdef_)); + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_IR_SCAN, ctdef_alloc, ir_scan_ctdef))) { + LOG_WARN("allocate ir scan ctdef failed", K(ret)); + } else if (op.need_text_retrieval_calc_relevance()) { + ObDASScanCtDef *inv_idx_scan_ctdef = &tsc_ctdef.scan_ctdef_; + ObDASScanCtDef *inv_idx_agg_ctdef = nullptr; + ObDASScanCtDef *doc_id_idx_agg_ctdef = nullptr; + ObDASScanCtDef *fwd_idx_agg_ctdef = nullptr; + bool has_rowscn = false; + if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, inv_idx_agg_ctdef))) { + LOG_WARN("allocate inv idx agg ctdef failed", K(ret)); + } else { + inv_idx_agg_ctdef->ref_table_id_ = op.get_text_retrieval_info().inv_idx_tid_; + inv_idx_agg_ctdef->pd_expr_spec_.pd_storage_flag_.set_aggregate_pushdown(true); + inv_idx_agg_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_IR_INV_IDX_AGG; + if (OB_FAIL(generate_das_scan_ctdef(op, 
*inv_idx_agg_ctdef, has_rowscn))) { + LOG_WARN("failed to generate das scan ctdef", K(ret)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, doc_id_idx_agg_ctdef))) { + LOG_WARN("allocate doc id idx agg ctdef failed", K(ret)); + } else { + doc_id_idx_agg_ctdef->ref_table_id_ = op.get_text_retrieval_info().doc_id_idx_tid_; + doc_id_idx_agg_ctdef->pd_expr_spec_.pd_storage_flag_.set_aggregate_pushdown(true); + doc_id_idx_agg_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_IR_DOC_ID_IDX_AGG; + if (OB_FAIL(generate_das_scan_ctdef(op, *doc_id_idx_agg_ctdef, has_rowscn))) { + LOG_WARN("failed to generate das scan ctdef", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, fwd_idx_agg_ctdef))) { + LOG_WARN("allocate fwd idx agg ctdef failed", K(ret)); + } else { + fwd_idx_agg_ctdef->ref_table_id_ = op.get_text_retrieval_info().fwd_idx_tid_; + fwd_idx_agg_ctdef->pd_expr_spec_.pd_storage_flag_.set_aggregate_pushdown(true); + fwd_idx_agg_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_IR_FWD_IDX_AGG; + if (OB_FAIL(generate_das_scan_ctdef(op, *fwd_idx_agg_ctdef, has_rowscn))) { + LOG_WARN("generate das scan ctdef failed", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + int64_t ir_scan_children_cnt = use_approx_pre_agg ? 
3 : 4; + if (OB_ISNULL(ir_scan_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &ctdef_alloc, ir_scan_children_cnt))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate ir scan ctdef children failed", K(ret)); + } else { + ir_scan_ctdef->children_cnt_ = ir_scan_children_cnt; + if (use_approx_pre_agg) { + // TODO: reduce more scan with approx + ir_scan_ctdef->children_[0] = inv_idx_scan_ctdef; + ir_scan_ctdef->children_[1] = inv_idx_agg_ctdef; + ir_scan_ctdef->children_[2] = doc_id_idx_agg_ctdef; + ir_scan_ctdef->has_inv_agg_ = true; + ir_scan_ctdef->has_doc_id_agg_ = true; + } else { + ir_scan_ctdef->children_[0] = inv_idx_scan_ctdef; + ir_scan_ctdef->children_[1] = inv_idx_agg_ctdef; + ir_scan_ctdef->children_[2] = doc_id_idx_agg_ctdef; + ir_scan_ctdef->children_[3] = fwd_idx_agg_ctdef; + ir_scan_ctdef->has_inv_agg_ = true; + ir_scan_ctdef->has_doc_id_agg_ = true; + ir_scan_ctdef->has_fwd_agg_ = true; + } + } + } + } else { + if (OB_ISNULL(ir_scan_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &ctdef_alloc, 1))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate ir scan ctdef children failed", K(ret)); + } else { + ir_scan_ctdef->children_cnt_ = 1; + ir_scan_ctdef->children_[0] = &tsc_ctdef.scan_ctdef_; + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(generate_text_ir_spec_exprs(op, *ir_scan_ctdef))) { + LOG_WARN("failed to generate text ir spec exprs", K(ret), KPC(match_against)); + } else { + const ObCostTableScanInfo *est_cost_info = op.get_est_cost_info(); + int partition_row_cnt = 0; + if (!use_approx_pre_agg + || nullptr == est_cost_info + || nullptr == est_cost_info->table_meta_info_ + || 0 == est_cost_info->table_meta_info_->part_count_) { + // No estimated info or approx agg not allowed, do total document count on execution; + } else { + partition_row_cnt = est_cost_info->table_meta_info_->table_row_count_ / est_cost_info->table_meta_info_->part_count_; + } + ir_scan_ctdef->estimated_total_doc_cnt_ = partition_row_cnt; + } + } + + if 
(OB_SUCC(ret) && op.get_text_retrieval_info().need_sort()) { + ObSEArray order_items; + if (OB_FAIL(order_items.push_back(op.get_text_retrieval_info().sort_key_))) { + LOG_WARN("append order item array failed", K(ret)); + } else if (OB_FAIL(generate_das_sort_ctdef( + order_items, + op.get_text_retrieval_info().with_ties_, + op.get_text_retrieval_info().topk_limit_expr_, + op.get_text_retrieval_info().topk_offset_expr_, + ir_scan_ctdef, + sort_ctdef))) { + LOG_WARN("generate sort ctdef failed", K(ret)); + } + } + + if (OB_SUCC(ret)) { + ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr; + ObDASBaseCtDef *ir_output_ctdef = nullptr == sort_ctdef ? + static_cast(ir_scan_ctdef) : static_cast(sort_ctdef); + if (OB_FAIL(generate_doc_id_lookup_ctdef(op, tsc_ctdef, ir_output_ctdef, aux_lookup_ctdef))) { + LOG_WARN("generate doc id lookup ctdef failed", K(ret)); + } else if (OB_FAIL(append_fts_relavence_project_col(aux_lookup_ctdef, ir_scan_ctdef))) { + LOG_WARN("failed to append fts relavence project col", K(ret)); + } else { + root_ctdef = aux_lookup_ctdef; + } + } + return ret; +} + +int ObTscCgService::append_fts_relavence_project_col( + ObDASIRAuxLookupCtDef *aux_lookup_ctdef, + ObDASIRScanCtDef *ir_scan_ctdef) +{ + int ret = OB_SUCCESS; + + if (OB_NOT_NULL(ir_scan_ctdef)) { + if (ir_scan_ctdef->relevance_proj_col_ != nullptr) { + ObArray result_outputs; + if (OB_FAIL(result_outputs.push_back(ir_scan_ctdef->relevance_proj_col_))) { + LOG_WARN("store relevance projector column expr failed", K(ret)); + } else if (OB_FAIL(append(result_outputs, aux_lookup_ctdef->result_output_))) { + LOG_WARN("append tmp array failed", K(ret)); + } else { + aux_lookup_ctdef->result_output_.destroy(); + if (OB_FAIL(aux_lookup_ctdef->result_output_.init(result_outputs.count()))) { + LOG_WARN("reserve slot failed", K(ret)); + } else if (OB_FAIL(aux_lookup_ctdef->result_output_.assign(result_outputs))) { + LOG_WARN("store relevance projector column expr failed", K(ret)); + } else { + 
aux_lookup_ctdef->relevance_proj_col_ = ir_scan_ctdef->relevance_proj_col_; + } + } + } + } + return ret; +} + +int ObTscCgService::extract_text_ir_access_columns( + const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &access_exprs) +{ + int ret = OB_SUCCESS; + const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info(); + if (scan_ctdef.ref_table_id_ == op.get_doc_id_index_table_id()) { + if (OB_FAIL(extract_doc_id_index_back_access_columns(op, access_exprs))) { + LOG_WARN("failed to extract doc id index back access columns", K(ret)); + } + } else { + switch (scan_ctdef.ir_scan_type_) { + case ObTSCIRScanType::OB_IR_INV_IDX_SCAN: + if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(tr_info.token_cnt_column_)))) { + LOG_WARN("failed to push token cnt column to access exprs", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(tr_info.doc_id_column_)))) { + LOG_WARN("failed to push document id column to access exprs", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(tr_info.doc_length_column_)))) { + LOG_WARN("failed to add document length column to access exprs", K(ret)); + } + break; + case ObTSCIRScanType::OB_IR_DOC_ID_IDX_AGG: + if (OB_FAIL(add_var_to_array_no_dup(access_exprs, tr_info.total_doc_cnt_->get_param_expr((0))))) { + LOG_WARN("failed to push document id column to access exprs", K(ret)); + } + break; + case ObTSCIRScanType::OB_IR_INV_IDX_AGG: + if (OB_FAIL(add_var_to_array_no_dup(access_exprs, tr_info.related_doc_cnt_->get_param_expr(0)))) { + LOG_WARN("failed to push token cnt column to access exprs", K(ret)); + } + break; + case ObTSCIRScanType::OB_IR_FWD_IDX_AGG: + if (OB_FAIL(add_var_to_array_no_dup(access_exprs, tr_info.doc_token_cnt_->get_param_expr(0)))) { + LOG_WARN("failed to push token cnt column to access exprs", K(ret)); + } + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected text ir scan type", K(ret), K(scan_ctdef)); + } + } 
+ return ret; +} + +int ObTscCgService::extract_text_ir_das_output_column_ids( + const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &output_cids) +{ + int ret = OB_SUCCESS; + const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info(); + if (scan_ctdef.ref_table_id_ == op.get_doc_id_index_table_id()) { + if (OB_FAIL(extract_doc_id_index_back_output_column_ids(op, output_cids))) { + LOG_WARN("failed to get doc id index back cids", K(ret), K(scan_ctdef.ref_table_id_)); + } + } else if (ObTSCIRScanType::OB_IR_INV_IDX_SCAN == scan_ctdef.ir_scan_type_) { + if (OB_FAIL(output_cids.push_back( + static_cast(tr_info.token_cnt_column_)->get_column_id()))) { + LOG_WARN("failed to push output token cnt col id", K(ret)); + } else if (OB_FAIL(output_cids.push_back( + static_cast(tr_info.doc_id_column_)->get_column_id()))) { + LOG_WARN("failed to push output doc id col id", K(ret)); + } else if (OB_FAIL(output_cids.push_back( + static_cast(tr_info.doc_length_column_)->get_column_id()))) { + LOG_WARN("failed to push output doc length col id", K(ret)); + } + } + return ret; +} + +int ObTscCgService::generate_text_ir_pushdown_expr_ctdef( + const ObLogTableScan &op, + ObDASScanCtDef &scan_ctdef) +{ + int ret = OB_SUCCESS; + const uint64_t scan_table_id = scan_ctdef.ref_table_id_; + const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info(); + if (OB_FAIL(cg_.mark_expr_self_produced(tr_info.match_expr_))) { + LOG_WARN("failed to mark raw agg expr", K(ret), KPC(tr_info.match_expr_)); + } else if (!scan_ctdef.pd_expr_spec_.pd_storage_flag_.is_aggregate_pushdown()) { + // this das scan do not need aggregate pushdown + } else { + ObSEArray agg_expr_arr; + switch (scan_ctdef.ir_scan_type_) { + case ObTSCIRScanType::OB_IR_DOC_ID_IDX_AGG: + if (OB_FAIL(add_var_to_array_no_dup(agg_expr_arr, tr_info.total_doc_cnt_))) { + LOG_WARN("failed to push document id column to access exprs", K(ret)); + } + break; + case ObTSCIRScanType::OB_IR_INV_IDX_AGG: + if 
(OB_FAIL(add_var_to_array_no_dup(agg_expr_arr, tr_info.related_doc_cnt_))) { + LOG_WARN("failed to push document id column to access exprs", K(ret)); + } + break; + case ObTSCIRScanType::OB_IR_FWD_IDX_AGG: + if (OB_FAIL(add_var_to_array_no_dup(agg_expr_arr, tr_info.doc_token_cnt_))) { + LOG_WARN("failed to push document id column to access exprs", K(ret)); + } + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected text ir scan type with aggregate", K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(scan_ctdef.aggregate_column_ids_.init(agg_expr_arr.count()))) { + LOG_WARN("failed to init aggregate column ids", K(ret), K(agg_expr_arr.count())); + } else if (OB_FAIL(scan_ctdef.pd_expr_spec_.pd_storage_aggregate_output_.reserve(agg_expr_arr.count()))) { + LOG_WARN("failed to reserve memory for aggregate output expr array", K(ret)); + } + + for (int64_t i = 0; OB_SUCC(ret) && i < agg_expr_arr.count(); ++i) { + ObAggFunRawExpr *agg_expr = agg_expr_arr.at(i); + ObExpr *expr = nullptr; + ObRawExpr *param_expr = nullptr; + ObColumnRefRawExpr *param_col_expr = nullptr; + if (OB_ISNULL(agg_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected agg expr", K(ret), KPC(agg_expr)); + } else if (OB_FAIL(cg_.generate_rt_expr(*agg_expr, expr))) { + LOG_WARN("failed to generate runtime expr", K(ret)); + } else if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to generate runtime expr", K(ret), KPC(agg_expr)); + } else if (OB_FAIL(scan_ctdef.pd_expr_spec_.pd_storage_aggregate_output_.push_back(expr))) { + LOG_WARN("failed to append expr to aggregate output", K(ret)); + } else if (OB_FAIL(cg_.mark_expr_self_produced(agg_expr))) { + LOG_WARN("failed to mark raw agg expr", K(ret), KPC(agg_expr)); + } else if (OB_UNLIKELY(agg_expr->get_real_param_exprs().empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected count all agg expr in text retrieval scan", K(ret)); + } else if (OB_ISNULL(param_expr = agg_expr->get_param_expr(0)) + || 
OB_UNLIKELY(!param_expr->is_column_ref_expr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected agg param expr type", K(ret), KPC(param_expr)); + } else if (OB_ISNULL(param_col_expr = static_cast(param_expr))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null param column expr", K(ret)); + } else if (OB_UNLIKELY(param_col_expr->get_table_id() != op.get_table_id() && !param_col_expr->is_doc_id_column())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexoected column to aggregate", K(ret), KPC(param_col_expr), K(op.get_table_id())); + } else if (OB_FAIL(scan_ctdef.aggregate_column_ids_.push_back(param_col_expr->get_column_id()))) { + LOG_WARN("failed to append aggregate column ids", K(ret)); + } + } + } + return ret; +} + +int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op, + ObDASIRScanCtDef &text_ir_scan_ctdef) +{ + int ret = OB_SUCCESS; + ObSEArray result_output; + const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info(); + if (OB_ISNULL(tr_info.match_expr_) || OB_ISNULL(tr_info.relevance_expr_) || + OB_ISNULL(tr_info.doc_id_column_) || OB_ISNULL(tr_info.doc_length_column_)) { /* doc_length_column_ is dereferenced in the column-index search below, so it is validated together with the other inputs */ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(cg_.generate_rt_expr(*tr_info.match_expr_->get_search_key(), text_ir_scan_ctdef.search_text_))) { + LOG_WARN("cg rt expr for search text failed", K(ret)); + } else if (OB_ISNULL(tr_info.pushdown_match_filter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null match filter", K(ret)); + } else if (OB_FAIL(cg_.generate_rt_expr(*tr_info.pushdown_match_filter_, text_ir_scan_ctdef.match_filter_))) { + LOG_WARN("cg rt expr for match filter failed", K(ret)); + } else { + const UIntFixedArray &inv_scan_col_id = text_ir_scan_ctdef.get_inv_idx_scan_ctdef()->access_column_ids_; + const ObColumnRefRawExpr *doc_id_column = static_cast(tr_info.doc_id_column_); + const ObColumnRefRawExpr *doc_length_column = static_cast(tr_info.doc_length_column_); + + int64_t doc_id_col_idx = -1; + int64_t doc_length_col_idx = -1; + 
for (int64_t i = 0; i < inv_scan_col_id.count(); ++i) { + if (inv_scan_col_id.at(i) == doc_id_column->get_column_id()) { + doc_id_col_idx = i; + } else if (inv_scan_col_id.at(i) == doc_length_column->get_column_id()) { + doc_length_col_idx = i; + } + } + if (OB_UNLIKELY(-1 == doc_id_col_idx || -1 == doc_length_col_idx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected doc id not found in inverted index scan access columns", + K(ret), K(text_ir_scan_ctdef), K(doc_id_col_idx), K(doc_length_col_idx)); + } else { + text_ir_scan_ctdef.inv_scan_doc_id_col_ = + text_ir_scan_ctdef.get_inv_idx_scan_ctdef()->pd_expr_spec_.access_exprs_.at(doc_id_col_idx); + text_ir_scan_ctdef.inv_scan_doc_length_col_ = + text_ir_scan_ctdef.get_inv_idx_scan_ctdef()->pd_expr_spec_.access_exprs_.at(doc_length_col_idx); + if (OB_FAIL(result_output.push_back(text_ir_scan_ctdef.inv_scan_doc_id_col_))) { + LOG_WARN("failed to append output exprs", K(ret)); + } + } + } + + if (OB_SUCC(ret) && op.need_text_retrieval_calc_relevance()) { + if (OB_ISNULL(tr_info.relevance_expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null relevance expr", K(ret)); + } else if (OB_FAIL(cg_.generate_rt_expr(*tr_info.relevance_expr_, text_ir_scan_ctdef.relevance_expr_))) { + LOG_WARN("cg rt expr for relevance expr failed", K(ret)); + } else if (OB_FAIL(cg_.generate_rt_expr(*tr_info.match_expr_, + text_ir_scan_ctdef.relevance_proj_col_))) { + LOG_WARN("cg rt expr for relevance score proejction failed", K(ret)); + } else if (OB_ISNULL(text_ir_scan_ctdef.relevance_proj_col_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected relevance pseudo score colum not found", K(ret)); + } else if (OB_FAIL(result_output.push_back(text_ir_scan_ctdef.relevance_expr_))) { + LOG_WARN("failed to append relevance expr", K(ret)); + } + } + + if (FAILEDx(text_ir_scan_ctdef.result_output_.assign(result_output))) { + LOG_WARN("failed to assign result output", K(ret), K(result_output)); + } + + return ret; +} + +int 
ObTscCgService::generate_doc_id_lookup_ctdef(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASBaseCtDef *ir_scan_ctdef, + ObDASIRAuxLookupCtDef *&aux_lookup_ctdef) +{ + int ret = OB_SUCCESS; + + const ObTableSchema *data_schema = nullptr; + const ObTableSchema *index_schema = nullptr; + ObDASScanCtDef *scan_ctdef = nullptr; + ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); + uint64_t doc_id_index_tid = OB_INVALID_ID; + + aux_lookup_ctdef = nullptr; + if (OB_ISNULL(schema_guard)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to schema guard", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(), data_schema))) { + LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id())); + } else if (OB_ISNULL(data_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get data table schema", K(ret)); + } else if (OB_FAIL(data_schema->get_doc_id_rowkey_tid(doc_id_index_tid))) { + LOG_WARN("failed to get doc id rowkey index tid", K(ret), KPC(data_schema)); + } else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(), + doc_id_index_tid, + op.get_stmt(), + index_schema))) { + LOG_WARN("get table schema failed", K(ret), K(doc_id_index_tid)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get doc_id index schema", K(ret), K(doc_id_index_tid)); + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, cg_.phy_plan_->get_allocator(), scan_ctdef))) { + LOG_WARN("alloc das ctdef failed", K(ret)); + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_IR_AUX_LOOKUP, cg_.phy_plan_->get_allocator(), aux_lookup_ctdef))) { + LOG_WARN("alloc aux lookup ctdef failed", K(ret)); + } else if (OB_ISNULL(aux_lookup_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &cg_.phy_plan_->get_allocator(), 2))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } else { + bool has_rowscn = false; + 
ObArray result_outputs; + scan_ctdef->ref_table_id_ = doc_id_index_tid; + aux_lookup_ctdef->children_cnt_ = 2; + ObDASTableLocMeta *scan_loc_meta = OB_NEWx(ObDASTableLocMeta, &cg_.phy_plan_->get_allocator(), cg_.phy_plan_->get_allocator()); + if (OB_ISNULL(scan_loc_meta)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate scan location meta failed", K(ret)); + } else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) { + LOG_WARN("generate das lookup scan ctdef failed", K(ret)); + } else if (OB_FAIL(result_outputs.assign(scan_ctdef->result_output_))) { + LOG_WARN("construct aux lookup ctdef failed", K(ret)); + } else if (OB_FAIL(generate_table_loc_meta(op.get_table_id(), + *op.get_stmt(), + *index_schema, + *cg_.opt_ctx_->get_session_info(), + *scan_loc_meta))) { + LOG_WARN("generate table loc meta failed", K(ret)); + } else if (OB_FAIL(tsc_ctdef.attach_spec_.attach_loc_metas_.push_back(scan_loc_meta))) { + LOG_WARN("store scan loc meta failed", K(ret)); + } else { + aux_lookup_ctdef->children_[0] = ir_scan_ctdef; + aux_lookup_ctdef->children_[1] = scan_ctdef; + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(aux_lookup_ctdef->result_output_.assign(result_outputs))) { + LOG_WARN("assign result output failed", K(ret)); + } + } + } + + return ret; +} + +int ObTscCgService::generate_table_lookup_ctdef(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASBaseCtDef *scan_ctdef, + ObDASTableLookupCtDef *&lookup_ctdef) +{ + int ret = OB_SUCCESS; + ObIAllocator &allocator = cg_.phy_plan_->get_allocator(); + tsc_ctdef.lookup_loc_meta_ = OB_NEWx(ObDASTableLocMeta, &allocator, allocator); + if (OB_ISNULL(tsc_ctdef.lookup_loc_meta_)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate lookup location meta buffer failed", K(ret)); + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, + cg_.phy_plan_->get_allocator(), + tsc_ctdef.lookup_ctdef_))) { + LOG_WARN("alloc das ctdef failed", K(ret)); + } else { + bool has_rowscn = 
false; + const ObTableSchema *table_schema = nullptr; + ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); + tsc_ctdef.lookup_ctdef_->ref_table_id_ = op.get_real_ref_table_id(); + + if (OB_FAIL(generate_das_scan_ctdef(op, *tsc_ctdef.lookup_ctdef_, has_rowscn))) { + LOG_WARN("generate das lookup scan ctdef failed", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(op.get_table_id(), + op.get_ref_table_id(), + op.get_stmt(), + table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id())); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to table schema", K(ret)); + } else if (OB_FAIL(generate_table_loc_meta(op.get_table_id(), + *op.get_stmt(), + *table_schema, + *cg_.opt_ctx_->get_session_info(), + *tsc_ctdef.lookup_loc_meta_))) { + LOG_WARN("generate table loc meta failed", K(ret)); + } else { + tsc_ctdef.flashback_item_.need_scn_ |= has_rowscn; + } + + if (OB_SUCC(ret) && op.get_index_back() && op.get_is_index_global()) { + if (OB_ISNULL(op.get_calc_part_id_expr()) || op.get_rowkey_exprs().empty()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("calc_part_id_expr is null or rowkeys` count is zero", K(ret)); + } else if (OB_FAIL(cg_.generate_calc_part_id_expr(*op.get_calc_part_id_expr(), + tsc_ctdef.lookup_loc_meta_, + tsc_ctdef.calc_part_id_expr_))) { + LOG_WARN("fail to generate calc part id expr", K(ret), KP(op.get_calc_part_id_expr())); + } else if (OB_FAIL(cg_.generate_rt_exprs(op.get_rowkey_exprs(), + tsc_ctdef.global_index_rowkey_exprs_))) { + LOG_WARN("fail to generate rowkey exprs", K(ret)); + } + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_LOOKUP, allocator, lookup_ctdef))) { + LOG_WARN("alloc aux lookup ctdef failed", K(ret)); + } else if (OB_ISNULL(lookup_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &allocator, 2))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } 
else { + lookup_ctdef->children_cnt_ = 2; + if (OB_FAIL(tsc_ctdef.attach_spec_.attach_loc_metas_.push_back(tsc_ctdef.lookup_loc_meta_))) { + LOG_WARN("store scan loc meta failed", K(ret)); + } else { + lookup_ctdef->children_[0] = scan_ctdef; + lookup_ctdef->children_[1] = tsc_ctdef.lookup_ctdef_; + } + } + } + + //generate lookup result output exprs + if (OB_SUCC(ret)) { + ObArray result_outputs; + if (OB_FAIL(result_outputs.assign(tsc_ctdef.lookup_ctdef_->result_output_))) { + LOG_WARN("assign result output failed", K(ret)); + } else if (DAS_OP_IR_AUX_LOOKUP == scan_ctdef->op_type_) { + //add relevance score pseudo column to final scan result output + ObDASIRAuxLookupCtDef *aux_lookup_ctdef = static_cast(scan_ctdef); + if (aux_lookup_ctdef->relevance_proj_col_ != nullptr) { + if (OB_FAIL(result_outputs.push_back(aux_lookup_ctdef->relevance_proj_col_))) { + LOG_WARN("store result outputs failed", K(ret)); + } + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(lookup_ctdef->result_output_.assign(result_outputs))) { + LOG_WARN("assign result output failed", K(ret)); + } + } + } + return ret; +} + +int ObTscCgService::extract_doc_id_index_back_access_columns( + const ObLogTableScan &op, + ObIArray &access_exprs) +{ + int ret = OB_SUCCESS; + ObArray domain_col_exprs; + if (OB_UNLIKELY(0 == op.get_domain_exprs().count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected empty domain expr array", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(op.get_domain_exprs(), domain_col_exprs, true))) { + LOG_WARN("failed to extract domain column ref exprs", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < domain_col_exprs.count(); ++i) { + ObRawExpr *raw_expr = domain_col_exprs.at(i); + ObColumnRefRawExpr *col_expr = static_cast(raw_expr); + if (col_expr->is_doc_id_column() + || (col_expr->get_table_id() == op.get_table_id() && col_expr->is_rowkey_column())) { + if (OB_FAIL(add_var_to_array_no_dup(access_exprs, raw_expr))) { + LOG_WARN("failed to push doc id 
index back access column to access exprs", K(ret)); + } + } + } + return ret; +} + +int ObTscCgService::extract_doc_id_index_back_output_column_ids( + const ObLogTableScan &op, + ObIArray &output_cids) +{ + // outpout main table rowkey for index back + int ret = OB_SUCCESS; + const ObTableSchema *table_schema = nullptr; + ObArray rowkey_cids; + if (OB_FAIL(cg_.opt_ctx_->get_schema_guard()->get_table_schema(MTL_ID(), op.get_real_ref_table_id(), table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id())); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null table schema", K(ret)); + } else if (OB_FAIL(table_schema->get_rowkey_column_ids(rowkey_cids))) { + LOG_WARN("get rowkey column ids failed", K(ret)); + } else if (OB_FAIL(append(output_cids, rowkey_cids))) { + LOG_WARN("failed to append output column ids", K(ret)); + } + return ret; +} + +int ObTscCgService::filter_out_match_exprs(ObIArray &exprs) { + int ret = OB_SUCCESS; + ObSEArray temp_exprs; + for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); i++) { + if (OB_ISNULL(exprs.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (!exprs.at(i)->has_flag(CNT_MATCH_EXPR) && OB_FAIL(temp_exprs.push_back(exprs.at(i)))) { + LOG_WARN("failed to push back expr", K(ret)); + } + } + if (OB_SUCC(ret) && OB_FAIL(exprs.assign(temp_exprs))) { + LOG_WARN("failed to assign exprs", K(ret)); + } + return ret; +} + +int ObTscCgService::generate_das_sort_ctdef( + const ObIArray &sort_keys, + const bool fetch_with_ties, + ObRawExpr *topk_limit_expr, + ObRawExpr *topk_offset_expr, + ObDASBaseCtDef *child_ctdef, + ObDASSortCtDef *&sort_ctdef) +{ + int ret = OB_SUCCESS; + const int64_t sort_cnt = sort_keys.count(); + ObIAllocator &ctdef_alloc = cg_.phy_plan_->get_allocator(); + if (OB_UNLIKELY(0 == sort_cnt) || OB_ISNULL(child_ctdef)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid sort arg", K(ret), K(sort_cnt), 
KPC(child_ctdef)); + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_SORT, ctdef_alloc, sort_ctdef))) { + LOG_WARN("alloc sort ctdef failed ", K(ret)); + } else if (OB_FAIL(sort_ctdef->sort_collations_.init(sort_cnt))) { + LOG_WARN("failed to init sort collations", K(ret)); + } else if (OB_FAIL(sort_ctdef->sort_cmp_funcs_.init(sort_cnt))) { + LOG_WARN("failed to init sort cmp funcs", K(ret)); + } else if (OB_FAIL(sort_ctdef->sort_exprs_.init(sort_cnt))) { + LOG_WARN("failed to init sort exprs", K(ret)); + } else if (OB_ISNULL(sort_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &ctdef_alloc, 1))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate ir scan ctdef children failed", K(ret)); + } else if (nullptr != topk_limit_expr && + OB_FAIL(cg_.generate_rt_expr(*topk_limit_expr, sort_ctdef->limit_expr_))) { + LOG_WARN("cg rt expr for top-k limit expr failed", K(ret)); + } else if (nullptr != topk_offset_expr && + OB_FAIL(cg_.generate_rt_expr(*topk_offset_expr, sort_ctdef->offset_expr_))) { + LOG_WARN("cg rt expr for top-k offset expr failed", K(ret)); + } else { + sort_ctdef->children_cnt_ = 1; + sort_ctdef->children_[0] = child_ctdef; + sort_ctdef->fetch_with_ties_ = fetch_with_ties; + } + + ObSEArray result_output; + int64_t field_idx = 0; + for (int64_t i = 0; i < sort_keys.count() && OB_SUCC(ret); ++i) { + const OrderItem &order_item = sort_keys.at(i); + ObExpr *expr = nullptr; + if (OB_FAIL(cg_.generate_rt_expr(*order_item.expr_, expr))) { + LOG_WARN("failed to generate rt expr", K(ret)); + } else { + ObSortFieldCollation field_collation(field_idx, /* not post-incremented here: field_idx is advanced once per key in the success branch, keeping field indexes contiguous */ + expr->datum_meta_.cs_type_, + order_item.is_ascending(), + (order_item.is_null_first() ^ order_item.is_ascending()) ? 
NULL_LAST : NULL_FIRST); + ObSortCmpFunc cmp_func; + cmp_func.cmp_func_ = ObDatumFuncs::get_nullsafe_cmp_func( + expr->datum_meta_.type_, + expr->datum_meta_.type_, + field_collation.null_pos_, + field_collation.cs_type_, + expr->datum_meta_.scale_, + lib::is_oracle_mode(), + expr->obj_meta_.has_lob_header(), + expr->datum_meta_.precision_, + expr->datum_meta_.precision_); + if (OB_ISNULL(cmp_func.cmp_func_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cmp_func is null, check datatype is valid", K(ret)); + } else if (OB_FAIL(sort_ctdef->sort_cmp_funcs_.push_back(cmp_func))) { + LOG_WARN("failed to append sort function", K(ret)); + } else if (OB_FAIL(sort_ctdef->sort_collations_.push_back(field_collation))) { + LOG_WARN("failed to push back field collation", K(ret)); + } else if (OB_FAIL(sort_ctdef->sort_exprs_.push_back(expr))) { + LOG_WARN("failed to push back expr", K(ret)); + } else { + field_idx++; + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(append_array_no_dup(result_output, sort_ctdef->sort_exprs_))) { + LOG_WARN("failed to append sort exprs to result output", K(ret)); + } else if (ObDASTaskFactory::is_attached(child_ctdef->op_type_) + && OB_FAIL(append_array_no_dup(result_output, static_cast(child_ctdef)->result_output_))) { + LOG_WARN("failed to append child result output", K(ret)); + } else if (OB_FAIL(sort_ctdef->result_output_.assign(result_output))) { + LOG_WARN("failed to assign result output", K(ret)); + } + return ret; +} } // namespace sql } // namespace oceanbase diff --git a/src/sql/code_generator/ob_tsc_cg_service.h b/src/sql/code_generator/ob_tsc_cg_service.h index 9510709cef..3b0b82a650 100644 --- a/src/sql/code_generator/ob_tsc_cg_service.h +++ b/src/sql/code_generator/ob_tsc_cg_service.h @@ -55,17 +55,49 @@ private: int generate_das_scan_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef, bool &has_rowscn); int generate_table_param(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef); int extract_das_output_column_ids(const 
ObLogTableScan &op, - common::ObTableID table_id, + ObDASScanCtDef &scan_ctdef, const ObTableSchema &index_schema, common::ObIArray &output_cids); int extract_das_access_exprs(const ObLogTableScan &op, - common::ObTableID scan_table_id, + ObDASScanCtDef &scan_ctdef, common::ObIArray &access_exprs); //extract these column exprs need by TSC operator, these column will output by DAS scan int extract_tsc_access_columns(const ObLogTableScan &op, common::ObIArray &access_exprs); int extract_das_column_ids(const common::ObIArray &column_exprs, common::ObIArray &column_ids); int generate_geo_access_ctdef(const ObLogTableScan &op, const ObTableSchema &index_schema, ObArray &access_exprs); + int generate_text_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef); + int extract_text_ir_access_columns(const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &access_exprs); + int extract_text_ir_das_output_column_ids(const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &output_cids); + int generate_text_ir_pushdown_expr_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef); + int generate_text_ir_spec_exprs(const ObLogTableScan &op, + ObDASIRScanCtDef &text_ir_scan_ctdef); + int generate_doc_id_lookup_ctdef(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASBaseCtDef *ir_scan_ctdef, + ObDASIRAuxLookupCtDef *&aux_lookup_ctdef); + int generate_table_lookup_ctdef(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASBaseCtDef *scan_ctdef, + ObDASTableLookupCtDef *&lookup_ctdef); + int extract_doc_id_index_back_access_columns(const ObLogTableScan &op, + ObIArray &access_exprs); + int extract_doc_id_index_back_output_column_ids(const ObLogTableScan &op, + ObIArray &output_cids); + int filter_out_match_exprs(ObIArray &exprs); + int append_fts_relavence_project_col( + ObDASIRAuxLookupCtDef *aux_lookup_ctdef, + ObDASIRScanCtDef *ir_scan_ctdef); + int generate_das_sort_ctdef(const 
ObIArray &sort_keys, + const bool fetch_with_ties, + ObRawExpr *topk_limit_expr, + ObRawExpr *topk_offset_expr, + ObDASBaseCtDef *child_ctdef, + ObDASSortCtDef *&sort_ctdef); private: ObStaticEngineCG &cg_; }; diff --git a/src/sql/das/iter/ob_das_group_fold_iter.cpp b/src/sql/das/iter/ob_das_group_fold_iter.cpp new file mode 100644 index 0000000000..04838b64d1 --- /dev/null +++ b/src/sql/das/iter/ob_das_group_fold_iter.cpp @@ -0,0 +1,377 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_ENG +#include "sql/das/iter/ob_das_group_fold_iter.h" + +namespace oceanbase +{ +using namespace common; +using namespace storage; +namespace sql +{ + +int ObGroupResultSaveRows::init(const common::ObIArray &exprs, + ObEvalCtx &eval_ctx, + int64_t max_size, + int64_t group_id_idx, + bool need_check_output_datum, + common::ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else { + inited_ = true; + need_check_output_datum_ = need_check_output_datum; + exprs_ = &exprs; + eval_ctx_ = &eval_ctx; + max_size_ = max_size; + group_id_idx_ = group_id_idx; + if (OB_ISNULL(store_rows_ = + static_cast(allocator.alloc(max_size * sizeof(LastDASStoreRow))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory", K(max_size), K(ret)); + } else { + for (int64_t i = 0; i < max_size_; i++) { + new (store_rows_ + i) LastDASStoreRow(allocator); + store_rows_[i].reuse_ = true; + } + 
} + } + + return ret; +} + +int ObGroupResultSaveRows::save(bool is_vectorized, int64_t start_pos, int64_t size) +{ + int ret = OB_SUCCESS; + if (start_pos + size > max_size_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument, exceeds max size", K(ret), K(start_pos), K(size), K_(max_size)); + } else { + if (is_vectorized) { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*eval_ctx_); + batch_info_guard.set_batch_size(start_pos + size); + for (int64_t i = 0; OB_SUCC(ret) && i < size; i++) { + batch_info_guard.set_batch_idx(start_pos + i); + OZ(store_rows_[i].save_store_row(*exprs_, *eval_ctx_)); + } + } else { + OZ(store_rows_[0].save_store_row(*exprs_, *eval_ctx_)); + } + start_pos_ = 0; + saved_size_ = size; + } + + return ret; +} + +int ObGroupResultSaveRows::to_expr(bool is_vectorized, int64_t start_pos, int64_t size) +{ + int ret = OB_SUCCESS; + if (is_vectorized) { + if (start_pos + size > saved_size_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(start_pos), K(size), K_(saved_size), K(ret)); + } else { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*eval_ctx_); + batch_info_guard.set_batch_size(size); + for (int64_t i = 0; OB_SUCC(ret) && i < size; i++) { + batch_info_guard.set_batch_idx(i); + OZ(store_rows_[start_pos + i].store_row_->to_expr(*exprs_, *eval_ctx_)); + } + } + } else { + OZ(store_rows_[0].store_row_->to_expr(*exprs_, *eval_ctx_)); + } + + return ret; +} + +int64_t ObGroupResultSaveRows::cur_group_idx() +{ + return start_pos_ >= saved_size_ ? 
+ OB_INVALID_INDEX : store_rows_[start_pos_].store_row_->cells()[group_id_idx_].get_int(); +} + + +void ObGroupResultSaveRows::reuse() +{ + start_pos_ = 0; + saved_size_ = 0; +} + +void ObGroupResultSaveRows::reset() +{ + inited_ = false; + exprs_ = nullptr; + eval_ctx_ = nullptr; + saved_size_ = 0; + max_size_ = 1; + start_pos_ = 0; + group_id_idx_ = 0; + need_check_output_datum_ = false; + if (OB_NOT_NULL(store_rows_)) { + for (int64_t i = 0; i < max_size_; i++) { + store_rows_[i].~LastDASStoreRow(); + } + store_rows_ = nullptr; + } +} + +int ObDASGroupFoldIter::set_scan_group(int64_t group_id) +{ + int ret = OB_SUCCESS; + // TODO bingfan: add defensive check + if (OB_INVALID_INDEX == group_id) { + cur_group_idx_ += 1; + } else { + cur_group_idx_ = group_id; + } + if (group_save_rows_.need_check_output_datum_) { + reset_expr_datum_ptr(); + } + if (cur_group_idx_ >= group_size_) { + ret = OB_ITER_END; + } + LOG_TRACE("set group id for fold iter", K(cur_group_idx_), K(group_id), K(group_size_), K(lbt())); + LOG_DEBUG("set scan group", K(ret), K(group_id), K(*this)); + return ret; +} + +void ObDASGroupFoldIter::init_group_range(int64_t cur_group_idx, int64_t group_size) +{ + available_group_idx_ = MIN_GROUP_INDEX; + cur_group_idx_ = cur_group_idx; + group_size_ = group_size; +} + +void ObDASGroupFoldIter::reset_expr_datum_ptr() +{ + if (OB_NOT_NULL(group_save_rows_.exprs_)) { + FOREACH_CNT(e, *group_save_rows_.exprs_) + { + (*e)->locate_datums_for_update(*group_save_rows_.eval_ctx_, group_save_rows_.max_size_); + ObEvalInfo &info = (*e)->get_eval_info(*group_save_rows_.eval_ctx_); + info.point_to_frame_ = true; + } + } +} + +int ObDASGroupFoldIter::inner_init(ObDASIterParam ¶m) +{ + int ret = OB_SUCCESS; + if (param.type_ != ObDASIterType::DAS_ITER_GROUP_FOLD || OB_ISNULL(param.group_id_expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("inner init das iter with bad param", K(param), K(ret)); + } else { + ObDASGroupFoldIterParam &group_fold_param = static_cast(param); 
+ cur_group_idx_ = 0; + available_group_idx_ = MIN_GROUP_INDEX; + group_size_ = 0; + need_check_output_datum_ = group_fold_param.need_check_output_datum_; + iter_tree_ = group_fold_param.iter_tree_; + iter_alloc_ = new (iter_alloc_buf_) common::ObArenaAllocator(); + iter_alloc_->set_attr(ObMemAttr(MTL_ID(), "ScanDASCtx")); + + /********* init group store rows *********/ + int64_t group_id_idx = OB_INVALID_INDEX; + for (int64_t i = 0 ; i < output_->count(); i++) { + if (output_->at(i) == group_id_expr_) { + group_id_idx = i; + } + } + if (group_id_idx == OB_INVALID_INDEX) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to get group id idx", K(ret)); + } else if (OB_FAIL(group_save_rows_.init(*output_, + *eval_ctx_, + max_size_, + group_id_idx, + need_check_output_datum_, + *iter_alloc_))) { + LOG_WARN("failed to init group save rows", K(ret)); + } + } + + return ret; +} + +int ObDASGroupFoldIter::inner_reuse() +{ + int ret = OB_SUCCESS; + cur_group_idx_ = 0; + available_group_idx_ = MIN_GROUP_INDEX; + group_size_ = 0; + group_save_rows_.reuse(); + return ret; +} + +int ObDASGroupFoldIter::inner_release() +{ + int ret = OB_SUCCESS; + cur_group_idx_ = 0; + available_group_idx_ = MIN_GROUP_INDEX; + group_size_ = 0; + group_save_rows_.reset(); + iter_tree_ = nullptr; + if (OB_NOT_NULL(iter_alloc_)) { + iter_alloc_->reset(); + iter_alloc_->~ObArenaAllocator(); + iter_alloc_ = nullptr; + } + return ret; +} + +int ObDASGroupFoldIter::inner_get_next_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + int64_t storage_count = 0; + int64_t ret_count = 0; + int64_t group_idx = MIN_GROUP_INDEX; + LOG_DEBUG("das group fold iter get next rows begin", K_(available_group_idx), K_(cur_group_idx)); + + if (available_group_idx_ > cur_group_idx_) { + ret = OB_ITER_END; + LOG_TRACE("available_group_idx > cur_group_idx, no available rows", K_(available_group_idx), K_(cur_group_idx)); + } else { + while (MIN_GROUP_INDEX != available_group_idx_ && available_group_idx_ < 
cur_group_idx_) { + group_save_rows_.next_start_pos(); + available_group_idx_ = group_save_rows_.cur_group_idx(); + if (OB_INVALID_INDEX == available_group_idx_) { + // row_store_ has been consumed and new data needs to be fetched from the storage layer. + available_group_idx_ = MIN_GROUP_INDEX; + } + } + } + + // fetch new data from storage layer. + while (OB_SUCC(ret) && MIN_GROUP_INDEX == available_group_idx_) { + if (OB_FAIL(iter_tree_->get_next_rows(storage_count, capacity))) { + if (OB_ITER_END == ret) { + if (storage_count > 0) { + ret = OB_SUCCESS; + } else { + LOG_DEBUG("underlying iter tree reached iter end", K_(available_group_idx), K_(cur_group_idx)); + // subsequent calls to get next rows will no longer be able to return rows. + available_group_idx_ = INT64_MAX; + } + } else { + LOG_WARN("underlying iter tree failed to get next rows", K(ret)); + } + } + + if (OB_SUCC(ret)) { + const ObBitVector *skip = nullptr; + PRINT_VECTORIZED_ROWS(SQL, DEBUG, *eval_ctx_, *output_, storage_count, skip); + ObDatum *group_idx_batch = group_id_expr_->locate_batch_datums(*group_save_rows_.eval_ctx_); + for (int64_t i = 0; OB_SUCC(ret) && i < storage_count; i++) { + group_idx = group_idx_batch[i].get_int(); + if (group_idx >= cur_group_idx_) { + if (OB_FAIL(group_save_rows_.save(true, i, storage_count - i))) { + LOG_WARN("das group fold iter failed to save batch result", K(ret)); + } else { + available_group_idx_ = group_idx; + } + break; + } + } + } + } // while end + + if (OB_SUCC(ret)) { + if (available_group_idx_ == cur_group_idx_) { // there are rows available in row_store_. 
+ int64_t start_pos = group_save_rows_.get_start_pos(); + while (cur_group_idx_ == available_group_idx_) { + group_idx = group_save_rows_.cur_group_idx(); + if (cur_group_idx_ == group_idx) { + group_save_rows_.next_start_pos(); + ret_count++; + } else { + available_group_idx_ = group_idx; + if (OB_INVALID_INDEX == available_group_idx_) { + available_group_idx_ = MIN_GROUP_INDEX; + } + } + } // while end + + if (ret_count > 0) { + OZ(group_save_rows_.to_expr(true, start_pos, ret_count)); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("das group fold iter do not get any rows", K(ret_count), K_(group_save_rows), + K_(cur_group_idx), K_(available_group_idx), K(ret)); + } + // the group_idx of the data in row_store is already larger than cur_group_idx, + // which means there is no more data for this group. + if (OB_SUCC(ret) && MIN_GROUP_INDEX != available_group_idx_ && cur_group_idx_ != available_group_idx_) { + ret = OB_ITER_END; + } + } else { + OB_ASSERT(available_group_idx_ > cur_group_idx_ && available_group_idx_ != INT64_MAX); + LOG_TRACE("all new rows from storage layer have greater group idx", K_(available_group_idx), K_(cur_group_idx)); + ret = OB_ITER_END; + } + } + count = ret_count; + + LOG_DEBUG("das group fold iter get next rows end", K(ret_count), K(storage_count), K(*this)); + return ret; +} + +int ObDASGroupFoldIter::inner_get_next_row() +{ + int ret = OB_SUCCESS; + if (available_group_idx_ > cur_group_idx_) { + ret = OB_ITER_END; + LOG_TRACE("available_group_idx > cur_group_idx, no available rows", K_(available_group_idx), K_(cur_group_idx)); + } else if (available_group_idx_ == cur_group_idx_) { + OZ(group_save_rows_.to_expr(false, 0, 1)); + available_group_idx_ = MIN_GROUP_INDEX; + } else { + // fetch new data from storage layer. 
+ ObDatum *group_idx = NULL; + while (OB_SUCC(ret) && available_group_idx_ < cur_group_idx_) { + if (OB_FAIL(iter_tree_->get_next_row())) { + if (OB_ITER_END != ret) { + LOG_WARN("das group fold iter failed to get next row", K(ret), K_(available_group_idx), K_(cur_group_idx)); + } else { + available_group_idx_ = INT64_MAX; + } + } else if (OB_FAIL(group_id_expr_->eval(*group_save_rows_.eval_ctx_, group_idx))) { + LOG_WARN("failed to eval group id", K(ret)); + } else { + available_group_idx_ = group_idx->get_int(); + } + } // while end + + if (OB_SUCC(ret)) { + if (available_group_idx_ == cur_group_idx_) { + // return result + available_group_idx_ = MIN_GROUP_INDEX; + } else { + if (OB_FAIL(group_save_rows_.save(false, 0, 1))) { + LOG_WARN("failed to save last row", K(ret)); + } else { + ret = OB_ITER_END; + } + } + } + } + + return ret; +} + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/das/iter/ob_das_group_fold_iter.h b/src/sql/das/iter/ob_das_group_fold_iter.h new file mode 100644 index 0000000000..d87eadaf59 --- /dev/null +++ b/src/sql/das/iter/ob_das_group_fold_iter.h @@ -0,0 +1,132 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OBDEV_SRC_SQL_DAS_ITER_OB_DAS_GROUP_FOLD_ITER_H_ +#define OBDEV_SRC_SQL_DAS_ITER_OB_DAS_GROUP_FOLD_ITER_H_ + +#include "sql/das/iter/ob_das_iter.h" +#include "sql/das/iter/ob_das_merge_iter.h" +#include "common/row/ob_row_iterator.h" +#include "sql/engine/basic/ob_chunk_datum_store.h" +#include "sql/engine/ob_exec_context.h" + +namespace oceanbase +{ +using namespace common; +using namespace storage; +namespace sql +{ + +class ObGroupResultSaveRows +{ +public: + ObGroupResultSaveRows() + : inited_(false), + exprs_(NULL), + eval_ctx_(NULL), + saved_size_(0), + max_size_(1), + start_pos_(0), + group_id_idx_(0), + store_rows_(NULL), + need_check_output_datum_(false) + {} + + int init(const common::ObIArray &exprs, + ObEvalCtx &eval_ctx, + int64_t max_size, + int64_t group_id_idx, + bool need_check_output_datum, + common::ObIAllocator &allocator); + int save(bool is_vectorized, int64_t start_pos, int64_t size); + int to_expr(bool is_vectorized, int64_t start_pos, int64_t size); + int64_t cur_group_idx(); + void next_start_pos() { start_pos_++; } + int64_t get_start_pos() { return start_pos_; } + void reuse(); + void reset(); + TO_STRING_KV(K_(saved_size), + K_(start_pos), + K_(max_size), + K_(group_id_idx)); + +public: + typedef ObChunkDatumStore::LastStoredRow LastDASStoreRow; + + bool inited_; + const common::ObIArray *exprs_; + ObEvalCtx *eval_ctx_; + int64_t saved_size_; + int64_t max_size_; + int64_t start_pos_; + int64_t group_id_idx_; + LastDASStoreRow *store_rows_; + bool need_check_output_datum_; +}; + +struct ObDASGroupFoldIterParam : public ObDASIterParam +{ + bool need_check_output_datum_; + ObDASIter *iter_tree_; + + virtual bool is_valid() const + { + return ObDASIterParam::is_valid() && iter_tree_ != nullptr; + } +}; + +class ObDASGroupFoldIter : public ObDASIter +{ +public: + ObDASGroupFoldIter() + : cur_group_idx_(0), + available_group_idx_(MIN_GROUP_INDEX), + group_size_(0), + need_check_output_datum_(false), + group_save_rows_(), + 
iter_tree_(nullptr), + iter_alloc_(nullptr) + {} + + virtual ~ObDASGroupFoldIter() {} + int set_scan_group(int64_t group_id); + void init_group_range(int64_t cur_group_idx, int64_t group_size); + + INHERIT_TO_STRING_KV("ObDASIter", ObDASIter, K_(cur_group_idx), K_(available_group_idx), + K_(group_size), K_(group_save_rows)); + +protected: + virtual int inner_init(ObDASIterParam ¶m) override; + virtual int inner_reuse() override; + virtual int inner_release() override; + virtual int inner_get_next_row() override; + virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override; + +private: + void reset_expr_datum_ptr(); + static const int64_t MIN_GROUP_INDEX = -1; + int64_t cur_group_idx_; // 本次要读的group_idx + int64_t available_group_idx_; // 当前row_store中可读的最小group_idx + int64_t group_size_; + bool need_check_output_datum_; + ObGroupResultSaveRows group_save_rows_; + + ObDASIter *iter_tree_; + common::ObArenaAllocator *iter_alloc_; + char iter_alloc_buf_[sizeof(common::ObArenaAllocator)]; +}; + + +} // namespace sql +} // namespace oceanbase + +#endif /* OBDEV_SRC_SQL_DAS_ITER_OB_DAS_GROUP_FOLD_ITER_H_ */ diff --git a/src/sql/das/iter/ob_das_iter.cpp b/src/sql/das/iter/ob_das_iter.cpp new file mode 100644 index 0000000000..684d4e6aee --- /dev/null +++ b/src/sql/das/iter/ob_das_iter.cpp @@ -0,0 +1,132 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + + +#define USING_LOG_PREFIX SQL_DAS +#include "sql/das/iter/ob_das_iter.h" + + +namespace oceanbase +{ +using namespace common; +namespace sql +{ + +int ObDASIter::set_merge_status(MergeType merge_type) +{ + int ret = OB_SUCCESS; + ObDASIter *child = child_; + for (; child != nullptr && OB_SUCC(ret); child = child->right_) { + if (OB_FAIL(child->set_merge_status(merge_type))) { + LOG_WARN("failed to set merge status", K(ret)); + } + } + return ret; +} + +int ObDASIter::init(ObDASIterParam ¶m) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("das iter init twice", K(ret)); + } else if (!param.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid das iter param", K(param), K(ret)); + } else { + inited_ = true; + type_ = param.type_; + max_size_ = param.max_size_; + eval_ctx_ = param.eval_ctx_; + exec_ctx_ = param.exec_ctx_; + output_ = param.output_; + group_id_expr_ = param.group_id_expr_; + child_ = param.child_; + right_ = param.right_; + if (OB_FAIL(inner_init(param))) { + LOG_WARN("failed to inner init das iter", K(param), K(ret)); + } + } + + return ret; +} + +int ObDASIter::reuse() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!inited_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("reuse das iter before init", K(ret)); + } else if (OB_FAIL(inner_reuse())) { + LOG_WARN("failed to inner reuse das iter", K(ret), KPC(this)); + } + return ret; +} + +int ObDASIter::release() +{ + int ret = OB_SUCCESS; + int child_ret = OB_SUCCESS; + ObDASIter *child = child_; + int tmp_ret = OB_SUCCESS; + while (child != nullptr) { + ObDASIter *right = child->right_; + if (OB_TMP_FAIL(child->release())) { + LOG_WARN("failed to release child iter", K(tmp_ret), KPC(child)); + child_ret = tmp_ret; + } + child = right; + } + if (OB_FAIL(inner_release())) { + LOG_WARN("failed to inner release das iter", K(ret), KPC(this)); + } else { + ret = child_ret; + } + inited_ = false; + right_ = nullptr; + child_ = nullptr; + group_id_expr_ = 
nullptr; + output_ = nullptr; + exec_ctx_ = nullptr; + eval_ctx_ = nullptr; + max_size_ = 0; + type_ = ObDASIterType::DAS_ITER_INVALID; + return ret; +} + +int ObDASIter::get_next_row() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!inited_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("das iter get next row before init", K(ret)); + } else { + ret = inner_get_next_row(); + } + return ret; +} + + +int ObDASIter::get_next_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!inited_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("das iter get next rows before init", K(ret)); + } else { + ret = inner_get_next_rows(count, capacity); + } + return ret; +} + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/das/iter/ob_das_iter.h b/src/sql/das/iter/ob_das_iter.h new file mode 100644 index 0000000000..a8cc473a63 --- /dev/null +++ b/src/sql/das/iter/ob_das_iter.h @@ -0,0 +1,152 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OBDEV_SRC_SQL_DAS_ITER_OB_DAS_ITER_H_ +#define OBDEV_SRC_SQL_DAS_ITER_OB_DAS_ITER_H_ +#include "sql/engine/expr/ob_expr.h" +#include "sql/engine/ob_exec_context.h" +#include "lib/container/ob_fixed_array.h" +#include "sql/das/ob_das_context.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ + +class ObDASIter; + +enum ObDASIterType : uint32_t +{ + DAS_ITER_INVALID = 0, + DAS_ITER_SCAN, + DAS_ITER_MERGE, + DAS_ITER_GROUP_FOLD, + DAS_ITER_LOOKUP, + // append DASIterType before me + DAS_ITER_MAX +}; + +enum MergeType : uint32_t { + SEQUENTIAL_MERGE = 0, + SORT_MERGE +}; + +struct ObDASIterParam +{ +public: + ObDASIterParam() + : type_(ObDASIterType::DAS_ITER_INVALID), + max_size_(0), + eval_ctx_(nullptr), + exec_ctx_(nullptr), + output_(nullptr), + group_id_expr_(nullptr), + child_(nullptr), + right_(nullptr) + {} + + virtual ~ObDASIterParam() {} + + void assgin(const ObDASIterParam ¶m) + { + type_ = param.type_; + max_size_ = param.max_size_; + eval_ctx_ = param.eval_ctx_; + exec_ctx_ = param.exec_ctx_; + output_ = param.output_; + group_id_expr_ = param.group_id_expr_; + child_ = param.child_; + right_ = param.right_; + } + + virtual bool is_valid() const + { + return eval_ctx_ != nullptr && exec_ctx_ != nullptr && output_ != nullptr; + } + + ObDASIterType type_; + int64_t max_size_; + ObEvalCtx *eval_ctx_; + ObExecContext *exec_ctx_; + const ObIArray *output_; + const ObExpr *group_id_expr_; + ObDASIter *child_; + ObDASIter *right_; + TO_STRING_KV(K_(type), K_(max_size), K_(eval_ctx), K_(exec_ctx), KPC_(output), K_(group_id_expr), + K_(child), K_(right)); +}; + +class ObDASIter +{ +public: + ObDASIter() + : type_(ObDASIterType::DAS_ITER_INVALID), + max_size_(0), + eval_ctx_(nullptr), + exec_ctx_(nullptr), + output_(nullptr), + group_id_expr_(nullptr), + child_(nullptr), + right_(nullptr), + inited_(false) + {} + virtual ~ObDASIter() { release(); } + + VIRTUAL_TO_STRING_KV(K_(type), K_(max_size), K_(eval_ctx), K_(exec_ctx), 
K_(output), + K_(group_id_expr), K_(child), K_(right), K_(inited)); + + void set_type(ObDASIterType type) { type_ = type; } + ObDASIterType get_type() const { return type_; } + + // The state of ObDASMergeIter may change many times during execution, e.g., the merge_type + // changing from SEQUENTIAL_MERGE to SORT_MERGE, or the creation of a new batch of DAS tasks. + // Therefore, the status needs to be explicitly set before calling get next rows. + virtual int set_merge_status(MergeType merge_type); + + int init(ObDASIterParam ¶m); + OB_INLINE bool is_inited() const { return inited_; } + // Make the iter go back to the state after calling init(). + int reuse(); + // Make the iter go back to the state before calling init(). + int release(); + + // get_next_row(s) should be called after init(). + int get_next_row(); + int get_next_rows(int64_t &count, int64_t capacity); + + +protected: + virtual int inner_init(ObDASIterParam ¶m) = 0; + virtual int inner_reuse() = 0; + virtual int inner_release() = 0; + virtual int inner_get_next_row() = 0; + virtual int inner_get_next_rows(int64_t &count, int64_t capacity) = 0; + + ObDASIterType type_; + int64_t max_size_; + ObEvalCtx *eval_ctx_; + ObExecContext *exec_ctx_; + const ObIArray *output_; + const ObExpr *group_id_expr_; + ObDASIter *child_; + ObDASIter *right_; + +private: + bool inited_; +}; + +} // namespace sql +} // namespace oceanbase + + +#endif /* OBDEV_SRC_SQL_DAS_ITER_OB_DAS_ITER_H_ */ diff --git a/src/sql/das/iter/ob_das_iter_utils.cpp b/src/sql/das/iter/ob_das_iter_utils.cpp new file mode 100644 index 0000000000..d971918192 --- /dev/null +++ b/src/sql/das/iter/ob_das_iter_utils.cpp @@ -0,0 +1,358 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_DAS +#include "sql/das/iter/ob_das_iter_utils.h" + +namespace oceanbase +{ +namespace sql +{ + +/***************** public begin *****************/ +int ObDASIterUtils::create_table_scan_iter_tree(const ObTableScanCtDef &tsc_ctdef, + ObTableScanRtDef &tsc_rtdef, + ObEvalCtx &eval_ctx, + ObExecContext &exec_ctx, + ObFixedArray &eval_infos, + const ObTableScanSpec &spec, + ObDASMergeIter *&scan_iter, + ObDASIter *&iter_tree) +{ + int ret = OB_SUCCESS; + ObDASMergeIter *iter = nullptr; + const ObDASScanCtDef *scan_ctdef = &tsc_ctdef.scan_ctdef_; + ObDASScanRtDef *scan_rtdef = &tsc_rtdef.scan_rtdef_; + common::ObIAllocator &alloc = exec_ctx.get_allocator(); + ObDASIterParam param; + param.max_size_ = eval_ctx.is_vectorized() ? 
eval_ctx.max_batch_size_ : 1; + param.eval_ctx_ = &eval_ctx; + param.exec_ctx_ = &exec_ctx; + param.output_ = &tsc_ctdef.get_das_output_exprs(); + param.group_id_expr_ = scan_ctdef->group_id_expr_; + param.child_ = nullptr; + param.right_ = nullptr; + if (OB_FAIL(create_das_merge_iter_help(param, + alloc, + false, + eval_infos, + spec, + iter))) { + LOG_WARN("failed to create das merge iter", K(ret)); + } else { + scan_iter = iter; + iter_tree = iter; + } + + return ret; +} + +int ObDASIterUtils::create_local_lookup_iter_tree(const ObTableScanCtDef &tsc_ctdef, + ObTableScanRtDef &tsc_rtdef, + ObEvalCtx &eval_ctx, + ObExecContext &exec_ctx, + ObFixedArray &eval_infos, + const ObTableScanSpec &spec, + ObDASMergeIter *&scan_iter, + ObDASIter *&iter_tree) +{ + int ret = OB_SUCCESS; + // Currently, the iter tree of local index lookup is the same as that of table scan, + // this is because local index lookup is executed within a single DAS task. + // TODO bingfan: unify local index lookup and global index lookup. 
+ if (OB_FAIL(create_table_scan_iter_tree(tsc_ctdef, + tsc_rtdef, + eval_ctx, + exec_ctx, + eval_infos, + spec, + scan_iter, + iter_tree))) { + LOG_WARN("failed to create local index lookup iter tree", K(ret)); + } + + return ret; +} + +int ObDASIterUtils::create_global_lookup_iter_tree(const ObTableScanCtDef &tsc_ctdef, + ObTableScanRtDef &tsc_rtdef, + ObEvalCtx &eval_ctx, + ObExecContext &exec_ctx, + ObFixedArray &eval_infos, + const ObTableScanSpec &spec, + bool can_retry, + ObDASMergeIter *&scan_iter, + ObDASIter *&iter_tree) +{ + int ret = OB_SUCCESS; + ObDASMergeIter *index_table_iter = nullptr; + ObDASMergeIter *data_table_iter = nullptr; + ObDASGlobalLookupIter *lookup_iter = nullptr; + /********* create index table iter *********/ + const ObDASScanCtDef *scan_ctdef = &tsc_ctdef.scan_ctdef_; + ObDASScanRtDef *scan_rtdef = &tsc_rtdef.scan_rtdef_; + common::ObIAllocator &alloc = exec_ctx.get_allocator(); + ObDASIterParam param; + param.max_size_ = eval_ctx.is_vectorized() ? eval_ctx.max_batch_size_ : 1; + param.eval_ctx_ = &eval_ctx; + param.exec_ctx_ = &exec_ctx; + param.output_ = &scan_ctdef->result_output_; + param.group_id_expr_ = scan_ctdef->group_id_expr_; + param.child_ = nullptr; + param.right_ = nullptr; + if (OB_FAIL(create_das_merge_iter_help(param, + alloc, + false, + eval_infos, + spec, + index_table_iter))) { + LOG_WARN("failed to create index table iter", K(ret)); + } + /********* create data table iter *********/ + if (OB_SUCC(ret)) { + param.output_ = &tsc_ctdef.lookup_ctdef_->result_output_; + param.child_ = nullptr; + param.right_ = index_table_iter; + if (OB_FAIL(create_das_merge_iter_help(param, + alloc, + true, + eval_infos, + spec, + data_table_iter))) { + LOG_WARN("failed to create data table iter", K(ret)); + } + } + /********* create global lookup iter *********/ + if (OB_SUCC(ret)) { + index_table_iter->set_global_lookup_iter(data_table_iter); + param.child_ = data_table_iter; + param.right_ = nullptr; + if 
(OB_FAIL(create_das_global_lookup_iter_help(param, + alloc, + 10000, // hard code 10000 + index_table_iter, + data_table_iter, + can_retry, + tsc_ctdef, + tsc_rtdef, + lookup_iter))) { + LOG_WARN("failed to create das global lookup iter", K(ret)); + } + } + if (OB_SUCC(ret)) { + scan_iter = index_table_iter; + iter_tree = lookup_iter; + } + + return ret; +} + +int ObDASIterUtils::create_group_fold_iter(const ObTableScanCtDef &tsc_ctdef, + ObTableScanRtDef &tsc_rtdef, + ObEvalCtx &eval_ctx, + ObExecContext &exec_ctx, + ObFixedArray &eval_infos, + const ObTableScanSpec &spec, + ObDASIter *iter_tree, + ObDASGroupFoldIter *&fold_iter) +{ + int ret = OB_SUCCESS; + ObDASGroupFoldIter *iter = nullptr; + const ObDASScanCtDef *scan_ctdef = &tsc_ctdef.scan_ctdef_; + ObDASScanRtDef *scan_rtdef = &tsc_rtdef.scan_rtdef_; + common::ObIAllocator &alloc = exec_ctx.get_allocator(); + ObDASIterParam param; + param.max_size_ = eval_ctx.is_vectorized() ? eval_ctx.max_batch_size_ : 1; + param.eval_ctx_ = &eval_ctx; + param.exec_ctx_ = &exec_ctx; + param.output_ = &tsc_ctdef.get_das_output_exprs(); + param.group_id_expr_ = tsc_ctdef.scan_ctdef_.group_id_expr_; + param.child_ = iter_tree; + param.right_ = nullptr; + if (OB_SUCC(ret)) { + if (OB_FAIL(create_das_group_fold_iter_help(param, + alloc, + scan_rtdef->need_check_output_datum_, + iter_tree, + iter))) { + LOG_WARN("failed to create das group fold iter", K(ret)); + } + } + if (OB_SUCC(ret)) { + fold_iter = iter; + } + + return ret; +} + +/***************** private begin *****************/ +int ObDASIterUtils::create_das_merge_iter_help(ObDASIterParam ¶m, + common::ObIAllocator &alloc, + bool is_global_lookup, + ObFixedArray &eval_infos, + const ObTableScanSpec &spec, + ObDASMergeIter *&result) +{ + int ret = OB_SUCCESS; + void *iter_buf = nullptr; + ObDASMergeIter *iter = nullptr; + if (OB_UNLIKELY(!param.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid das iter param", K(param), K(ret)); + } else if 
(OB_ISNULL(iter_buf = alloc.alloc(sizeof(ObDASMergeIter)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory", K(ret)); + } else { + iter = new (iter_buf) ObDASMergeIter(); + } + + if (OB_SUCC(ret) && OB_NOT_NULL(iter)) { + ObDASMergeIterParam merge_param; + merge_param.assgin(param); + merge_param.type_ = DAS_ITER_MERGE; + merge_param.eval_infos_ = &eval_infos; + merge_param.need_update_partition_id_ = !is_global_lookup; + merge_param.pdml_partition_id_ = spec.pdml_partition_id_; + merge_param.partition_id_calc_type_ = spec.partition_id_calc_type_; + merge_param.should_scan_index_ = spec.should_scan_index(); + merge_param.ref_table_id_ = spec.ref_table_id_; + merge_param.is_vectorized_ = spec.is_vectorized(); + merge_param.frame_info_ = &spec.plan_->get_expr_frame_info(); + merge_param.execute_das_directly_ = !is_global_lookup && !spec.use_dist_das_; + merge_param.enable_rich_format_ = !is_global_lookup && spec.use_rich_format_; + + if (OB_FAIL(iter->init(merge_param))) { + LOG_WARN("failed to init das merge iter", K(ret)); + } + } + if (OB_SUCC(ret)) { + result = iter; + } else { + if (OB_NOT_NULL(iter)) { + iter->release(); + iter = nullptr; + } + if (OB_NOT_NULL(iter_buf)) { + alloc.free(iter_buf); + iter_buf = nullptr; + } + result = nullptr; + } + + return ret; +} + +int ObDASIterUtils::create_das_global_lookup_iter_help(ObDASIterParam ¶m, + common::ObIAllocator &alloc, + int64_t default_batch_row_count, + ObDASMergeIter *index_table_iter, + ObDASMergeIter *data_table_iter, + bool can_retry, + const ObTableScanCtDef &tsc_ctdef, + ObTableScanRtDef &tsc_rtdef, + ObDASGlobalLookupIter *&result) +{ + int ret = OB_SUCCESS; + void *iter_buf = nullptr; + ObDASGlobalLookupIter *iter = nullptr; + if (!param.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid das iter param", K(param), K(ret)); + } else if (OB_ISNULL(iter_buf = alloc.alloc(sizeof(ObDASGlobalLookupIter)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to 
allocate memory", K(ret)); + } else { + iter = new (iter_buf) ObDASGlobalLookupIter(); + } + + if (OB_SUCC(ret) && OB_NOT_NULL(iter)) { + ObDASLookupIterParam lookup_param; + lookup_param.assgin(param); + lookup_param.type_ = ObDASIterType::DAS_ITER_LOOKUP; + lookup_param.default_batch_row_count_ = default_batch_row_count; + lookup_param.index_table_iter_ = index_table_iter; + lookup_param.data_table_iter_ = data_table_iter; + lookup_param.can_retry_ = can_retry; + lookup_param.calc_part_id_ = tsc_ctdef.calc_part_id_expr_; + lookup_param.lookup_ctdef_ = tsc_ctdef.lookup_ctdef_; + lookup_param.lookup_rtdef_ = tsc_rtdef.lookup_rtdef_; + lookup_param.rowkey_exprs_ = &tsc_ctdef.global_index_rowkey_exprs_; + if (OB_FAIL(iter->init(lookup_param))) { + LOG_WARN("failed to init das global lookup iter", K(ret)); + } + } + if (OB_SUCC(ret)) { + result = iter; + } else { + if (OB_NOT_NULL(iter)) { + iter->release(); + iter = nullptr; + } + if (OB_NOT_NULL(iter_buf)) { + alloc.free(iter_buf); + iter_buf = nullptr; + } + result = nullptr; + } + + return ret; +} + +int ObDASIterUtils::create_das_group_fold_iter_help(ObDASIterParam ¶m, + common::ObIAllocator &alloc, + bool need_check_output_datum, + ObDASIter *iter_tree, + ObDASGroupFoldIter *&result) +{ + int ret = OB_SUCCESS; + void *iter_buf = nullptr; + ObDASGroupFoldIter *iter = nullptr; + if (!param.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid das iter param", K(param), K(ret)); + } else if (OB_ISNULL(iter_buf = alloc.alloc(sizeof(ObDASGroupFoldIter)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory", K(ret)); + } else { + iter = new (iter_buf) ObDASGroupFoldIter(); + } + + if (OB_SUCC(ret) && OB_NOT_NULL(iter)) { + ObDASGroupFoldIterParam group_fold_param; + group_fold_param.assgin(param); + group_fold_param.type_ = ObDASIterType::DAS_ITER_GROUP_FOLD; + group_fold_param.need_check_output_datum_ = need_check_output_datum; + group_fold_param.iter_tree_ = iter_tree; + + if 
(OB_FAIL(iter->init(group_fold_param))) { + LOG_WARN("failed to init das group fold iter", K(ret)); + } + } + if (OB_SUCC(ret)) { + result = iter; + } else { + if (OB_NOT_NULL(iter)) { + iter->release(); + iter = nullptr; + } + if (OB_NOT_NULL(iter_buf)) { + alloc.free(iter_buf); + iter_buf = nullptr; + } + result = nullptr; + } + + return ret; +} + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/das/iter/ob_das_iter_utils.h b/src/sql/das/iter/ob_das_iter_utils.h new file mode 100644 index 0000000000..5a0bb2e2d0 --- /dev/null +++ b/src/sql/das/iter/ob_das_iter_utils.h @@ -0,0 +1,96 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OBDEV_SRC_SQL_DAS_ITER_OB_DAS_ITER_UTILS_H_ +#define OBDEV_SRC_SQL_DAS_ITER_OB_DAS_ITER_UTILS_H_ + +#include "sql/das/iter/ob_das_merge_iter.h" +#include "sql/das/iter/ob_das_lookup_iter.h" +#include "sql/das/iter/ob_das_group_fold_iter.h" +#include "sql/engine/table/ob_table_scan_op.h" + +namespace oceanbase +{ +namespace sql +{ +class ObDASIterUtils +{ +public: + static int create_table_scan_iter_tree(const ObTableScanCtDef &tsc_ctdef, + ObTableScanRtDef &tsc_rtdef, + ObEvalCtx &eval_ctx, + ObExecContext &exec_ctx, + ObFixedArray &eval_infos, + const ObTableScanSpec &spec, + ObDASMergeIter *&scan_iter, + ObDASIter *&iter_tree); + + static int create_local_lookup_iter_tree(const ObTableScanCtDef &tsc_ctdef, + ObTableScanRtDef &tsc_rtdef, + ObEvalCtx &eval_ctx, + ObExecContext &exec_ctx, + ObFixedArray &eval_infos, + const ObTableScanSpec &spec, + ObDASMergeIter *&scan_iter, + ObDASIter *&iter_tree); + + static int create_global_lookup_iter_tree(const ObTableScanCtDef &tsc_ctdef, + ObTableScanRtDef &tsc_rtdef, + ObEvalCtx &eval_ctx, + ObExecContext &exec_ctx, + ObFixedArray &eval_infos, + const ObTableScanSpec &spec, + bool can_retry, + ObDASMergeIter *&scan_iter, + ObDASIter *&iter_tree); + + static int create_group_fold_iter(const ObTableScanCtDef &tsc_ctdef, + ObTableScanRtDef &tsc_rtdef, + ObEvalCtx &eval_ctx, + ObExecContext &exec_ctx, + ObFixedArray &eval_infos, + const ObTableScanSpec &spec, + ObDASIter *iter_tree, + ObDASGroupFoldIter *&fold_iter); + +private: + static int create_das_merge_iter_help(ObDASIterParam ¶m, + common::ObIAllocator &alloc, + bool is_global_lookup, + ObFixedArray &eval_infos, + const ObTableScanSpec &spec, + ObDASMergeIter *&result); + + static int create_das_global_lookup_iter_help(ObDASIterParam ¶m, + common::ObIAllocator &alloc, + int64_t default_batch_row_count, + ObDASMergeIter *index_table_iter, + ObDASMergeIter *data_table_iter, + bool can_retry, + const ObTableScanCtDef &tsc_ctdef, + ObTableScanRtDef 
&tsc_rtdef, + ObDASGlobalLookupIter *&result); + + static int create_das_group_fold_iter_help(ObDASIterParam ¶m, + common::ObIAllocator &alloc, + bool need_check_output_datum, + ObDASIter *iter_tree, + ObDASGroupFoldIter *&result); + + ObDASIterUtils() = delete; + ~ObDASIterUtils() = delete; +}; + +} // namespace sql +} // namespace oceanbase + +#endif /* OBDEV_SRC_SQL_DAS_ITER_OB_DAS_ITER_UTILS_H_ */ diff --git a/src/sql/das/iter/ob_das_lookup_iter.cpp b/src/sql/das/iter/ob_das_lookup_iter.cpp new file mode 100644 index 0000000000..579d3dc30f --- /dev/null +++ b/src/sql/das/iter/ob_das_lookup_iter.cpp @@ -0,0 +1,487 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX SQL_DAS +#include "sql/das/iter/ob_das_lookup_iter.h" +#include "sql/das/iter/ob_das_merge_iter.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ + +int ObDASLookupIter::inner_init(ObDASIterParam ¶m) +{ + int ret = OB_SUCCESS; + if (param.type_ != ObDASIterType::DAS_ITER_LOOKUP) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("specific_init with bad param type", K(param.type_)); + } else { + ObDASLookupIterParam &lookup_param = static_cast(param); + state_ = LookupState::INDEX_SCAN; + index_end_ = false; + default_batch_row_count_ = lookup_param.default_batch_row_count_; + lookup_rowkey_cnt_ = 0; + lookup_row_cnt_ = 0; + index_table_iter_ = lookup_param.index_table_iter_; + data_table_iter_ = lookup_param.data_table_iter_; + can_retry_ = lookup_param.can_retry_; + calc_part_id_ = lookup_param.calc_part_id_; + lookup_ctdef_ = lookup_param.lookup_ctdef_; + lookup_rtdef_ = lookup_param.lookup_rtdef_; + rowkey_exprs_ = lookup_param.rowkey_exprs_; + iter_alloc_ = new (iter_alloc_buf_) common::ObArenaAllocator(); + iter_alloc_->set_attr(ObMemAttr(MTL_ID(), "TableLookup")); + lookup_rtdef_->scan_allocator_.set_alloc(iter_alloc_); + lookup_rtdef_->stmt_allocator_.set_alloc(iter_alloc_); + } + return ret; +} + +int ObDASLookupIter::inner_reuse() +{ + int ret = OB_SUCCESS; + // the reuse() of index table iter will be handled in TSC. 
+ if (OB_FAIL(data_table_iter_->reuse())) { + LOG_WARN("failed to reuse data table iter", K(ret)); + } + if (OB_NOT_NULL(iter_alloc_)) { + iter_alloc_->reset_remain_one_page(); + } + lookup_row_cnt_ = 0; + lookup_rowkey_cnt_ = 0; + index_end_ = false; + state_ = LookupState::INDEX_SCAN; + return ret; +} + +int ObDASLookupIter::inner_release() +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(iter_alloc_)) { + iter_alloc_->reset(); + iter_alloc_->~ObArenaAllocator(); + iter_alloc_ = nullptr; + } + index_table_iter_ = nullptr; + data_table_iter_ = nullptr; + return ret; +} + +void ObDASLookupIter::reset_lookup_state() +{ + lookup_rowkey_cnt_ = 0; + lookup_row_cnt_ = 0; + if (OB_NOT_NULL(data_table_iter_)) { + data_table_iter_->reuse(); + } + if (OB_NOT_NULL(iter_alloc_)) { + iter_alloc_->reset_remain_one_page(); + } +} + +int ObDASLookupIter::inner_get_next_row() +{ + int ret = OB_SUCCESS; + bool got_next_row = false; + int64_t simulate_batch_row_cnt = - EVENT_CALL(EventTable::EN_TABLE_LOOKUP_BATCH_ROW_COUNT); + int64_t default_row_batch_cnt = simulate_batch_row_cnt > 0 ? 
simulate_batch_row_cnt : default_batch_row_count_; + LOG_DEBUG("simulate lookup row batch count", K(simulate_batch_row_cnt), K(default_row_batch_cnt)); + do { + switch (state_) { + case INDEX_SCAN: { + reset_lookup_state(); + while (OB_SUCC(ret) && !index_end_ && lookup_rowkey_cnt_ < default_row_batch_cnt) { + if (OB_FAIL(index_table_iter_->get_next_row())) { + if(OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next row from index table", K(ret)); + } else { + index_end_ = true; + ret = OB_SUCCESS; + } + } else if (OB_FAIL(add_rowkey())) { + LOG_WARN("failed to add row key", K(ret)); + } else { + ++lookup_rowkey_cnt_; + } + } + + if (OB_SUCC(ret)) { + if (OB_LIKELY(lookup_rowkey_cnt_ > 0)) { + state_ = DO_LOOKUP; + } else { + state_ = FINISHED; + } + } + break; + } + + case DO_LOOKUP: { + if (OB_FAIL(do_index_lookup())) { + LOG_WARN("failed to do index lookup", K(ret)); + } else { + state_ = OUTPUT_ROWS; + } + break; + } + + case OUTPUT_ROWS: { + if (OB_FAIL(data_table_iter_->get_next_row())) { + if (OB_LIKELY(OB_ITER_END == ret)) { + ret = OB_SUCCESS; + if (OB_FAIL(check_index_lookup())) { + LOG_WARN("failed to check table lookup", K(ret)); + } else { + state_ = INDEX_SCAN; + } + } else { + LOG_WARN("failed to get next row from data table", K(ret)); + } + } else { + got_next_row = true; + ++lookup_row_cnt_; + } + break; + } + + case FINISHED: { + ret = OB_ITER_END; + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected lookup state", K_(state)); + } + } + } while (!got_next_row && OB_SUCC(ret)); + + return ret; +} + +int ObDASLookupIter::inner_get_next_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + bool get_next_rows = false; + int64_t simulate_batch_row_cnt = - EVENT_CALL(EventTable::EN_TABLE_LOOKUP_BATCH_ROW_COUNT); + int64_t default_row_batch_cnt = simulate_batch_row_cnt > 0 ? 
simulate_batch_row_cnt : default_batch_row_count_; + LOG_DEBUG("simulate lookup row batch count", K(simulate_batch_row_cnt), K(default_row_batch_cnt)); + do { + switch (state_) { + case INDEX_SCAN: { + reset_lookup_state(); + int64_t storage_count = 0; + int64_t index_capacity = 0; + while (OB_SUCC(ret) && !index_end_ && lookup_rowkey_cnt_ < default_row_batch_cnt) { + storage_count = 0; + index_capacity = std::min(max_size_, default_row_batch_cnt - lookup_rowkey_cnt_); + if (OB_FAIL(index_table_iter_->get_next_rows(storage_count, index_capacity))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next rows from index table", K(ret)); + } else { + if (storage_count == 0) { + index_end_ = true; + } + ret = OB_SUCCESS; + } + } + if (OB_SUCC(ret) && storage_count > 0) { + if (OB_FAIL(add_rowkeys(storage_count))) { + LOG_WARN("failed to add row keys", K(ret)); + } else { + lookup_rowkey_cnt_ += storage_count; + } + } + } + + if (OB_SUCC(ret)) { + if (OB_LIKELY(lookup_rowkey_cnt_ > 0)) { + state_ = DO_LOOKUP; + } else { + state_ = FINISHED; + } + } + break; + } + + case DO_LOOKUP: { + if (OB_FAIL(do_index_lookup())) { + LOG_WARN("failed to do index lookup", K(ret)); + } else { + state_ = OUTPUT_ROWS; + } + break; + } + + case OUTPUT_ROWS: { + count = 0; + if (OB_FAIL(data_table_iter_->get_next_rows(count, capacity))) { + if (OB_LIKELY(OB_ITER_END == ret)) { + ret = OB_SUCCESS; + if (count > 0) { + lookup_row_cnt_ += count; + get_next_rows = true; + } else { + if (OB_FAIL(check_index_lookup())) { + LOG_WARN("failed to check table lookup", K(ret)); + } else { + state_ = INDEX_SCAN; + } + } + } else { + LOG_WARN("failed to get next rows from data table", K(ret)); + } + } else { + lookup_row_cnt_ += count; + get_next_rows = true; + } + break; + } + + case FINISHED: { + ret = OB_ITER_END; + break; + } + } + } while (!get_next_rows && OB_SUCC(ret)); + + return ret; +} + +int ObDASLookupIter::build_lookup_range(ObNewRange &range) +{ + int ret = OB_SUCCESS; + 
if (OB_ISNULL(rowkey_exprs_) || OB_ISNULL(eval_ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), K(rowkey_exprs_), K(eval_ctx_)); + } else { + int64_t rowkey_cnt = rowkey_exprs_->count(); + ObObj *obj_ptr = nullptr; + void *buf = nullptr; + if (OB_ISNULL(buf = iter_alloc_->alloc(sizeof(ObObj) * rowkey_cnt))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate enough memory", K(ret), K(rowkey_cnt)); + } else { + obj_ptr = new (buf) ObObj[rowkey_cnt]; + } + + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_cnt; ++i) { + ObObj tmp_obj; + ObExpr *expr = rowkey_exprs_->at(i); + ObDatum &col_datum = expr->locate_expr_datum(*eval_ctx_); + if (OB_FAIL(col_datum.to_obj(tmp_obj, expr->obj_meta_, expr->obj_datum_map_))) { + LOG_WARN("failed to convert datum to obj", K(ret)); + } else if (OB_FAIL(ob_write_obj(*iter_alloc_, tmp_obj, obj_ptr[i]))) { + LOG_WARN("failed to deep copy rowkey", K(ret), K(tmp_obj)); + } + } + + int64_t group_id = 0; + if (OB_NOT_NULL(group_id_expr_)) { + ObDatum &group_datum = group_id_expr_->locate_expr_datum(*eval_ctx_); + OB_ASSERT(T_PSEUDO_GROUP_ID == group_id_expr_->type_); + group_id = group_datum.get_int(); + } + + if (OB_SUCC(ret)) { + ObRowkey row_key(obj_ptr, rowkey_cnt); + if (OB_FAIL(range.build_range(lookup_ctdef_->ref_table_id_, row_key))) { + LOG_WARN("failed to build lookup range", K(ret), K(lookup_ctdef_->ref_table_id_), K(row_key)); + } else { + range.group_idx_ = group_id; + } + LOG_DEBUG("build lookup range", K(ret), K(row_key), K(range)); + } + } + + return ret; +} + +int ObDASLookupIter::build_trans_info_datum(const ObExpr *trans_info_expr, ObDatum *&datum_ptr) +{ + int ret = OB_SUCCESS; + datum_ptr = nullptr; + if (OB_ISNULL(trans_info_expr) || OB_ISNULL(eval_ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), K(trans_info_expr), K(eval_ctx_)); + } else { + void *buf = nullptr; + ObDatum &col_datum = trans_info_expr->locate_expr_datum(*eval_ctx_); + int64_t pos = 
sizeof(ObDatum); + int64_t len = sizeof(ObDatum) + col_datum.len_; + if (OB_ISNULL(buf = iter_alloc_->alloc(len))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate enough memory", K(ret)); + } else if (FALSE_IT(datum_ptr = new (buf) ObDatum)) { + } else if (OB_FAIL(datum_ptr->deep_copy(col_datum, static_cast(buf), len, pos))) { + LOG_WARN("failed to deep copy datum", K(ret), K(pos), K(len)); + } + } + + return ret; +} + +int ObDASGlobalLookupIter::add_rowkey() +{ + int ret = OB_SUCCESS; + ObObjectID partition_id = OB_INVALID_ID; + ObTabletID tablet_id(OB_INVALID_ID); + ObDASScanOp *das_scan_op = nullptr; + ObDASTabletLoc *tablet_loc = nullptr; + ObDASCtx *das_ctx = nullptr; + bool reuse_das_op = false; + OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_MERGE); + if (OB_ISNULL(exec_ctx_) || OB_ISNULL(das_ctx = &exec_ctx_->get_das_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get das ctx", KPC_(exec_ctx)); + } else { + ObDASMergeIter *merge_iter = static_cast(data_table_iter_); + if (OB_FAIL(ObExprCalcPartitionBase::calc_part_and_tablet_id(calc_part_id_, + *eval_ctx_, + partition_id, + tablet_id))) { + LOG_WARN("failed to calc part id", K(ret), KPC(calc_part_id_)); + } else if (OB_FAIL(das_ctx->extended_tablet_loc(*lookup_rtdef_->table_loc_, + tablet_id, + tablet_loc))) { + LOG_WARN("failed to get tablet loc by tablet_id", K(ret)); + } else if (OB_FAIL(merge_iter->create_das_task(tablet_loc, das_scan_op, reuse_das_op))) { + LOG_WARN("failed to create das task", K(ret)); + } else if (!reuse_das_op) { + das_scan_op->set_scan_ctdef(lookup_ctdef_); + das_scan_op->set_scan_rtdef(lookup_rtdef_); + das_scan_op->set_can_part_retry(can_retry_); + } + } + if (OB_SUCC(ret)) { + storage::ObTableScanParam &scan_param = das_scan_op->get_scan_param(); + ObNewRange lookup_range; + if (OB_FAIL(build_lookup_range(lookup_range))) { + LOG_WARN("failed to build lookup range", K(ret)); + } else if (OB_FAIL(scan_param.key_ranges_.push_back(lookup_range))) 
{ + LOG_WARN("failed to push back lookup range", K(ret)); + } else { + scan_param.is_get_ = true; + } + } + + const ObExpr *trans_info_expr = lookup_ctdef_->trans_info_expr_; + if (OB_SUCC(ret) && OB_NOT_NULL(trans_info_expr)) { + void *buf = nullptr; + ObDatum *datum_ptr = nullptr; + if (OB_FAIL(build_trans_info_datum(trans_info_expr, datum_ptr))) { + LOG_WARN("failed to build trans info datum", K(ret)); + } else if (OB_ISNULL(datum_ptr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret)); + } else if (OB_FAIL(das_scan_op->trans_info_array_.push_back(datum_ptr))) { + LOG_WARN("failed to push back trans info array", K(ret), KPC(datum_ptr)); + } + } + + return ret; +} + +int ObDASGlobalLookupIter::add_rowkeys(int64_t count) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(eval_ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K_(eval_ctx)); + } else { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*eval_ctx_); + batch_info_guard.set_batch_size(count); + for(int i = 0; OB_SUCC(ret) && i < count; i++) { + batch_info_guard.set_batch_idx(i); + if(OB_FAIL(add_rowkey())) { + LOG_WARN("failed to add rowkey", K(ret), K(i)); + } + } + } + + return ret; +} + +int ObDASGlobalLookupIter::do_index_lookup() +{ + int ret = OB_SUCCESS; + OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_MERGE); + ObDASMergeIter *merge_iter = static_cast(data_table_iter_); + if (OB_FAIL(merge_iter->do_table_scan())) { + LOG_WARN("failed to do global index lookup", K(ret)); + } else if (OB_FAIL(merge_iter->set_merge_status(merge_iter->get_merge_type()))) { + LOG_WARN("failed to set merge status for das iter", K(ret)); + } + return ret; +} + +int ObDASGlobalLookupIter::check_index_lookup() +{ + int ret = OB_SUCCESS; + OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_MERGE); + ObDASMergeIter *merge_iter = static_cast(data_table_iter_); + if (GCONF.enable_defensive_check() && + lookup_ctdef_->pd_expr_spec_.pushdown_filters_.empty()) { + if 
(OB_UNLIKELY(lookup_rowkey_cnt_ != lookup_row_cnt_)) { + ret = OB_ERR_DEFENSIVE_CHECK; + ObSQLSessionInfo *my_session = exec_ctx_->get_my_session(); + ObString func_name = ObString::make_string("check_lookup_row_cnt"); + LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr()); + LOG_ERROR("Fatal Error!!! Catch a defensive error!", + K(ret), K(lookup_rowkey_cnt_), K(lookup_row_cnt_), + "main table id", lookup_ctdef_->ref_table_id_, + KPC(my_session->get_tx_desc())); + + int64_t row_num = 0; + for (DASTaskIter task_iter = merge_iter->begin_task_iter(); !task_iter.is_end(); ++task_iter) { + ObDASScanOp *das_op = static_cast(*task_iter); + if (das_op->trans_info_array_.count() == das_op->get_scan_param().key_ranges_.count()) { + for (int64_t i = 0; i < das_op->trans_info_array_.count(); i++) { + row_num++; + ObDatum *datum = das_op->trans_info_array_.at(i); + LOG_ERROR("dump GLobalIndexBack das task lookup range and trans info", + K(row_num), KPC(datum), + K(das_op->get_scan_param().key_ranges_.at(i)), + K(das_op->get_tablet_id())); + } + } else { + for (int64_t i = 0; i < das_op->get_scan_param().key_ranges_.count(); i++) { + row_num++; + LOG_ERROR("dump GLobalIndexBack das task lookup range", + K(row_num), + K(das_op->get_scan_param().key_ranges_.at(i)), + K(das_op->get_tablet_id())); + } + } + } + } + } + + int simulate_error = EVENT_CALL(EventTable::EN_DAS_SIMULATE_DUMP_WRITE_BUFFER); + if (0 != simulate_error) { + for (DASTaskIter task_iter = merge_iter->begin_task_iter(); !task_iter.is_end(); ++task_iter) { + ObDASScanOp *das_op = static_cast(*task_iter); + for (int64_t i = 0; i < das_op->trans_info_array_.count(); i++) { + ObDatum *datum = das_op->trans_info_array_.at(i); + LOG_INFO("dump GLobalIndexBack das task trans info", K(i), + KPC(das_op->trans_info_array_.at(i)), + K(das_op->get_scan_param().key_ranges_.at(i)), + K(das_op->get_tablet_id())); + } + } + } + + return ret; +} + +} // namespace sql +} // namespace oceanbase diff --git 
a/src/sql/das/iter/ob_das_lookup_iter.h b/src/sql/das/iter/ob_das_lookup_iter.h new file mode 100644 index 0000000000..5e9b545418 --- /dev/null +++ b/src/sql/das/iter/ob_das_lookup_iter.h @@ -0,0 +1,128 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OBDEV_SRC_SQL_DAS_ITER_OB_DAS_LOOKUP_ITER_H_ +#define OBDEV_SRC_SQL_DAS_ITER_OB_DAS_LOOKUP_ITER_H_ + +#include "sql/das/iter/ob_das_iter.h" +#include "lib/utility/ob_tracepoint.h" +namespace oceanbase +{ +using namespace common; +namespace sql +{ + +struct ObDASLookupIterParam : public ObDASIterParam +{ + int64_t default_batch_row_count_; + bool can_retry_; + const ObExpr *calc_part_id_; + const ObDASScanCtDef *lookup_ctdef_; + ObDASScanRtDef *lookup_rtdef_; + const ExprFixedArray *rowkey_exprs_; + ObTableID ref_table_id_; + ObDASIter *index_table_iter_; + ObDASIter *data_table_iter_; + + virtual bool is_valid() const override + { + return ObDASIterParam::is_valid() && + index_table_iter_ != nullptr && data_table_iter_ != nullptr && calc_part_id_ != nullptr && + lookup_ctdef_ != nullptr && lookup_rtdef_ != nullptr && rowkey_exprs_ != nullptr; + } +}; + +class ObDASLookupIter : public ObDASIter +{ +public: + ObDASLookupIter() + : calc_part_id_(nullptr), + lookup_ctdef_(nullptr), + lookup_rtdef_(nullptr), + rowkey_exprs_(nullptr), + index_table_iter_(nullptr), + data_table_iter_(nullptr), + lookup_rowkey_cnt_(0), + lookup_row_cnt_(0), + can_retry_(false), + state_(INDEX_SCAN), + index_end_(false), + default_batch_row_count_(0), + 
iter_alloc_(nullptr) + {} + virtual ~ObDASLookupIter() {} + + INHERIT_TO_STRING_KV("ObDASIter", ObDASIter, K_(state), K_(index_end), K(lookup_ctdef_->ref_table_id_)); + +protected: + virtual int inner_init(ObDASIterParam ¶m) override; + virtual int inner_reuse() override; + virtual int inner_release() override; + virtual int inner_get_next_row() override; + virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override; + virtual void reset_lookup_state(); + + virtual int add_rowkey() = 0; + virtual int add_rowkeys(int64_t count) = 0; + virtual int do_index_lookup() = 0; + virtual int check_index_lookup() = 0; + +protected: + const ObExpr *calc_part_id_; + const ObDASScanCtDef *lookup_ctdef_; + ObDASScanRtDef *lookup_rtdef_; + const ExprFixedArray *rowkey_exprs_; + ObDASIter *index_table_iter_; + ObDASIter *data_table_iter_; + int64_t lookup_rowkey_cnt_; + int64_t lookup_row_cnt_; + bool can_retry_; + int build_lookup_range(ObNewRange &range); + int build_trans_info_datum(const ObExpr *trans_info_expr, ObDatum *&datum_ptr); + +private: + enum LookupState : uint32_t + { + INDEX_SCAN, + DO_LOOKUP, + OUTPUT_ROWS, + FINISHED + }; + + LookupState state_; + bool index_end_; + int64_t default_batch_row_count_; + common::ObArenaAllocator *iter_alloc_; + char iter_alloc_buf_[sizeof(common::ObArenaAllocator)]; +}; + +class ObDASGlobalLookupIter : public ObDASLookupIter +{ +public: + ObDASGlobalLookupIter() + : ObDASLookupIter() + {} + virtual ~ObDASGlobalLookupIter() {} + +protected: + virtual int add_rowkey() override; + virtual int add_rowkeys(int64_t count) override; + virtual int do_index_lookup() override; + virtual int check_index_lookup() override; +}; + +} // namespace sql +} // namespace oceanbase + + + +#endif /* OBDEV_SRC_SQL_DAS_ITER_OB_DAS_LOOKUP_ITER_H_ */ diff --git a/src/sql/das/iter/ob_das_merge_iter.cpp b/src/sql/das/iter/ob_das_merge_iter.cpp new file mode 100644 index 0000000000..b7a3735e72 --- /dev/null +++ 
b/src/sql/das/iter/ob_das_merge_iter.cpp @@ -0,0 +1,766 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_DAS +#include "sql/das/iter/ob_das_merge_iter.h" +#include "sql/das/ob_data_access_service.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ + +int MergeStoreRows::init(common::ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(store_rows_ = + static_cast(allocator.alloc(max_size_ * sizeof(LastDASStoreRow))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory", K_(max_size), K(ret)); + } else { + for (int64_t i = 0; i < max_size_; i++) { + new (store_rows_ + i) LastDASStoreRow(allocator); + store_rows_[i].reuse_ = true; + } + } + + return ret; +} + +int MergeStoreRows::save(bool is_vectorized, int64_t size) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(size > max_size_) || OB_ISNULL(store_rows_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error for save store rows", K(size), K_(max_size), K(store_rows_), K(ret)); + } else { + if (is_vectorized) { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*eval_ctx_); + batch_info_guard.set_batch_size(size); + for (int64_t i = 0; OB_SUCC(ret) && i < size; i++) { + batch_info_guard.set_batch_idx(i); + if (OB_FAIL(store_rows_[i].save_store_row(*exprs_, *eval_ctx_))) { + LOG_WARN("das merge iter failed to store rows", K(ret)); + } + } + } else if (OB_FAIL(store_rows_[0].save_store_row(*exprs_, *eval_ctx_))) { + LOG_WARN("das merge iter failed 
to store rows", K(ret)); + } + } + if (OB_SUCC(ret)) { + cur_idx_ = 0; + saved_size_ = size; + } + return ret; +} + +/* Replays saved rows through store_row_->to_expr(): in vectorized mode the `size` rows starting at cur_idx_ (OB_INVALID_ARGUMENT if that would run past saved_size_), otherwise the single row at cur_idx_. cur_idx_ is advanced past the rows consumed. */ +int MergeStoreRows::to_expr(bool is_vectorized, int64_t size) +{ + int ret = OB_SUCCESS; + if (is_vectorized) { + if (cur_idx_ + size > saved_size_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument, exceeds saved size", K_(cur_idx), K(size), K_(saved_size), K(ret)); + } else { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*eval_ctx_); + batch_info_guard.set_batch_size(size); + for (int64_t i = 0; OB_SUCC(ret) && i < size; i++) { + batch_info_guard.set_batch_idx(i); + OZ(store_rows_[cur_idx_ + i].store_row_->to_expr(*exprs_, *eval_ctx_)); + } + cur_idx_ += size; + } + } else { + OZ(store_rows_[cur_idx_].store_row_->to_expr(*exprs_, *eval_ctx_)); + cur_idx_++; + } + + return ret; +} + +/* Returns the integer stored in cell group_id_idx_ of the saved row at cur_idx_; asserts that cur_idx_ is below saved_size_. */ +int64_t MergeStoreRows::cur_group_idx() +{ + OB_ASSERT(cur_idx_ < saved_size_); + return store_rows_[cur_idx_].store_row_->cells()[group_id_idx_].get_int(); +} + +/* Drops the saved-row bookkeeping (cur_idx_, saved_size_) while leaving store_rows_ and max_size_ untouched. */ +void MergeStoreRows::reuse() +{ + cur_idx_ = OB_INVALID_INDEX; + saved_size_ = 0; +} + +/* Clears the bookkeeping fields and runs the destructor of every store row. max_size_ is the loop bound of the destructor loop, so it is restored to 1 only after the loop: resetting it first would destroy slot 0 alone and skip the other rows that init() placement-constructed. The row memory itself is not freed here. */ +void MergeStoreRows::reset() +{ + exprs_ = nullptr; + eval_ctx_ = nullptr; + group_id_idx_ = OB_INVALID_INDEX; + saved_size_ = 0; + cur_idx_ = OB_INVALID_INDEX; + if (OB_NOT_NULL(store_rows_)) { + for (int64_t i = 0; i < max_size_; i++) { + store_rows_[i].~LastDASStoreRow(); + } + store_rows_ = nullptr; + } + max_size_ = 1; +} + +int ObDASMergeIter::set_merge_status(MergeType merge_type) +{ + int ret = OB_SUCCESS; + merge_type_ = merge_type; + if (merge_type == MergeType::SEQUENTIAL_MERGE) { + get_next_row_ = &ObDASMergeIter::get_next_seq_row; + get_next_rows_ = &ObDASMergeIter::get_next_seq_rows; + seq_task_idx_ = 0; + DASTaskIter task_iter = das_ref_->begin_task_iter(); + if (need_update_partition_id_) { + if (OB_FAIL(update_output_tablet_id(*task_iter))) { + LOG_WARN("failed to update output tablet id", K(ret), K((*task_iter)->get_tablet_loc()->tablet_id_)); + } + } + } else { + get_next_row_ = 
&ObDASMergeIter::get_next_sorted_row; + get_next_rows_ = &ObDASMergeIter::get_next_sorted_rows; + need_prepare_sort_merge_info_ = true; + } + + return ret; +} + +void ObDASMergeIter::set_global_lookup_iter(ObDASMergeIter *global_lookup_iter) +{ + wild_datum_info_.global_lookup_iter_ = global_lookup_iter; +} + +common::ObIAllocator *ObDASMergeIter::get_das_alloc() +{ + common::ObIAllocator *alloc = nullptr; + if (OB_NOT_NULL(das_ref_)) { + alloc = &das_ref_->get_das_alloc(); + } + return alloc; +} + +int ObDASMergeIter::create_das_task(const ObDASTabletLoc *tablet_loc, ObDASScanOp *&scan_op, bool &reuse_op) +{ + int ret = OB_SUCCESS; + ObIDASTaskOp *task_op = nullptr; + reuse_op = false; + if (OB_ISNULL(das_ref_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr das ref", K(ret)); + } else if (OB_NOT_NULL(task_op = das_ref_->find_das_task(tablet_loc, DAS_OP_TABLE_SCAN))) { + // reuse scan op + reuse_op = true; + } else if (OB_FAIL(das_ref_->create_das_task(tablet_loc, DAS_OP_TABLE_SCAN, task_op))) { + LOG_WARN("das ref failed to create das task", K(ret)); + } + if (OB_SUCC(ret)) { + scan_op = static_cast(task_op); + } + return ret; +} + +bool ObDASMergeIter::has_task() const +{ + bool bret = false; + if (OB_NOT_NULL(das_ref_)) { + bret = das_ref_->has_task(); + } + return bret; +} + +int32_t ObDASMergeIter::get_das_task_cnt() const +{ + int32_t cnt = 0; + if (OB_NOT_NULL(das_ref_)) { + cnt = das_ref_->get_das_task_cnt(); + } + return cnt; +} + +DASTaskIter ObDASMergeIter::begin_task_iter() +{ + DASTaskIter task_iter; + if (OB_NOT_NULL(das_ref_)) { + task_iter = das_ref_->begin_task_iter(); + } + return task_iter; +} + +bool ObDASMergeIter::is_all_local_task() const +{ + bool bret = false; + if (OB_NOT_NULL(das_ref_)) { + bret = das_ref_->is_all_local_task(); + } + return bret; +} + +int ObDASMergeIter::rescan_das_task(ObDASScanOp *scan_op) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(das_ref_) || OB_ISNULL(scan_op)) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("unexpected nullptr", K(das_ref_), K(scan_op), K(ret)); + } else if (OB_FAIL(MTL(ObDataAccessService*)->rescan_das_task(*das_ref_, *scan_op))) { + LOG_WARN("failed to rescan das task", K(ret)); + } + return ret; +} + +int ObDASMergeIter::do_table_scan() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(das_ref_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr das ref", K(das_ref_), K(ret)); + } else if (OB_FAIL(das_ref_->execute_all_task())) { + LOG_WARN("failed to execute all das task", K(ret)); + } else { + DASTaskIter task_iter = das_ref_->begin_task_iter(); + for (; OB_SUCC(ret) && !task_iter.is_end(); ++task_iter) { + ObIDASTaskOp *das_task_ptr = task_iter.get_item(); + if (OB_ISNULL(das_task_ptr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected das task nullptr", K(ret)); + } else if (OB_FAIL(das_tasks_arr_.push_back(das_task_ptr))) { + LOG_WARN("failed to push back das task ptr", K(ret)); + } + } // for end + LOG_DEBUG("[DAS ITER] do table scan", K(ref_table_id_), K(das_tasks_arr_.count())); + } + return ret; +} + +int ObDASMergeIter::inner_init(ObDASIterParam ¶m) +{ + int ret = OB_SUCCESS; + if (param.type_ != ObDASIterType::DAS_ITER_MERGE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("inner init das iter with bad param type", K(param)); + } else { + ObDASMergeIterParam &merge_param = static_cast(param); + eval_infos_ = merge_param.eval_infos_; + need_update_partition_id_ = merge_param.need_update_partition_id_; + pdml_partition_id_ = merge_param.pdml_partition_id_; + partition_id_calc_type_ = merge_param.partition_id_calc_type_; + ref_table_id_ = merge_param.ref_table_id_; + should_scan_index_ = merge_param.should_scan_index_; + is_vectorized_ = merge_param.is_vectorized_; + iter_alloc_ = new (iter_alloc_buf_) common::ObArenaAllocator(); + iter_alloc_->set_attr(ObMemAttr(MTL_ID(), "ScanDASCtx")); + das_ref_ = new (das_ref_buf_) ObDASRef(*eval_ctx_, *exec_ctx_); + das_ref_->set_mem_attr(ObMemAttr(MTL_ID(), "ScanDASCtx")); + 
das_ref_->set_expr_frame_info(merge_param.frame_info_); + das_ref_->set_execute_directly(merge_param.execute_das_directly_); + das_ref_->set_enable_rich_format(merge_param.enable_rich_format_); + + if (group_id_expr_ != nullptr) { + for (int64_t i = 0; i < output_->count(); i++) { + if (output_->at(i) == group_id_expr_) { + group_id_idx_ = i; + break; + } + } + if (group_id_idx_ == OB_INVALID_INDEX) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("fail to get group id idx", K(ret), KPC_(group_id_expr), KPC_(output)); + } + } + } + return ret; +} + +int ObDASMergeIter::inner_reuse() +{ + int ret = OB_SUCCESS; + seq_task_idx_ = OB_INVALID_INDEX; + for (int64_t i = 0; i < merge_store_rows_arr_.count(); i++) { + merge_store_rows_arr_.at(i).reset(); + } + merge_store_rows_arr_.reuse(); + if (OB_NOT_NULL(iter_alloc_)) { + iter_alloc_->reset_remain_one_page(); + } + if (OB_NOT_NULL(das_ref_)) { + if (OB_FAIL(das_ref_->close_all_task())) { + LOG_WARN("das ref failed to close das task", K(ret)); + } + das_ref_->reuse(); + } + das_tasks_arr_.reuse(); + merge_state_arr_.reuse(); + return ret; +} + +int ObDASMergeIter::inner_release() +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; i < merge_store_rows_arr_.count(); i++) { + merge_store_rows_arr_.at(i).reset(); + } + merge_store_rows_arr_.reset(); + if (OB_NOT_NULL(iter_alloc_)) { + iter_alloc_->reset(); + iter_alloc_->~ObArenaAllocator(); + iter_alloc_ = nullptr; + } + if (OB_NOT_NULL(das_ref_)) { + if (OB_FAIL(das_ref_->close_all_task())) { + LOG_WARN("das ref failed to close das task", K(ret)); + } + das_ref_->reset(); + das_ref_->~ObDASRef(); + das_ref_ = nullptr; + } + das_tasks_arr_.reset(); + merge_state_arr_.reset(); + return ret; +} + +int ObDASMergeIter::inner_get_next_row() +{ + int ret = OB_SUCCESS; + clear_evaluated_flag(); + if (OB_FAIL((this->*get_next_row_)())) { + if (ret != OB_ITER_END) { + LOG_WARN("das iter failed to get next row", K(ret)); + } + } + return ret; +} + +int 
ObDASMergeIter::inner_get_next_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + clear_evaluated_flag(); + if (OB_FAIL((this->*get_next_rows_)(count, capacity))) { + if (OB_UNLIKELY(ret != OB_ITER_END)) { + LOG_WARN("das merge iter failed to get next rows", K(ret)); + } + } + LOG_DEBUG("das merge iter get next rows end", K(count), K(merge_type_), K(merge_state_arr_), K(ret)); + const ObBitVector *skip = nullptr; + PRINT_VECTORIZED_ROWS(SQL, DEBUG, *eval_ctx_, *output_, count, skip); + return ret; +} + +void ObDASMergeIter::reset_datum_ptr(ObDASScanOp *scan_op, int64_t &capacity) +{ + ObDASCtx &das_ctx = scan_op->get_rtdef()->eval_ctx_->exec_ctx_.get_das_ctx(); + if (das_ctx.in_das_group_scan_) { + int64_t simulate_max_rowsets = - EVENT_CALL(EventTable::EN_DAS_SIMULATE_MAX_ROWSETS); + capacity = (simulate_max_rowsets > 0 && simulate_max_rowsets < capacity) ? simulate_max_rowsets : capacity; + scan_op->reset_access_datums_ptr(capacity); + } else { + reset_wild_datum_ptr(); + } +} + +void ObDASMergeIter::reset_wild_datum_ptr() +{ + if (OB_NOT_NULL(wild_datum_info_.exprs_) && wild_datum_info_.max_output_rows_ > 0) { + FOREACH_CNT(e, *wild_datum_info_.exprs_) + { + (*e)->locate_datums_for_update(*eval_ctx_, wild_datum_info_.max_output_rows_); + ObEvalInfo &info = (*e)->get_eval_info(*eval_ctx_); + info.point_to_frame_ = true; + } + wild_datum_info_.exprs_ = nullptr; + wild_datum_info_.max_output_rows_ = 0; + } + + // global index scan and its lookup maybe share some expr, + // so remote lookup task change its datum ptr, + // and also lead index scan touch the wild datum ptr + // so need to associate the result iterator of scan and lookup + // resetting the index scan result datum ptr will also reset the lookup result datum ptr + if (OB_NOT_NULL(wild_datum_info_.global_lookup_iter_)) { + wild_datum_info_.global_lookup_iter_->reset_wild_datum_ptr(); + } +} + +void ObDASMergeIter::update_wild_datum_ptr(int64_t rows_count) +{ + wild_datum_info_.exprs_ = 
output_; + wild_datum_info_.max_output_rows_ = std::max(wild_datum_info_.max_output_rows_, rows_count); +} + +void ObDASMergeIter::clear_evaluated_flag() +{ + if (OB_NOT_NULL(eval_infos_)) { + for (int64_t i = 0; i < eval_infos_->count(); i++) { + eval_infos_->at(i)->clear_evaluated_flag(); + } + } +} + +int ObDASMergeIter::update_output_tablet_id(ObIDASTaskOp *output_das_task) +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(pdml_partition_id_) && OB_NOT_NULL(eval_ctx_) && OB_NOT_NULL(output_das_task)) { + const ObDASTabletLoc *tablet_loc = nullptr; + int64_t output_id = OB_INVALID_ID; + if (partition_id_calc_type_ > 0) { + tablet_loc = output_das_task->get_tablet_loc(); + } else if (should_scan_index_) { + tablet_loc = ObDASUtils::get_related_tablet_loc(*output_das_task->get_tablet_loc(), ref_table_id_); + } else { + tablet_loc = output_das_task->get_tablet_loc(); + } + + if (OB_ISNULL(tablet_loc)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr tablet loc", K(ret)); + } else { + if (partition_id_calc_type_ == 0) { + output_id = tablet_loc->tablet_id_.id(); + } else if (partition_id_calc_type_ == 1) { + output_id = tablet_loc->first_level_part_id_ != OB_INVALID_ID ? 
+ tablet_loc->first_level_part_id_ : tablet_loc->partition_id_; + } else if (partition_id_calc_type_ == 2) { + output_id = tablet_loc->partition_id_; + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("got invalid partition id calc type", K(partition_id_calc_type_), K(ret)); + } + } + + if (OB_SUCC(ret)) { + const ObExpr *expr = pdml_partition_id_; + if (is_vectorized_) { + ObDatum *datums = expr->locate_datums_for_update(*eval_ctx_, max_size_); + for (int64_t i = 0; i < max_size_; i++) { + datums[i].set_int(output_id); + } + } else { + expr->locate_datum_for_write(*eval_ctx_).set_int(output_id); + } + expr->set_evaluated_projected(*eval_ctx_); + LOG_TRACE("find the partition id expr in pdml table scan", K(ret), K(output_id), K(expr), KPC(tablet_loc)); + } + } + return ret; +} + +int ObDASMergeIter::get_next_seq_row() +{ + int ret = OB_SUCCESS; + bool got_row = false; + if (OB_UNLIKELY(seq_task_idx_ == OB_INVALID_INDEX)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected invalid index", K(ret)); + } else if (OB_UNLIKELY(seq_task_idx_ == das_tasks_arr_.count())) { + ret = OB_ITER_END; + } else { + while (OB_SUCC(ret) && !got_row) { + clear_evaluated_flag(); + ObDASScanOp *scan_op = DAS_SCAN_OP(das_tasks_arr_.at(seq_task_idx_)); + if (OB_ISNULL(scan_op)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected das task op type", K(ret)); + } else { + if (OB_SUCC(scan_op->get_output_result_iter()->get_next_row())) { + got_row = true; + } else if (OB_ITER_END == ret) { + ++seq_task_idx_; + if (seq_task_idx_ == das_tasks_arr_.count()) { + // keep the ret = OB_ITER_END + } else { + ret = OB_SUCCESS; + scan_op = DAS_SCAN_OP(das_tasks_arr_.at(seq_task_idx_)); + if (need_update_partition_id_) { + if (OB_FAIL(update_output_tablet_id(scan_op))) { + LOG_WARN("failed to update output tablet id", K(ret), K(scan_op->get_tablet_loc()->tablet_id_)); + } + } + } + } else { + LOG_WARN("das iter failed to get next row", K(ret)); + } + } + } // while end + } + return ret; +} + +int 
ObDASMergeIter::get_next_seq_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + bool got_rows = false; + if (OB_UNLIKELY(seq_task_idx_ == OB_INVALID_INDEX)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected invalid index", K(ret)); + } else if (OB_UNLIKELY(seq_task_idx_ == das_tasks_arr_.count())) { + ret = OB_ITER_END; + } else { + while (OB_SUCC(ret) && !got_rows) { + clear_evaluated_flag(); + ObDASScanOp *scan_op = DAS_SCAN_OP(das_tasks_arr_.at(seq_task_idx_)); + if (OB_ISNULL(scan_op)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected das task op type", K(ret)); + } else { + if (scan_op->is_local_task()) { + reset_datum_ptr(scan_op, capacity); + } + count = 0; + ret = scan_op->get_output_result_iter()->get_next_rows(count, capacity); + if (OB_ITER_END == ret && count > 0) { + ret = OB_SUCCESS; + } + if (OB_SUCC(ret)) { + got_rows = true; + if (!scan_op->is_local_task()) { + update_wild_datum_ptr(count); + } + } else if (OB_ITER_END == ret) { + ++seq_task_idx_; + if (seq_task_idx_ == das_tasks_arr_.count()) { + // keep the ret = OB_ITER_END + } else { + ret = OB_SUCCESS; + scan_op = DAS_SCAN_OP(das_tasks_arr_.at(seq_task_idx_)); + if (need_update_partition_id_) { + if (OB_FAIL(update_output_tablet_id(scan_op))) { + LOG_WARN("update output tablet id failed", K(ret), K(scan_op->get_tablet_loc()->tablet_id_)); + } + } + } + } else { + LOG_WARN("das iter failed to get next rows", K(ret)); + } + } + } // while end + } + return ret; +} + +int ObDASMergeIter::get_next_sorted_row() +{ + int ret = OB_SUCCESS; + int64_t output_idx = OB_INVALID_INDEX; + if (OB_FAIL(prepare_sort_merge_info())) { + LOG_WARN("failed to prepare sort merge info", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < das_tasks_arr_.count(); i++) { + if (!merge_state_arr_[i].das_task_iter_end_) { + if (!merge_state_arr_[i].row_store_have_data_) { + clear_evaluated_flag(); + ObDASScanOp *scan_op = DAS_SCAN_OP(das_tasks_arr_[i]); + if (OB_ISNULL(scan_op)) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("unexpected das task op type", K(ret), KPC(das_tasks_arr_[i])); + } else if (OB_SUCC(scan_op->get_output_result_iter()->get_next_row())) { + if (OB_FAIL(merge_store_rows_arr_[i].save(false, 1))) { + LOG_WARN("failed to save store row", K(ret)); + } else { + merge_state_arr_[i].row_store_have_data_ = true; + compare(i, output_idx); + } + } else if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + merge_state_arr_[i].das_task_iter_end_ = true; + } else { + LOG_WARN("das iter failed to get next row", K(ret)); + } + } else { + compare(i, output_idx); + } + } + } // for end + + if (OB_SUCC(ret)) { + if (output_idx == OB_INVALID_INDEX) { + ret = OB_ITER_END; + } else { + if (need_update_partition_id_) { + if (OB_FAIL(update_output_tablet_id(das_tasks_arr_[output_idx]))) { + ObTabletID tablet_id = das_tasks_arr_[output_idx]->get_tablet_loc()->tablet_id_; + LOG_WARN("failed to update output tablet id", K(ret), K(tablet_id)); + } + } + ret = merge_store_rows_arr_[output_idx].to_expr(false, 1); + if (OB_SUCC(ret)) { + merge_state_arr_[output_idx].row_store_have_data_ = merge_store_rows_arr_[output_idx].have_data(); + } else { + LOG_WARN("failed to convert store row to expr", K(output_idx), K(ret)); + } + } + } + return ret; +} + +int ObDASMergeIter::get_next_sorted_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + if (das_tasks_arr_.count() == 1) { + // only one das task, no need to compare + clear_evaluated_flag(); + ObDASScanOp *scan_op = DAS_SCAN_OP(das_tasks_arr_[0]); + if (OB_ISNULL(scan_op)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected das task op type", K(ret), KPC(scan_op)); + } else { + if (scan_op->is_local_task()) { + reset_datum_ptr(scan_op, capacity); + } + count = 0; + ret = scan_op->get_output_result_iter()->get_next_rows(count, capacity); + if (OB_ITER_END == ret && count > 0) { + ret = OB_SUCCESS; + } + if (OB_SUCC(ret)) { + if (!scan_op->is_local_task()) { + update_wild_datum_ptr(count); + } + } + } + } else 
{ + int64_t output_idx = OB_INVALID_INDEX; + if (OB_FAIL(prepare_sort_merge_info())) { + LOG_WARN("failed to prepare sort merge info", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < das_tasks_arr_.count(); i++) { + if (!merge_state_arr_[i].das_task_iter_end_) { + if (!merge_state_arr_[i].row_store_have_data_) { + clear_evaluated_flag(); + ObDASScanOp *scan_op = DAS_SCAN_OP(das_tasks_arr_[i]); + if (OB_ISNULL(scan_op)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected das task op type", K(ret), KPC(das_tasks_arr_[i])); + } else { + if (scan_op->is_local_task()) { + reset_datum_ptr(scan_op, capacity); + } + count = 0; + ret = scan_op->get_output_result_iter()->get_next_rows(count, capacity); + if (OB_ITER_END == ret && count > 0) { + ret = OB_SUCCESS; + } + if (OB_SUCC(ret)) { + if (!scan_op->is_local_task()) { + update_wild_datum_ptr(count); + } + if (OB_FAIL(merge_store_rows_arr_[i].save(true, count))) { + LOG_WARN("failed to save store row", K(ret)); + } else { + merge_state_arr_[i].row_store_have_data_ = true; + compare(i, output_idx); + } + } else if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + merge_state_arr_[i].das_task_iter_end_ = true; + } else { + LOG_WARN("das iter failed to get next rows", K(ret)); + } + } + } else { + compare(i, output_idx); + } + } + } + + if (OB_SUCC(ret)) { + if (output_idx == OB_INVALID_INDEX) { + count = 0; + ret = OB_ITER_END; + } else { + // We need keep the datum points to the frame. 
+ reset_wild_datum_ptr(); + if (need_update_partition_id_) { + if (OB_FAIL(update_output_tablet_id(das_tasks_arr_[output_idx]))) { + ObTabletID tablet_id = das_tasks_arr_[output_idx]->get_tablet_loc()->tablet_id_; + LOG_WARN("failed to update output tablet id", K(ret), K(tablet_id)); + } + } + ret = merge_store_rows_arr_[output_idx].to_expr(true, 1); + if (OB_SUCC(ret)) { + count = 1; + merge_state_arr_[output_idx].row_store_have_data_ = merge_store_rows_arr_[output_idx].have_data(); + } else { + LOG_WARN("failed to convert store row to expr", K(output_idx), K(ret)); + } + } + } + } + + return ret; +} + +int ObDASMergeIter::prepare_sort_merge_info() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(need_prepare_sort_merge_info_)) { + if (das_tasks_arr_.count() > 0) { + // init merge state for each das task + if (merge_state_arr_.empty()) { + if (OB_FAIL(merge_state_arr_.reserve(das_tasks_arr_.count()))) { + LOG_WARN("failed to reserve merge state array", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < das_tasks_arr_.count(); i++) { + if (OB_FAIL(merge_state_arr_.push_back(MergeState()))) { + LOG_WARN("failed to push back merge state", K(ret)); + } + } + } + } else { + for (int64_t i = 0; i < merge_state_arr_.count(); i++) { + merge_state_arr_.at(i).reuse(); + } + } + + // init store rows for each das task + if (merge_store_rows_arr_.empty()) { + if (OB_FAIL(merge_store_rows_arr_.reserve(das_tasks_arr_.count()))) { + LOG_WARN("failed to reserve merge store rows array", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < das_tasks_arr_.count(); i++) { + if (OB_FAIL(merge_store_rows_arr_.push_back( + MergeStoreRows(output_, eval_ctx_, group_id_idx_, max_size_)))) { + LOG_WARN("failed to push back merge store rows", K(ret)); + } else if (OB_FAIL(merge_store_rows_arr_.at(i).init(*iter_alloc_))) { + LOG_WARN("failed to init merge store rows", K(ret)); + } + } + } + } else { + for (int64_t i = 0; i < merge_store_rows_arr_.count(); i++) { + 
merge_store_rows_arr_.at(i).reuse(); + } + } + } + need_prepare_sort_merge_info_ = false; + } + return ret; +} + +void ObDASMergeIter::compare(int64_t cur_idx, int64_t &output_idx) +{ + if (OB_INVALID_INDEX == output_idx) { + output_idx = cur_idx; + } else { + // compare the values of group_idx. + int64_t output_group_idx = merge_store_rows_arr_[output_idx].cur_group_idx(); + int64_t cur_group_idx = merge_store_rows_arr_[cur_idx].cur_group_idx(); + if (output_group_idx > cur_group_idx) { + output_idx = cur_idx; + } + } +} + +}//end namespace sql +}//end namespace oceanbase diff --git a/src/sql/das/iter/ob_das_merge_iter.h b/src/sql/das/iter/ob_das_merge_iter.h new file mode 100644 index 0000000000..a74a942fc8 --- /dev/null +++ b/src/sql/das/iter/ob_das_merge_iter.h @@ -0,0 +1,225 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OBDEV_SRC_SQL_DAS_ITER_OB_DAS_MERGE_ITER_H_ +#define OBDEV_SRC_SQL_DAS_ITER_OB_DAS_MERGE_ITER_H_ +#include "sql/das/ob_das_utils.h" +#include "sql/das/iter/ob_das_iter.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ + +class ObDASMergeIterParam : public ObDASIterParam +{ +public: + ObFixedArray *eval_infos_; + bool need_update_partition_id_; + ObExpr *pdml_partition_id_; + int64_t partition_id_calc_type_; + bool should_scan_index_; + common::ObTableID ref_table_id_; + bool is_vectorized_; + const ObExprFrameInfo *frame_info_; + bool execute_das_directly_; + bool enable_rich_format_; + + virtual bool is_valid() const override + { + return ObDASIterParam::is_valid() && eval_infos_ != nullptr && frame_info_ != nullptr; + } +}; + +class MergeStoreRows +{ +public: + MergeStoreRows() + : exprs_(nullptr), + eval_ctx_(nullptr), + group_id_idx_(OB_INVALID_INDEX), + max_size_(1), + saved_size_(0), + cur_idx_(OB_INVALID_INDEX), + store_rows_(nullptr) + {} + MergeStoreRows(const common::ObIArray *exprs, + ObEvalCtx *eval_ctx, + int64_t group_id_idx, + int64_t max_size) + : exprs_(exprs), + eval_ctx_(eval_ctx), + group_id_idx_(group_id_idx), + max_size_(max_size), + saved_size_(0), + cur_idx_(OB_INVALID_INDEX), + store_rows_(nullptr) + {} + + int init(common::ObIAllocator &allocator); + int save(bool is_vectorized, int64_t size); + int to_expr(bool is_vectorized, int64_t size); + bool have_data() const { return cur_idx_ != OB_INVALID_INDEX && cur_idx_ < saved_size_; } + int64_t cur_group_idx(); + void reuse(); + void reset(); + TO_STRING_KV(K_(saved_size), + K_(cur_idx)); + +public: + typedef ObChunkDatumStore::LastStoredRow LastDASStoreRow; + const common::ObIArray *exprs_; + ObEvalCtx *eval_ctx_; + int64_t group_id_idx_; + int64_t max_size_; + int64_t saved_size_; + int64_t cur_idx_; + LastDASStoreRow *store_rows_; +}; + +class ObDASMergeIter : public ObDASIter +{ +public: + ObDASMergeIter() + : wild_datum_info_(), + 
merge_type_(SEQUENTIAL_MERGE), + eval_infos_(nullptr), + need_update_partition_id_(false), + pdml_partition_id_(nullptr), + partition_id_calc_type_(0), + should_scan_index_(false), + ref_table_id_(), + is_vectorized_(false), + das_ref_(nullptr), + iter_alloc_(nullptr), + das_tasks_arr_(), + get_next_row_(nullptr), + get_next_rows_(nullptr), + seq_task_idx_(OB_INVALID_INDEX), + group_id_idx_(OB_INVALID_INDEX), + need_prepare_sort_merge_info_(false), + merge_state_arr_(), + merge_store_rows_arr_() + {} + virtual ~ObDASMergeIter() {} + + virtual int set_merge_status(MergeType merge_type) override; + MergeType get_merge_type() const { return merge_type_; } + void set_global_lookup_iter(ObDASMergeIter *global_lookup_iter); + INHERIT_TO_STRING_KV("ObDASIter", ObDASIter, K_(merge_type), K_(ref_table_id)); + + /********* DAS REF BEGIN *********/ + common::ObIAllocator *get_das_alloc(); + int create_das_task(const ObDASTabletLoc *tablet_loc, ObDASScanOp *&scan_op, bool &reuse_op); + bool has_task() const; + int32_t get_das_task_cnt() const; + DASTaskIter begin_task_iter(); + bool is_all_local_task() const; + int rescan_das_task(ObDASScanOp *scan_op); + // do_table_scan() need be called before get_next_row(s). 
+ int do_table_scan(); + /********* DAS REF END *********/ + +protected: + virtual int inner_init(ObDASIterParam ¶m) override; + virtual int inner_reuse() override; + virtual int inner_release() override; + virtual int inner_get_next_row() override; + virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override; + + void reset_datum_ptr(ObDASScanOp *scan_op, int64_t &capacity); + void reset_wild_datum_ptr(); + void update_wild_datum_ptr(int64_t rows_count); + void clear_evaluated_flag(); + int update_output_tablet_id(ObIDASTaskOp *output_das_task); + +private: + int get_next_seq_row(); + int get_next_seq_rows(int64_t &count, int64_t capacity); + int get_next_sorted_row(); + int get_next_sorted_rows(int64_t &count, int64_t capacity); + int prepare_sort_merge_info(); + void compare(int64_t cur_idx, int64_t &output_idx); + +private: + + struct WildDatumPtrInfo + { + WildDatumPtrInfo() + : exprs_(nullptr), + max_output_rows_(0), + global_lookup_iter_(nullptr) + { } + const ObExprPtrIArray *exprs_; + int64_t max_output_rows_; + // global index scan and its lookup maybe share some expr, + // so remote lookup task change its datum ptr, + // and also lead index scan touch the wild datum ptr + // so need to associate the result iterator of scan and lookup + // resetting the index scan result datum ptr will also reset the lookup result datum ptr + ObDASMergeIter *global_lookup_iter_; + }; + + WildDatumPtrInfo wild_datum_info_; + MergeType merge_type_; + ObFixedArray *eval_infos_; + bool need_update_partition_id_; + ObExpr *pdml_partition_id_; + int64_t partition_id_calc_type_; + bool should_scan_index_; + common::ObTableID ref_table_id_; + bool is_vectorized_; + ObDASRef *das_ref_; + char das_ref_buf_[sizeof(ObDASRef)]; + common::ObArenaAllocator *iter_alloc_; + char iter_alloc_buf_[sizeof(common::ObArenaAllocator)]; + typedef common::ObSEArray DasTaskArray; + DasTaskArray das_tasks_arr_; + int (ObDASMergeIter::*get_next_row_)(); + int 
(ObDASMergeIter::*get_next_rows_)(int64_t&, int64_t); + + /********* SEQUENTIAL MERGE BEGIN *********/ + int64_t seq_task_idx_; + /********* SEQUENTIAL MERGE END *********/ + + /********* SORT MERGE BEGIN *********/ + struct MergeState + { + bool row_store_have_data_; + bool das_task_iter_end_; + MergeState() + : row_store_have_data_(false), + das_task_iter_end_(false) + {} + void reuse() + { + row_store_have_data_ = false; + das_task_iter_end_ = false; + } + TO_STRING_KV(K_(row_store_have_data), + K_(das_task_iter_end)); + }; + + int64_t group_id_idx_; + bool need_prepare_sort_merge_info_; + typedef common::ObSEArray MergeStateArray; + typedef common::ObSEArray MergeStoreRowsArray; + MergeStateArray merge_state_arr_; + MergeStoreRowsArray merge_store_rows_arr_; + /********* SORT MERGE END *********/ +}; + +}//end namespace sql +}//end namespace oceanbase + +#endif /* OBDEV_SRC_SQL_DAS_ITER_OB_DAS_MERGE_ITER_H_ */ diff --git a/src/sql/das/ob_das_attach_define.cpp b/src/sql/das/ob_das_attach_define.cpp index 1dbc7cfdf9..4ab1cb75d1 100644 --- a/src/sql/das/ob_das_attach_define.cpp +++ b/src/sql/das/ob_das_attach_define.cpp @@ -15,17 +15,63 @@ */ #define USING_LOG_PREFIX SQL_DAS #include "sql/das/ob_das_attach_define.h" +#include "sql/das/ob_das_scan_op.h" #include "sql/das/ob_das_factory.h" -namespace oceanbase +namespace oceanbase { +namespace sql { + +OB_SERIALIZE_MEMBER(ObDASAttachCtDef, + result_output_); + +OB_SERIALIZE_MEMBER(ObDASAttachRtDef); + +OB_SERIALIZE_MEMBER((ObDASTableLookupCtDef, ObDASAttachCtDef)); + +const ObDASScanCtDef *ObDASTableLookupCtDef::get_lookup_scan_ctdef() const { -namespace sql + const ObDASScanCtDef *scan_ctdef = nullptr; + OB_ASSERT(2 == children_cnt_ && children_ != nullptr); + if (DAS_OP_TABLE_SCAN == children_[1]->op_type_) { + scan_ctdef = static_cast(children_[1]); + } + return scan_ctdef; +} + +OB_SERIALIZE_MEMBER((ObDASTableLookupRtDef, ObDASAttachRtDef)); + +ObDASScanRtDef *ObDASTableLookupRtDef::get_lookup_scan_rtdef() { + 
ObDASScanRtDef *scan_rtdef = nullptr; + OB_ASSERT(2 == children_cnt_ && children_ != nullptr); + if (DAS_OP_TABLE_SCAN == children_[1]->op_type_) { + scan_rtdef = static_cast(children_[1]); + } + return scan_rtdef; +} + +OB_SERIALIZE_MEMBER((ObDASSortCtDef, ObDASAttachCtDef), + sort_exprs_, + sort_collations_, + sort_cmp_funcs_, + limit_expr_, + offset_expr_, + fetch_with_ties_); + +OB_SERIALIZE_MEMBER((ObDASSortRtDef, ObDASAttachRtDef)); OB_DEF_SERIALIZE(ObDASAttachSpec) { int ret = OB_SUCCESS; - bool has_attach_ctdef = false; + bool has_attach_ctdef = attach_ctdef_ != nullptr; OB_UNIS_ENCODE(has_attach_ctdef); + if (has_attach_ctdef) { + OB_UNIS_ENCODE(attach_loc_metas_.size()); + FOREACH_X(it, attach_loc_metas_, OB_SUCC(ret)) { + const ObDASTableLocMeta *loc_meta = *it; + OB_UNIS_ENCODE(*loc_meta); + } + OZ(serialize_ctdef_tree(buf, buf_len, pos, attach_ctdef_)); + } return ret; } @@ -34,16 +80,167 @@ OB_DEF_DESERIALIZE(ObDASAttachSpec) int ret = OB_SUCCESS; bool has_attach_ctdef = false; OB_UNIS_DECODE(has_attach_ctdef); + if (OB_SUCC(ret) && has_attach_ctdef) { + int64_t list_size = 0; + OB_UNIS_DECODE(list_size); + for (int i = 0; OB_SUCC(ret) && i < list_size; ++i) { + ObDASTableLocMeta *loc_meta = OB_NEWx(ObDASTableLocMeta, &allocator_, allocator_); + if (OB_ISNULL(loc_meta)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate table location meta failed", K(ret)); + } else if (OB_FAIL(attach_loc_metas_.push_back(loc_meta))) { + LOG_WARN("store attach loc meta failed", K(ret)); + } else { + OB_UNIS_DECODE(*loc_meta); + } + } + OZ(deserialize_ctdef_tree(buf, data_len, pos, attach_ctdef_)); + } return ret; } OB_DEF_SERIALIZE_SIZE(ObDASAttachSpec) { int64_t len = 0; - bool has_attach_ctdef = false; + bool has_attach_ctdef = attach_ctdef_ != nullptr; OB_UNIS_ADD_LEN(has_attach_ctdef); + if (has_attach_ctdef) { + OB_UNIS_ADD_LEN(attach_loc_metas_.size()); + FOREACH(it, attach_loc_metas_) { + const ObDASTableLocMeta *loc_meta = *it; + 
OB_UNIS_ADD_LEN(*loc_meta); + } + len += get_ctdef_tree_serialize_size(attach_ctdef_); + } return len; } +int ObDASAttachSpec::serialize_ctdef_tree(char *buf, + const int64_t buf_len, + int64_t &pos, + const ObDASBaseCtDef *root) const +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(root)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("root ctdef is nullptr", K(ret)); + } else { + ObDASOpType op_type = root->op_type_; + bool has_main_ctdef = (scan_ctdef_ == root); + OB_UNIS_ENCODE(has_main_ctdef); + if (!has_main_ctdef) { + OB_UNIS_ENCODE(op_type); + OB_UNIS_ENCODE(*root); + OB_UNIS_ENCODE(root->children_cnt_); + for (int i = 0; OB_SUCC(ret) && i < root->children_cnt_; ++i) { + OZ(serialize_ctdef_tree(buf, buf_len, pos, root->children_[i])); + } + } + } + return ret; } + +int64_t ObDASAttachSpec::get_ctdef_tree_serialize_size(const ObDASBaseCtDef *root) const +{ + int64_t len = 0; + if (OB_NOT_NULL(root)) { + ObDASOpType op_type = root->op_type_; + bool has_main_ctdef = (scan_ctdef_ == root); + OB_UNIS_ADD_LEN(has_main_ctdef); + if (!has_main_ctdef) { + OB_UNIS_ADD_LEN(op_type); + OB_UNIS_ADD_LEN(*root); + OB_UNIS_ADD_LEN(root->children_cnt_); + for (int i = 0; i < root->children_cnt_; ++i) { + len += get_ctdef_tree_serialize_size(root->children_[i]); + } + } + } + return len; } + +int ObDASAttachSpec::deserialize_ctdef_tree(const char *buf, + const int64_t data_len, + int64_t &pos, + ObDASBaseCtDef *&root) +{ + int ret = OB_SUCCESS; + ObDASOpType op_type = DAS_OP_INVALID; + bool has_main_ctdef = 0; + OB_UNIS_DECODE(has_main_ctdef); + if (!has_main_ctdef) { + OB_UNIS_DECODE(op_type); + if (OB_SUCC(ret)) { + if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(op_type, allocator_, root))) { + LOG_WARN("allooc das ctde failed", K(ret), K(op_type)); + } + } + OB_UNIS_DECODE(*root); + OB_UNIS_DECODE(root->children_cnt_); + if (OB_SUCC(ret) && root->children_cnt_ > 0) { + if (OB_ISNULL(root->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &allocator_, root->children_cnt_))) { + ret = 
OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc child buffer failed", K(ret), K(root->children_cnt_)); + } + } + for (int i = 0; OB_SUCC(ret) && i < root->children_cnt_; ++i) { + OZ(deserialize_ctdef_tree(buf, data_len, pos, root->children_[i])); + } + } else { + root = scan_ctdef_; + } + return ret; +} + +const ObDASTableLocMeta *ObDASAttachSpec::get_attach_loc_meta(int64_t table_location_id, + int64_t ref_table_id) const +{ + const ObDASTableLocMeta *loc_meta = nullptr; + FOREACH_X(it, attach_loc_metas_, nullptr == loc_meta) { + const ObDASTableLocMeta *tmp_loc_meta = *it; + if (tmp_loc_meta->table_loc_id_ == table_location_id && + tmp_loc_meta->ref_table_id_ == ref_table_id) { + loc_meta = tmp_loc_meta; + } + } + return loc_meta; +} + +int ObDASAttachSpec::set_calc_exprs(const ExprFixedArray &calc_exprs, const int64_t max_batch_size) +{ + int ret = OB_SUCCESS; + if (nullptr != attach_ctdef_) { + if (OB_UNLIKELY(!ObDASTaskFactory::is_attached(attach_ctdef_->op_type_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected attach op type", K(ret), K(attach_ctdef_->op_type_)); + } + OZ(set_calc_exprs_tree(static_cast(attach_ctdef_), calc_exprs, max_batch_size)); + } + return ret; +} + +int ObDASAttachSpec::set_calc_exprs_tree(ObDASAttachCtDef *root, + const ExprFixedArray &calc_exprs, + const int64_t max_batch_size) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(root)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null root attach ctdef", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < root->children_cnt_; ++i) { + ObDASBaseCtDef *child = root->children_[i]; + if (ObDASTaskFactory::is_attached(child->op_type_)) { + ObDASAttachCtDef *attach_child = static_cast(child); + OZ(set_calc_exprs_tree(attach_child, calc_exprs, max_batch_size)); + } else if (child->op_type_ == DAS_OP_TABLE_SCAN) { + if (OB_FAIL(static_cast(child)->pd_expr_spec_.set_calc_exprs(calc_exprs, max_batch_size))) { + LOG_WARN("failed to set scan calc exprs", K(ret), KPC(child)); + } + } + } + 
return ret; +} + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/das/ob_das_attach_define.h b/src/sql/das/ob_das_attach_define.h index 529ef3bceb..f90e918554 100644 --- a/src/sql/das/ob_das_attach_define.h +++ b/src/sql/das/ob_das_attach_define.h @@ -18,17 +18,114 @@ #include "sql/das/ob_das_define.h" #include "share/ob_define.h" #include "sql/engine/expr/ob_expr.h" +#include "sql/engine/sort/ob_sort_basic_info.h" namespace oceanbase { namespace sql { +struct ObDASScanCtDef; +struct ObDASScanRtDef; + +struct ObDASAttachCtDef : ObDASBaseCtDef +{ + OB_UNIS_VERSION(1); +public: + ExprFixedArray result_output_; +protected: + ObDASAttachCtDef(common::ObIAllocator &allocator, ObDASOpType op_type) + : ObDASBaseCtDef(op_type), + result_output_(allocator) + { + } +}; + +struct ObDASAttachRtDef : ObDASBaseRtDef +{ + OB_UNIS_VERSION(1); +protected: + ObDASAttachRtDef(ObDASOpType op_type) + : ObDASBaseRtDef(op_type) + { + } +}; + +struct ObDASTableLookupCtDef : ObDASAttachCtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASTableLookupCtDef(common::ObIAllocator &alloc) + : ObDASAttachCtDef(alloc, DAS_OP_TABLE_LOOKUP), + is_global_index_(false) + { + } + const ObDASBaseCtDef *get_rowkey_scan_ctdef() const + { + OB_ASSERT(2 == children_cnt_ && children_ != nullptr); + return children_[0]; + } + const ObDASScanCtDef *get_lookup_scan_ctdef() const; + +public: + bool is_global_index_; +}; + +struct ObDASTableLookupRtDef : ObDASAttachRtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASTableLookupRtDef() + : ObDASAttachRtDef(DAS_OP_TABLE_LOOKUP) + {} + + virtual ~ObDASTableLookupRtDef() {} + + ObDASBaseRtDef *get_rowkey_scan_rtdef() + { + OB_ASSERT(2 == children_cnt_ && children_ != nullptr); + return children_[0]; + } + ObDASScanRtDef *get_lookup_scan_rtdef(); +}; + +struct ObDASSortCtDef : ObDASAttachCtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASSortCtDef(common::ObIAllocator &alloc) + : ObDASAttachCtDef(alloc, DAS_OP_SORT), + sort_exprs_(alloc), + sort_collations_(alloc), 
+ sort_cmp_funcs_(alloc), + limit_expr_(nullptr), + offset_expr_(nullptr), + fetch_with_ties_(false) {} + + virtual ~ObDASSortCtDef() {} +public: + ExprFixedArray sort_exprs_; + ObSortCollations sort_collations_; + ObSortFuncs sort_cmp_funcs_; + ObExpr *limit_expr_; + ObExpr *offset_expr_; + bool fetch_with_ties_; +}; + +struct ObDASSortRtDef : ObDASAttachRtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASSortRtDef() + : ObDASAttachRtDef(DAS_OP_SORT) {} + + virtual ~ObDASSortRtDef() {} +}; struct ObDASAttachSpec { OB_UNIS_VERSION(1); public: - ObDASAttachSpec(common::ObIAllocator &alloc, ObDASBaseCtDef *scan_ctdef) + ObDASAttachSpec(common::ObIAllocator &alloc, ObDASBaseCtDef *scan_ctdef) : attach_loc_metas_(alloc), scan_ctdef_(nullptr), allocator_(alloc), @@ -40,11 +137,36 @@ public: common::ObIAllocator &allocator_; ObDASBaseCtDef *attach_ctdef_; //The attach_ctdef represents the task information that is bound to and executed on the DAS Task. + const ObDASTableLocMeta *get_attach_loc_meta(int64_t table_location_id, int64_t ref_table_id) const; + int set_calc_exprs(const ExprFixedArray &calc_exprs, const int64_t max_batch_size); TO_STRING_KV(K_(attach_loc_metas), K_(attach_ctdef)); +private: + int serialize_ctdef_tree(char *buf, + const int64_t buf_len, + int64_t &pos, + const ObDASBaseCtDef *root) const; + int64_t get_ctdef_tree_serialize_size(const ObDASBaseCtDef *root) const; + int deserialize_ctdef_tree(const char *buf, + const int64_t data_len, + int64_t &pos, + ObDASBaseCtDef *&root); + int set_calc_exprs_tree(ObDASAttachCtDef *root, + const ExprFixedArray &calc_exprs, + const int64_t max_batch_size); }; -} -} - -#endif \ No newline at end of file +struct ObDASAttachRtInfo +{ + ObDASAttachRtInfo() + : pushdown_tasks_(), + attach_rtdef_(nullptr), + related_scan_cnt_(0) + { } + common::ObSEArray pushdown_tasks_; + ObDASBaseRtDef *attach_rtdef_; + int64_t related_scan_cnt_; +}; +} // namespace sql +} // namespace oceanbase +#endif /* 
OBDEV_SRC_SQL_DAS_OB_DAS_ATTACH_DEFINE_H_ */ diff --git a/src/sql/das/ob_das_context.cpp b/src/sql/das/ob_das_context.cpp index 1d6184703b..33d8e2b720 100644 --- a/src/sql/das/ob_das_context.cpp +++ b/src/sql/das/ob_das_context.cpp @@ -607,6 +607,27 @@ int ObDASCtx::build_related_tablet_map(const ObDASTableLocMeta &loc_meta) return ret; } +int ObDASCtx::find_group_param_by_param_idx(int64_t param_idx, + bool &exist, uint64_t &array_idx) +{ + int ret = OB_SUCCESS; + exist = false; + array_idx = OB_INVALID_ID; + if(OB_ISNULL(group_params_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("group params set by above operator is null", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < group_params_->count() && !exist; ++i) { + const GroupRescanParam &group_param = group_params_->at(i); + if (param_idx == group_param.param_idx_) { + exist = true; + array_idx = i; + } + } + } + return ret; +} + OB_DEF_SERIALIZE(ObDASCtx) { int ret = OB_SUCCESS; diff --git a/src/sql/das/ob_das_context.h b/src/sql/das/ob_das_context.h index 184601e37a..32067a612e 100644 --- a/src/sql/das/ob_das_context.h +++ b/src/sql/das/ob_das_context.h @@ -36,8 +36,28 @@ public: }; typedef common::ObList DASDelCtxList; +struct GroupRescanParam +{ +public: + GroupRescanParam() + : param_idx_(common::OB_INVALID_ID), + gr_param_(nullptr) + { } + GroupRescanParam(int64_t param_idx, ObSqlArrayObj *gr_param) + : param_idx_(param_idx), + gr_param_(gr_param) + { } + TO_STRING_KV(K_(param_idx), + KPC_(gr_param)); + int64_t param_idx_; + ObSqlArrayObj *gr_param_; //group rescan param +}; + +typedef common::ObArrayWrap GroupParamArray; class ObDASCtx { + friend class DASGroupScanMarkGuard; + friend class GroupParamBackupGuard; OB_UNIS_VERSION(1); public: ObDASCtx(common::ObIAllocator &allocator) @@ -52,8 +72,10 @@ public: savepoint_(), write_branch_id_(0), del_ctx_list_(allocator), + group_params_(nullptr), + skip_scan_group_id_(-1), + group_rescan_cnt_(-1), same_tablet_addr_(), - jump_read_group_id_(-1), 
flags_(0) { is_fk_cascading_ = 0; @@ -114,6 +136,9 @@ public: int build_related_tablet_loc(ObDASTabletLoc &tablet_loc); int build_related_table_loc(ObDASTableLoc &table_loc); int rebuild_tablet_loc_reference(); + const GroupParamArray* get_group_params() { return group_params_; } + int64_t get_skip_scan_group_id() const { return skip_scan_group_id_; } + int64_t get_group_rescan_cnt() const { return group_rescan_cnt_; } void clear_all_location_info() { table_locs_.clear(); @@ -133,6 +158,9 @@ public: int build_related_tablet_map(const ObDASTableLocMeta &loc_meta); const ObAddr &same_tablet_addr() const { return same_tablet_addr_; } + int find_group_param_by_param_idx(int64_t param_idx, + bool &exist, uint64_t &array_idx); + TO_STRING_KV(K_(table_locs), K_(external_table_locs), K_(is_fk_cascading), @@ -162,9 +190,11 @@ private: int16_t write_branch_id_; //@todo: save snapshot version DASDelCtxList del_ctx_list_; + const GroupParamArray *group_params_; //only allowed to be modified by GroupParamBackupGuard + int64_t skip_scan_group_id_; //only allowed to be modified by GroupParamBackupGuard + int64_t group_rescan_cnt_; //only allowed to be modified by GroupParamBackupGuard ObAddr same_tablet_addr_; public: - int64_t jump_read_group_id_; union { uint64_t flags_; struct { @@ -172,10 +202,62 @@ public: uint64_t need_check_server_ : 1; //need to check if partitions hit the same server uint64_t same_server_ : 1; //if partitions hit the same server, could be local or remote uint64_t iter_uncommitted_row_ : 1; //iter uncommitted row in fk_checker - uint64_t reserved_ : 60; + uint64_t in_das_group_scan_ : 1; //the current execution in das group scan + uint64_t reserved_ : 59; }; }; }; + +class GroupParamBackupGuard +{ +public: + GroupParamBackupGuard(ObDASCtx &ctx) + : ctx_(ctx) + { + current_group_ = ctx.skip_scan_group_id_; + group_rescan_cnt_ = ctx.group_rescan_cnt_; + group_params_ = ctx.get_group_params(); + } + + void bind_batch_rescan_params(int64_t current_group, + 
int64_t group_rescan_cnt, + const GroupParamArray *group_params) + { + ctx_.skip_scan_group_id_ = current_group; + ctx_.group_rescan_cnt_ = group_rescan_cnt; + ctx_.group_params_ = group_params; + } + + ~GroupParamBackupGuard() { + ctx_.skip_scan_group_id_ = current_group_; + ctx_.group_rescan_cnt_ = group_rescan_cnt_; + ctx_.group_params_ = group_params_; + } + +private: + ObDASCtx &ctx_; + int64_t current_group_; + int64_t group_rescan_cnt_; + const GroupParamArray *group_params_; +}; + +class DASGroupScanMarkGuard +{ +public: + DASGroupScanMarkGuard(ObDASCtx &das_ctx, bool in_das_group_scan) + : das_ctx_(das_ctx) + { + in_das_group_scan_ = das_ctx.in_das_group_scan_; + das_ctx.in_das_group_scan_ = in_das_group_scan; + } + ~DASGroupScanMarkGuard() + { + das_ctx_.in_das_group_scan_ = in_das_group_scan_; + } +private: + bool in_das_group_scan_; + ObDASCtx &das_ctx_; +}; } // namespace sql } // namespace oceanbase #endif /* DEV_SRC_SQL_DAS_OB_DAS_CONTEXT_H_ */ diff --git a/src/sql/das/ob_das_def_reg.h b/src/sql/das/ob_das_def_reg.h index bd4ca6c3be..7b8a34524d 100644 --- a/src/sql/das/ob_das_def_reg.h +++ b/src/sql/das/ob_das_def_reg.h @@ -26,6 +26,9 @@ struct ObDASOpTypeTraits { constexpr static bool registered_ = false; + //attached_=false means this computation is bound to other operations for execution + //and does not have its own operator. 
+ constexpr static bool attached_ = false; typedef char DASOp; typedef char DASOpResult; typedef char DASCtDef; @@ -45,6 +48,7 @@ struct ObDASOpTraits struct ObDASOpTypeTraits \ { \ constexpr static bool registered_ = true; \ + constexpr static bool attached_ = false; \ typedef op DASOp; \ typedef op_result DASOpResult; \ typedef ctdef DASCtDef; \ @@ -100,6 +104,41 @@ class ObDASRangesCostResult; REGISTER_DAS_OP(DAS_OP_GET_RANGES_COST, ObDASRangesCostOp, ObDASRangesCostResult, ObDASEmptyCtDef, ObDASEmptyRtDef); #undef REGISTER_DAS_OP + +class ObDASEmptyOp; +class ObDASEmptyResult; +#define REGISTER_DAS_ATTACH_OP(type, ctdef, rtdef) \ + namespace das_reg { \ + template<> \ + struct ObDASOpTypeTraits \ + { \ + constexpr static bool registered_ = true; \ + constexpr static bool attached_ = true; \ + typedef ObDASEmptyOp DASOp; \ + typedef ObDASEmptyResult DASOpResult; \ + typedef ctdef DASCtDef; \ + typedef rtdef DASRtDef; \ + }; \ + template <> struct ObDASOpTraits { constexpr static int type_ = type; }; \ + } + +struct ObDASTableLookupCtDef; +struct ObDASTableLookupRtDef; +REGISTER_DAS_ATTACH_OP(DAS_OP_TABLE_LOOKUP, ObDASTableLookupCtDef, ObDASTableLookupRtDef); + +struct ObDASIRScanCtDef; +struct ObDASIRScanRtDef; +REGISTER_DAS_ATTACH_OP(DAS_OP_IR_SCAN, ObDASIRScanCtDef, ObDASIRScanRtDef); + +struct ObDASIRAuxLookupCtDef; +struct ObDASIRAuxLookupRtDef; +REGISTER_DAS_ATTACH_OP(DAS_OP_IR_AUX_LOOKUP, ObDASIRAuxLookupCtDef, ObDASIRAuxLookupRtDef); + +struct ObDASSortCtDef; +struct ObDASSortRtDef; +REGISTER_DAS_ATTACH_OP(DAS_OP_SORT, ObDASSortCtDef, ObDASSortRtDef); + +#undef REGISTER_DAS_ATTACH_OP } // namespace sql } // namespace oceanbase diff --git a/src/sql/das/ob_das_define.h b/src/sql/das/ob_das_define.h index e3719c1101..2e511a871c 100644 --- a/src/sql/das/ob_das_define.h +++ b/src/sql/das/ob_das_define.h @@ -20,8 +20,7 @@ #include "rpc/obrpc/ob_rpc_result_code.h" #define DAS_SCAN_OP(_task_op) \ - (::oceanbase::sql::DAS_OP_TABLE_SCAN != (_task_op)->get_type() && 
\ - ::oceanbase::sql::DAS_OP_TABLE_BATCH_SCAN != (_task_op)->get_type() ? \ + (::oceanbase::sql::DAS_OP_TABLE_SCAN != (_task_op)->get_type() ? \ nullptr : static_cast<::oceanbase::sql::ObDASScanOp*>(_task_op)) #define DAS_GROUP_SCAN_OP(_task_op) \ (::oceanbase::sql::DAS_OP_TABLE_BATCH_SCAN != (_task_op)->get_type() ? \ @@ -45,7 +44,6 @@ class ObExecContext; class ObPhysicalPlan; class ObChunkDatumStore; class ObEvalCtx; -class ObPhyTableLocation; namespace das { @@ -80,6 +78,10 @@ enum ObDASOpType DAS_OP_TABLE_BATCH_SCAN, DAS_OP_SPLIT_MULTI_RANGES, DAS_OP_GET_RANGES_COST, + DAS_OP_TABLE_LOOKUP, + DAS_OP_IR_SCAN, + DAS_OP_IR_AUX_LOOKUP, + DAS_OP_SORT, //append OpType before me DAS_OP_MAX }; @@ -338,15 +340,21 @@ struct ObDASBaseCtDef OB_UNIS_VERSION_PV(); public: ObDASOpType op_type_; + ObDASBaseCtDef **children_; + uint32_t children_cnt_; virtual ~ObDASBaseCtDef() = default; - VIRTUAL_TO_STRING_KV(K_(op_type)); + VIRTUAL_TO_STRING_KV(K_(op_type), K_(children_cnt)); + virtual bool has_expr() const { return false; } virtual bool has_pdfilter_or_calc_expr() const { return false; } virtual bool has_pl_udf() const { return false; } + protected: ObDASBaseCtDef(ObDASOpType op_type) - : op_type_(op_type) + : op_type_(op_type), + children_(nullptr), + children_cnt_(0) { } }; @@ -357,16 +365,22 @@ struct ObDASBaseRtDef OB_UNIS_VERSION_PV(); public: ObDASOpType op_type_; + const ObDASBaseCtDef *ctdef_; ObEvalCtx *eval_ctx_; //nullptr in DML DAS Op ObDASTableLoc *table_loc_; + ObDASBaseRtDef **children_; + uint32_t children_cnt_; virtual ~ObDASBaseRtDef() = default; - VIRTUAL_TO_STRING_KV(K_(op_type)); + VIRTUAL_TO_STRING_KV(K_(op_type), K_(children_cnt)); protected: ObDASBaseRtDef(ObDASOpType op_type) : op_type_(op_type), + ctdef_(nullptr), eval_ctx_(NULL), - table_loc_(nullptr) + table_loc_(nullptr), + children_(nullptr), + children_cnt_(0) { } }; typedef common::ObFixedArray DASCtDefFixedArray; diff --git a/src/sql/das/ob_das_delete_op.cpp b/src/sql/das/ob_das_delete_op.cpp 
index 7e9f94e1a5..f45bbeafa0 100644 --- a/src/sql/das/ob_das_delete_op.cpp +++ b/src/sql/das/ob_das_delete_op.cpp @@ -73,7 +73,9 @@ int ObDASIndexDMLAdaptor::write_rows(cons if (OB_TRY_LOCK_ROW_CONFLICT != ret) { LOG_WARN("delete rows to access service failed", K(ret)); } - } else if (!(ctdef.is_ignore_ || ctdef.table_param_.get_data_table().is_spatial_index()) + } else if (!(ctdef.is_ignore_ || + ctdef.table_param_.get_data_table().is_spatial_index() || + ctdef.table_param_.get_data_table().is_multivalue_index_aux()) && 0 == affected_rows) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected affected_rows after do delete", K(affected_rows), K(ret)); diff --git a/src/sql/das/ob_das_dml_ctx_define.cpp b/src/sql/das/ob_das_dml_ctx_define.cpp index be7c4a8999..a374496734 100644 --- a/src/sql/das/ob_das_dml_ctx_define.cpp +++ b/src/sql/das/ob_das_dml_ctx_define.cpp @@ -15,6 +15,7 @@ #include "lib/utility/ob_tracepoint.h" #include "sql/das/ob_das_dml_ctx_define.h" #include "sql/das/ob_das_utils.h" +#include "sql/das/ob_das_domain_utils.h" #include "sql/engine/dml/ob_dml_service.h" #include "sql/engine/expr/ob_expr_lob_utils.h" #include "storage/access/ob_dml_param.h" @@ -98,88 +99,21 @@ OB_SERIALIZE_MEMBER((ObDASLockCtDef, ObDASDMLBaseCtDef), ObDASDMLIterator::~ObDASDMLIterator() { - if (spat_rows_ != nullptr) { - spat_rows_->~ObSEArray(); - spat_rows_ = nullptr; + if (nullptr != domain_iter_) { + domain_iter_->~ObDomainDMLIterator(); + domain_iter_ = nullptr; } } -int ObDASDMLIterator::create_spatial_index_store() +int ObDASDMLIterator::get_next_domain_index_row(ObNewRow *&row) { int ret = OB_SUCCESS; - void *buf = allocator_.alloc(sizeof(ObSpatIndexRow)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate spatial row store failed", K(ret)); - } else { - spat_rows_ = new(buf) ObSpatIndexRow(); - } - return ret; -} - -int ObDASDMLIterator::get_next_spatial_index_row(ObNewRow *&row) -{ - int ret = OB_SUCCESS; - ObDASWriteBuffer &write_buffer = 
get_write_buffer(); - ObSpatIndexRow *spatial_rows = get_spatial_index_rows(); - bool got_row = false; - while (OB_SUCC(ret) && !got_row) { - if (OB_ISNULL(spatial_rows) || spatial_row_idx_ >= spatial_rows->count()) { - const ObChunkDatumStore::StoredRow *sr = nullptr; - spatial_row_idx_ = 0; - if (OB_FAIL(write_iter_.get_next_row(sr))) { - if (OB_ITER_END != ret) { - LOG_WARN("get next row from result iterator failed", K(ret)); - } - } else if (OB_ISNULL(spatial_rows)) { - if (OB_FAIL(create_spatial_index_store())) { - LOG_WARN("create spatial index rows store failed", K(ret)); - } else { - spatial_rows = get_spatial_index_rows(); - } - } - if (OB_NOT_NULL(spatial_rows)) { - spatial_rows->reuse(); - } - - if(OB_SUCC(ret)) { - uint64_t geo_col_id = das_ctdef_->table_param_.get_data_table().get_spatial_geo_col_id(); - uint64_t rowkey_num = das_ctdef_->table_param_.get_data_table().get_rowkey_column_num(); - int64_t geo_idx = -1; - ObString geo_wkb; - ObObjMeta geo_meta; - bool has_old_row = !main_ctdef_->old_row_projector_.empty(); - for (uint64_t i = 0; OB_SUCC(ret) && i < main_ctdef_->column_ids_.count() && geo_idx == -1; i++) { - int64_t projector_idx = has_old_row ? 
main_ctdef_->old_row_projector_.at(i) : i; - if (geo_col_id == main_ctdef_->column_ids_.at(i)) { - if (projector_idx >= sr->cnt_) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid index for sr", K(ret), KPC(sr), K(i), K(main_ctdef_->old_row_projector_)); - } else { - geo_idx = projector_idx; - geo_wkb = sr->cells()[projector_idx].get_string(); - geo_meta = main_ctdef_->column_types_.at(i); - } - } - } - if (OB_FAIL(ret)) { - } else if (geo_idx == -1) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("can't get geo col idx", K(ret), K(geo_col_id)); - } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&allocator_, geo_meta.get_type(), - geo_meta.get_collation_type(), geo_meta.has_lob_header(), geo_wkb))) { - LOG_WARN("fail to get real geo data", K(ret)); - } else if (OB_FAIL(ObDASUtils::generate_spatial_index_rows(allocator_, *das_ctdef_, geo_wkb, - *row_projector_, *sr, *spatial_rows))) { - LOG_WARN("generate spatial_index_rows failed", K(ret), K(geo_col_id), K(geo_wkb), K(geo_idx), KPC(sr)); - } - } - } - - if (OB_SUCC(ret) && spatial_row_idx_ < spatial_rows->count()) { - row = &(*spatial_rows)[spatial_row_idx_]; - spatial_row_idx_++; - got_row = true; + if (OB_ISNULL(domain_iter_) && OB_FAIL(ObDomainDMLIterator::create_domain_dml_iterator( + allocator_, row_projector_, write_iter_, das_ctdef_, main_ctdef_, domain_iter_))) { + LOG_WARN("fail to create domain index dml iterator", K(ret)); + } else if (OB_FAIL(domain_iter_->get_next_domain_row(row))) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to get next domain_row", K(ret)); } } return ret; @@ -196,10 +130,10 @@ int ObDASDMLIterator::get_next_row(ObNewRow *&row) } } - if (OB_SUCC(ret) && das_ctdef_->table_param_.get_data_table().is_spatial_index()) { - if (OB_FAIL(get_next_spatial_index_row(row))) { + if (OB_SUCC(ret) && das_ctdef_->table_param_.get_data_table().is_domain_index()) { + if (OB_FAIL(get_next_domain_index_row(row))) { if (OB_ITER_END != ret) { - LOG_WARN("get next spatial index row failed", K(ret), 
K(das_ctdef_->table_param_.get_data_table())); + LOG_WARN("get next domain index row", K(ret), K(das_ctdef_->table_param_.get_data_table())); } } } else { @@ -232,12 +166,12 @@ int ObDASDMLIterator::get_next_row() int ObDASDMLIterator::get_next_rows(ObNewRow *&rows, int64_t &row_count) { int ret = OB_SUCCESS; - const bool is_spatial_index = das_ctdef_->table_param_.get_data_table().is_spatial_index(); + const bool is_domain_index = das_ctdef_->table_param_.get_data_table().is_domain_index(); row_count = 0; - if (is_spatial_index || 1 == batch_size_) { + if (is_domain_index || 1 == batch_size_) { if (OB_FAIL(get_next_row(rows))) { if (OB_ITER_END != ret) { - LOG_WARN("Failed to get next row", K(ret), K_(batch_size), K(is_spatial_index)); + LOG_WARN("Failed to get next row", K(ret), K_(batch_size), K(is_domain_index)); } } else { row_count = 1; @@ -279,6 +213,31 @@ int ObDASDMLIterator::get_next_rows(ObNewRow *&rows, int64_t &row_count) return ret; } +int ObDASDMLIterator::rewind(const ObDASDMLBaseCtDef *das_ctdef) +{ + int ret = common::OB_SUCCESS; + cur_row_ = nullptr; + cur_rows_ = nullptr; + set_ctdef(das_ctdef); + if (OB_NOT_NULL(domain_iter_)) { + if (OB_FAIL(domain_iter_->rewind())) { + LOG_WARN("fail to rewind for domain iterator", K(ret)); + } + } + return ret; +} + +void ObDASDMLIterator::set_ctdef(const ObDASDMLBaseCtDef *das_ctdef) +{ + das_ctdef_ = das_ctdef; + row_projector_ = !das_ctdef_->old_row_projector_.empty() ? 
+ &das_ctdef_->old_row_projector_ : + &das_ctdef_->new_row_projector_; + if (OB_NOT_NULL(domain_iter_)) { + domain_iter_->set_ctdef(das_ctdef, row_projector_); + } +} + int ObDASMLogDMLIterator::get_next_row(ObNewRow *&row) { int ret = OB_SUCCESS; diff --git a/src/sql/das/ob_das_dml_ctx_define.h b/src/sql/das/ob_das_dml_ctx_define.h index 702a6fe07a..c57b47e9c2 100644 --- a/src/sql/das/ob_das_dml_ctx_define.h +++ b/src/sql/das/ob_das_dml_ctx_define.h @@ -21,6 +21,7 @@ #include "storage/tx/ob_clog_encrypt_info.h" #include "sql/engine/ob_operator.h" #include "sql/resolver/dml/ob_hint.h" +#include "storage/fts/ob_fts_plugin_helper.h" namespace oceanbase { namespace storage @@ -32,8 +33,9 @@ namespace sql typedef common::ObFixedArray ObjMetaFixedArray; typedef common::ObFixedArray AccuracyFixedArray; static const int64_t SAPTIAL_INDEX_DEFAULT_ROW_COUNT = 32; // 一个wkb生成的cellid数量(设定值) -typedef common::ObSEArray ObSpatIndexRow; +typedef common::ObSEArray ObDomainIndexRow; +class ObDomainDMLIterator; struct ObDASDMLBaseRtDef; //das dml base compile info definition struct ObDASDMLBaseCtDef : ObDASBaseCtDef @@ -467,8 +469,7 @@ public: cur_row_(nullptr), cur_rows_(nullptr), main_ctdef_(das_ctdef), - spat_rows_(nullptr), - spatial_row_idx_(0) + domain_iter_(nullptr) { set_ctdef(das_ctdef); batch_size_ = MIN(write_buffer_.get_row_cnt(), DEFAULT_BATCH_SIZE); @@ -479,27 +480,11 @@ public: virtual int get_next_rows(ObNewRow *&rows, int64_t &row_count); ObDASWriteBuffer &get_write_buffer() { return write_buffer_; } virtual void reset() override { } - int rewind(const ObDASDMLBaseCtDef *das_ctdef) - { - cur_row_ = nullptr; - cur_rows_ = nullptr; - spatial_row_idx_ = 0; - set_ctdef(das_ctdef); - return common::OB_SUCCESS; - } + int rewind(const ObDASDMLBaseCtDef *das_ctdef); private: - void set_ctdef(const ObDASDMLBaseCtDef *das_ctdef) - { - das_ctdef_ = das_ctdef; - row_projector_ = !das_ctdef_->old_row_projector_.empty() ? 
- &das_ctdef_->old_row_projector_ : - &das_ctdef_->new_row_projector_; - } - // spatial index - int get_next_spatial_index_row(ObNewRow *&row); - ObSpatIndexRow *get_spatial_index_rows() { return spat_rows_; } - int create_spatial_index_store(); + void set_ctdef(const ObDASDMLBaseCtDef *das_ctdef); + int get_next_domain_index_row(ObNewRow *&row); private: ObDASWriteBuffer &write_buffer_; const ObDASDMLBaseCtDef *das_ctdef_; @@ -509,8 +494,7 @@ private: common::ObNewRow *cur_row_; common::ObNewRow *cur_rows_; const ObDASDMLBaseCtDef *main_ctdef_; - ObSpatIndexRow *spat_rows_; - uint32_t spatial_row_idx_; + ObDomainDMLIterator *domain_iter_; int64_t batch_size_; }; diff --git a/src/sql/das/ob_das_domain_utils.cpp b/src/sql/das/ob_das_domain_utils.cpp new file mode 100644 index 0000000000..166ee7aebb --- /dev/null +++ b/src/sql/das/ob_das_domain_utils.cpp @@ -0,0 +1,895 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX SQL_DAS + +#include "lib/geo/ob_s2adapter.h" +#include "lib/geo/ob_geo_utils.h" +#include "lib/json_type/ob_json_bin.h" +#include "sql/das/ob_das_domain_utils.h" +#include "sql/das/ob_das_utils.h" +#include "sql/engine/expr/ob_expr_lob_utils.h" +#include "observer/omt/ob_tenant_srs.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace sql +{ + +int ObDASDomainUtils::generate_spatial_index_rows( + ObIAllocator &allocator, + const ObDASDMLBaseCtDef &das_ctdef, + const ObString &wkb_str, + const IntFixedArray &row_projector, + const ObDASWriteBuffer::DmlRow &dml_row, + ObDomainIndexRow &spat_rows) +{ + int ret = OB_SUCCESS; + omt::ObSrsCacheGuard srs_guard; + const ObSrsItem *srs_item = NULL; + const ObSrsBoundsItem *srs_bound = NULL; + uint32_t srid = UINT32_MAX; + uint64_t rowkey_num = das_ctdef.table_param_.get_data_table().get_rowkey_column_num(); + lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(MTL_ID(), "S2Adapter")); + + if (OB_FAIL(ObGeoTypeUtil::get_srid_from_wkb(wkb_str, srid))) { + LOG_WARN("failed to get srid", K(ret), K(wkb_str)); + } else if (srid != 0 && + OB_FAIL(OTSRS_MGR->get_tenant_srs_guard(srs_guard))) { + LOG_WARN("failed to get srs guard", K(ret), K(MTL_ID()), K(srid)); + } else if (srid != 0 && + OB_FAIL(srs_guard.get_srs_item(srid, srs_item))) { + LOG_WARN("failed to get srs item", K(ret), K(MTL_ID()), K(srid)); + } else if (((srid == 0) || !(srs_item->is_geographical_srs())) && + OB_FAIL(OTSRS_MGR->get_srs_bounds(srid, srs_item, srs_bound))) { + LOG_WARN("failed to get srs bound", K(ret), K(srid)); + } else { + ObS2Adapter s2object(&allocator, srid != 0 ? 
srs_item->is_geographical_srs() : false); + ObSpatialMBR spa_mbr; + ObObj *obj_arr = NULL; + ObS2Cellids cellids; + char *mbr = NULL; + int64_t mbr_len = 0; + if (OB_FAIL(s2object.init(wkb_str, srs_bound))) { + LOG_WARN("Init s2object failed", K(ret)); + } else if (OB_FAIL(s2object.get_cellids(cellids, false))) { + LOG_WARN("Get cellids from s2object failed", K(ret)); + } else if (OB_FAIL(s2object.get_mbr(spa_mbr))) { + LOG_WARN("Get mbr from s2object failed", K(ret)); + } else if (spa_mbr.is_empty()) { + if (cellids.size() == 0) { + LOG_DEBUG("it's might be empty geometry collection", K(wkb_str)); + } else { + ret = OB_ERR_GIS_INVALID_DATA; + LOG_WARN("invalid geometry", K(ret), K(wkb_str)); + } + } else if (OB_ISNULL(mbr = reinterpret_cast(allocator.alloc(OB_DEFAULT_MBR_SIZE)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for spatial index row mbr", K(ret)); + } else if (OB_FAIL(spa_mbr.to_char(mbr, mbr_len))) { + LOG_WARN("failed transform ObSpatialMBR to string", K(ret)); + } else { + for (uint64_t i = 0; OB_SUCC(ret) && i < cellids.size(); i++) { + if (OB_ISNULL(obj_arr = reinterpret_cast(allocator.alloc(sizeof(ObObj) * rowkey_num)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for spatial index row cells", K(ret)); + } else { + // 索引行[cellid_obj][mbr_obj][rowkey_obj] + for(uint64_t j = 0; OB_SUCC(ret) && j < rowkey_num; j++) { + obj_arr[j].set_nop_value(); + const ObObjMeta &col_type = das_ctdef.column_types_.at(j); + const ObAccuracy &col_accuracy = das_ctdef.column_accuracys_.at(j); + int64_t projector_idx = row_projector.at(j); + if (OB_FAIL(dml_row.cells()[projector_idx].to_obj(obj_arr[j], col_type))) { + LOG_WARN("stored row to new row obj failed", K(ret), + K(dml_row.cells()[projector_idx]), K(col_type), K(projector_idx), K(j)); + } else if (OB_FAIL(ObDASUtils::reshape_storage_value(col_type, col_accuracy, allocator, obj_arr[j]))) { + LOG_WARN("reshape storage value failed", K(ret), K(col_type), 
K(projector_idx), K(j)); + } + } + if (OB_SUCC(ret)) { + int64_t cellid_col_idx = 0; + int64_t mbr_col_idx = 1; + obj_arr[cellid_col_idx].set_uint64(cellids.at(i)); + ObString mbr_val(mbr_len, mbr); + obj_arr[mbr_col_idx].set_varchar(mbr_val); + obj_arr[mbr_col_idx].set_collation_type(CS_TYPE_BINARY); + obj_arr[mbr_col_idx].set_collation_level(CS_LEVEL_IMPLICIT); + ObNewRow row; + row.cells_ = obj_arr; + row.count_ = rowkey_num; + if (OB_FAIL(spat_rows.push_back(row))) { + LOG_WARN("failed to push back spatial index row", K(ret), K(row)); + } + } + } + } + } + } + + return ret; +} + +/*static*/ int ObDASDomainUtils::generate_fulltext_word_rows( + common::ObIAllocator &allocator, + storage::ObFTParseHelper *helper, + const common::ObObjMeta &ft_obj_meta, + const ObString &doc_id, + const ObString &fulltext, + const bool is_fts_index_aux, + ObDomainIndexRow &word_rows) +{ + int ret = OB_SUCCESS; + static int64_t FT_WORD_DOC_COL_CNT = 4; + const int64_t ft_word_bkt_cnt = MAX(fulltext.length() / 10, 2); + int64_t doc_length = 0; + ObFTWordMap ft_word_map; + ObObj *obj_arr = nullptr; + if (OB_ISNULL(helper) + || OB_UNLIKELY(!ft_obj_meta.is_valid()) + || OB_UNLIKELY(doc_id.length() != sizeof(ObDocId) || doc_id.empty()) ) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(helper), K(ft_obj_meta), K(doc_id)); + } else if (0 == fulltext.length()) { + ret = OB_ITER_END; + } else if (OB_FAIL(ft_word_map.create(ft_word_bkt_cnt, common::ObMemAttr(MTL_ID(), "FTWordMap")))) { + LOG_WARN("fail to create ft word map", K(ret), K(ft_word_bkt_cnt)); + } else if (OB_FAIL(segment_and_calc_word_count(allocator, helper, ft_obj_meta.get_collation_type(), + fulltext, doc_length, ft_word_map))) { + LOG_WARN("fail to segment and calculate word count", K(ret), KPC(helper), + K(ft_obj_meta.get_collation_type()), K(fulltext)); + } else if (0 == ft_word_map.size()) { + ret = OB_ITER_END; + } else { + const int64_t obj_cnt = ft_word_map.size() * FT_WORD_DOC_COL_CNT; + const 
int64_t obj_arr_size = sizeof(ObObj) * obj_cnt; + if (OB_ISNULL(obj_arr = reinterpret_cast(allocator.alloc(obj_arr_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate obj array", K(ret), K(obj_arr_size)); + } else { + for (int64_t i = 0; i < obj_cnt; ++i) { + new (obj_arr + i) ObObj(); + } + } + int64_t i = 0; + for (ObFTWordMap::const_iterator iter = ft_word_map.begin(); + OB_SUCC(ret) && iter != ft_word_map.end(); + ++iter) { + const ObFTWord &ft_word = iter->first; + const int64_t word_cnt = iter->second; + // index row format + // - FTS_INDEX: [WORD], [DOC_ID], [WORD_COUNT] + // - FTS_DOC_WORD: [DOC_ID], [WORD], [WORD_COUNT] + const int64_t word_idx = is_fts_index_aux ? 0 : 1; + const int64_t doc_id_idx = is_fts_index_aux ? 1 : 0; + const int64_t word_cnt_idx = 2; + const int64_t doc_len_idx = 3; + obj_arr[i * FT_WORD_DOC_COL_CNT + word_idx].set_varchar(ft_word.get_word()); + obj_arr[i * FT_WORD_DOC_COL_CNT + word_idx].set_meta_type(ft_obj_meta); + obj_arr[i * FT_WORD_DOC_COL_CNT + doc_id_idx].set_varbinary(doc_id); + obj_arr[i * FT_WORD_DOC_COL_CNT + word_cnt_idx].set_uint64(word_cnt); + obj_arr[i * FT_WORD_DOC_COL_CNT + doc_len_idx].set_uint64(doc_length); + ObNewRow row; + row.cells_ = &(obj_arr[i * FT_WORD_DOC_COL_CNT]); + row.count_ = FT_WORD_DOC_COL_CNT; + if (OB_FAIL(word_rows.push_back(row))) { + LOG_WARN("fail to push back row", K(ret), K(row)); + } else { + ObDocId tmp_doc_id; + tmp_doc_id.tablet_id_ = ((const uint64_t *)doc_id.ptr())[0]; + tmp_doc_id.seq_id_ = ((const uint64_t *)doc_id.ptr())[1]; + STORAGE_FTS_LOG(DEBUG, "succeed to add word row", K(ret), K(is_fts_index_aux), "doc_id", tmp_doc_id, + K(ft_word), K(word_cnt), K(i), K(row)); + ++i; + } + } + } + return ret; +} + +/*static*/ int ObDASDomainUtils::segment_and_calc_word_count( + common::ObIAllocator &allocator, + storage::ObFTParseHelper *helper, + const common::ObCollationType &type, + const ObString &fulltext, + int64_t &doc_length, + ObFTWordMap &words_count) +{ + 
int ret = OB_SUCCESS; + common::ObSEArray words; + if (OB_ISNULL(helper) + || OB_UNLIKELY(ObCollationType::CS_TYPE_INVALID == type + || ObCollationType::CS_TYPE_EXTENDED_MARK < type) + || OB_UNLIKELY(!words_count.created())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(helper), K(type), K(words_count.created())); + } else if (OB_FAIL(helper->segment(type, fulltext.ptr(), fulltext.length(), doc_length, words))) { + LOG_WARN("fail to segment", K(ret), KPC(helper), K(type), K(fulltext)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < words.count(); ++i) { + const ObFTWord &ft_word = words.at(i); + int64_t word_count = 0; + if (OB_FAIL(words_count.get_refactored(ft_word, word_count)) && OB_HASH_NOT_EXIST != ret) { + LOG_WARN("fail to get ft word", K(ret), K(ft_word)); + } else { + word_count = OB_HASH_NOT_EXIST == ret ? 1 : ++word_count; + if (OB_FAIL(words_count.set_refactored(ft_word, word_count, 1/*overwrite*/))) { + LOG_WARN("fail to set ft word and count", K(ret), K(ft_word)); + } + } + } + } + STORAGE_FTS_LOG(DEBUG, "segment and calc word count", K(ret), K(words), K(type)); + return ret; +} + +int ObDASDomainUtils::get_pure_mutivalue_data(const ObString &json_str, const char*& data, int64_t& data_len, uint32_t& record_num) +{ + int ret = OB_SUCCESS; + + ObJsonBin bin(json_str.ptr(), json_str.length()); + + if (OB_FAIL(bin.reset_iter())) { + LOG_WARN("failed to parse binary.", K(ret), K(json_str)); + } else if (!ObJsonVerType::is_opaque_or_string(bin.json_type())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to parse binary.", K(ret), K(json_str)); + } else { + data = bin.get_data(); + data_len = bin.get_data_length(); + + record_num = *reinterpret_cast(data); + } + + return ret; +} + +int ObDASDomainUtils::calc_save_rowkey_policy( + ObIAllocator &allocator, + const ObDASDMLBaseCtDef &das_ctdef, + const IntFixedArray &row_projector, + const ObDASWriteBuffer::DmlRow &dml_row, + const int64_t record_cnt, + bool& is_save_rowkey) 
+{ + int ret = OB_SUCCESS; + is_save_rowkey = true; + + uint64_t column_num = das_ctdef.column_ids_.count(); + const int64_t data_table_rowkey_cnt = das_ctdef.table_param_.get_data_table().get_data_table_rowkey_column_num(); + const uint64_t multivalue_arr_col_id = das_ctdef.table_param_.get_data_table().get_multivalue_array_col_id(); + + uint64_t mulvalue_column_start = 0; + + // -1 : doc id column + uint64_t mulvalue_column_end = column_num - 1 - data_table_rowkey_cnt; + + if (mulvalue_column_end <= mulvalue_column_start) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("failed to calc save rowkey policy.", K(ret), K(mulvalue_column_end), K(mulvalue_column_start)); + } else { + + ObObj *obj_arr = nullptr; + if (OB_ISNULL(obj_arr = reinterpret_cast(allocator.alloc(sizeof(ObObj) * column_num)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for multivalue index row cells", K(ret)); + } + + uint32_t pure_data_size = 0; + uint64_t rowkey_column_start = mulvalue_column_end; + // -1 : doc id column + uint64_t rowkey_column_end = column_num - 1; + + for(uint64_t j = rowkey_column_start; OB_SUCC(ret) && j < rowkey_column_end; j++) { + obj_arr[j].set_nop_value(); + const ObObjMeta &col_type = das_ctdef.column_types_.at(j); + const ObAccuracy &col_accuracy = das_ctdef.column_accuracys_.at(j); + int64_t projector_idx = row_projector.at(j); + if (das_ctdef.column_ids_.at(j) == multivalue_arr_col_id) { + // do nothing + } else if (OB_FAIL(dml_row.cells()[projector_idx].to_obj(obj_arr[j], col_type))) { + LOG_WARN("stored row to new row obj failed", K(ret), + K(dml_row.cells()[projector_idx]), K(col_type), K(projector_idx), K(j)); + } else { + pure_data_size += obj_arr[j].get_serialize_size(); + } + } + + if (OB_SUCC(ret)) { + if (record_cnt < MVI_FULL_ROWKEY_THRESHOLD) { + is_save_rowkey = true; + } else if (pure_data_size > MVI_ROWKEY_SIZE_THRESHOLD) { + is_save_rowkey = false; + } else { + is_save_rowkey = true; + } + } + } + + + return ret; +} + +int 
ObDASDomainUtils::generate_multivalue_index_rows(ObIAllocator &allocator, + const ObDASDMLBaseCtDef &das_ctdef, + int64_t multivalue_idx, + int64_t multivalue_arr_idx, + const ObString &json_str, + const IntFixedArray &row_projector, + const ObDASWriteBuffer::DmlRow &dml_row, + ObDomainIndexRow &mvi_rows) +{ + int ret = OB_SUCCESS; + bool is_save_rowkey = true; + + const int64_t data_table_rowkey_cnt = das_ctdef.table_param_.get_data_table().get_data_table_rowkey_column_num(); + const char* data = nullptr; + int64_t data_len = 0; + uint32_t record_num = 0; + + if (OB_FAIL(get_pure_mutivalue_data(json_str, data, data_len, record_num))) { + LOG_WARN("failed to parse binary.", K(ret), K(json_str)); + } else if (record_num == 0) { + ret = OB_ITER_END; + } else if (OB_FAIL(calc_save_rowkey_policy(allocator, das_ctdef, row_projector, + dml_row, record_num, is_save_rowkey))) { + LOG_WARN("failed to calc store policy.", K(ret), K(data_table_rowkey_cnt)); + } else { + + uint64_t column_num = das_ctdef.column_ids_.count(); + // -1 : doc id column + uint64_t rowkey_column_start = column_num - 1 - data_table_rowkey_cnt; + uint64_t rowkey_column_end = column_num - 1; + + ObObj *obj_arr = nullptr; + int64_t pos = sizeof(uint32_t); + + for (int i = 0; OB_SUCC(ret) && i < record_num ; ++i) { + if (OB_ISNULL(obj_arr = reinterpret_cast(allocator.alloc(sizeof(ObObj) * column_num)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory for multivalue index row cells", K(ret)); + } + for(uint64_t j = 0; OB_SUCC(ret) && j < column_num; j++) { + obj_arr[j].set_nop_value(); + const ObObjMeta &col_type = das_ctdef.column_types_.at(j); + const ObAccuracy &col_accuracy = das_ctdef.column_accuracys_.at(j); + int64_t projector_idx = row_projector.at(j); + if (multivalue_idx == projector_idx) { + if (OB_FAIL(obj_arr[j].deserialize(data, data_len, pos))) { + LOG_WARN("failed to deserialize datum.", K(ret), K(json_str)); + } else { + 
obj_arr[j].set_collation_level(col_type.get_collation_level()); + obj_arr[j].set_collation_type(col_type.get_collation_type()); + obj_arr[j].set_type(col_type.get_type()); + } + } else if (!is_save_rowkey && (rowkey_column_start >= j && j < rowkey_column_end)) { + obj_arr[j].set_null(); + } else if (multivalue_arr_idx == projector_idx) { + obj_arr[j].set_null(); + } else if (OB_FAIL(dml_row.cells()[projector_idx].to_obj(obj_arr[j], col_type))) { + LOG_WARN("stored row to new row obj failed", K(ret), + K(dml_row.cells()[projector_idx]), K(col_type), K(projector_idx), K(j)); + } + + if (OB_SUCC(ret) && OB_FAIL(ObDASUtils::reshape_storage_value(col_type, col_accuracy, allocator, obj_arr[j]))) { + LOG_WARN("reshape storage value failed", K(ret), K(col_type), K(projector_idx), K(j)); + } + } + + if (OB_SUCC(ret)) { + ObNewRow row; + row.cells_ = obj_arr; + row.count_ = column_num; + if (OB_FAIL(mvi_rows.push_back(row))) { + LOG_WARN("failed to push back spatial index row", K(ret), K(row)); + } + } // end if (OB_SUCC(ret)) + } + } + + return ret; +} + +/*static*/ int ObDomainDMLIterator::create_domain_dml_iterator( + common::ObIAllocator &allocator, + const IntFixedArray *row_projector, + ObDASWriteBuffer::Iterator &write_iter, + const ObDASDMLBaseCtDef *das_ctdef, + const ObDASDMLBaseCtDef *main_ctdef, + ObDomainDMLIterator *&domain_iter) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(row_projector) || OB_ISNULL(das_ctdef)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(row_projector), KP(das_ctdef)); + } else if (das_ctdef->table_param_.get_data_table().is_spatial_index()) { + void *buf = nullptr; + if (OB_ISNULL(buf = allocator.alloc(sizeof(ObSpatialDMLIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate spatial dml iterator memory", K(ret), KP(buf)); + } else { + domain_iter = new (buf) ObSpatialDMLIterator(allocator, row_projector, write_iter, das_ctdef, main_ctdef); + } + } else if 
(das_ctdef->table_param_.get_data_table().is_fts_index()) { + void *buf = nullptr; + if (OB_ISNULL(buf = allocator.alloc(sizeof(ObFTDMLIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate fulltext dml iterator memory", K(ret), KP(buf)); + } else { + ObFTDMLIterator *iter = new (buf) ObFTDMLIterator(allocator, row_projector, write_iter, das_ctdef, main_ctdef); + if (OB_FAIL(iter->init(das_ctdef->table_param_.get_data_table().get_fts_parser_name()))) { + LOG_WARN("fail to init fulltext dml iterator", K(ret), KPC(iter)); + } else { + domain_iter = static_cast(iter); + } + } + } else if (das_ctdef->table_param_.get_data_table().is_multivalue_index()) { + void *buf = nullptr; + if (OB_ISNULL(buf = allocator.alloc(sizeof(ObMultivalueDMLIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate fulltext dml iterator memory", K(ret), KP(buf)); + } else { + ObMultivalueDMLIterator *iter = new (buf) ObMultivalueDMLIterator(allocator, row_projector, write_iter, das_ctdef, main_ctdef); + domain_iter = static_cast(iter); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported domain index type", K(ret), K(das_ctdef->table_param_.get_data_table())); + } + return ret; +} + +ObDomainDMLIterator::ObDomainDMLIterator( + common::ObIAllocator &allocator, + const IntFixedArray *row_projector, + ObDASWriteBuffer::Iterator &write_iter, + const ObDASDMLBaseCtDef *das_ctdef, + const ObDASDMLBaseCtDef *main_ctdef) + : row_idx_(0), + rows_(), + row_projector_(row_projector), + write_iter_(write_iter), + das_ctdef_(das_ctdef), + main_ctdef_(main_ctdef), + allocator_(allocator), + is_update_(nullptr == main_ctdef) +{ +} + +ObDomainDMLIterator::~ObDomainDMLIterator() +{ + reset(); +} + +void ObDomainDMLIterator::reset() +{ + row_idx_ = 0; + rows_.reset(); + row_projector_ = nullptr; + das_ctdef_ = nullptr; + main_ctdef_ = nullptr; +} + +void ObDomainDMLIterator::set_ctdef( + const ObDASDMLBaseCtDef *das_ctdef, + const IntFixedArray 
*row_projector) +{ + row_idx_ = 0; + das_ctdef_ = das_ctdef; + row_projector_ = row_projector; +} + +int ObDomainDMLIterator::get_next_domain_row(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + const ObChunkDatumStore::StoredRow *sr = nullptr; + bool got_row = false; + while (OB_SUCC(ret) && !got_row) { + if (row_idx_ >= rows_.count()) { + rows_.reuse(); + row_idx_ = 0; + if (OB_UNLIKELY(!das_ctdef_->table_param_.get_data_table().is_domain_index())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, not domain index", K(ret), K(das_ctdef_->table_param_.get_data_table())); + } else if (FAILEDx(write_iter_.get_next_row(sr))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next row from result iterator failed", K(ret)); + } + } else if (OB_FAIL(generate_domain_rows(sr))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to generate domain index row", K(ret), KPC(sr)); + } + } + } + if (OB_SUCC(ret) && row_idx_ < rows_.count()) { + row = &(rows_[row_idx_]); + ++row_idx_; + got_row = true; + } + } + LOG_DEBUG("get next domain row", K(ret), K(got_row), K(row_idx_), K(rows_), KPC(row), KPC(sr)); + return ret; +} + +int ObSpatialDMLIterator::generate_domain_rows(const ObChunkDatumStore::StoredRow *store_row) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(store_row)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(store_row)); + } else if (OB_UNLIKELY(!das_ctdef_->table_param_.get_data_table().is_spatial_index())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, it isn't spatial index", K(ret), K(das_ctdef_->table_param_.get_data_table())); + } else { + const uint64_t geo_col_id = das_ctdef_->table_param_.get_data_table().get_spatial_geo_col_id(); + int64_t geo_idx = -1; + ObString geo_wkb; + ObObjMeta geo_meta; + if (!is_update_ && OB_FAIL(get_geo_wkb(store_row, geo_idx, geo_wkb, geo_meta))) { + LOG_WARN("fail to get geo wkb", K(ret), KPC(store_row)); + } else if (is_update_ && OB_FAIL(get_geo_wkb_for_update(store_row, geo_idx, geo_wkb, 
geo_meta))) { + LOG_WARN("fail to get geo wkb for update", K(ret), KPC(store_row)); + } else if (OB_UNLIKELY(geo_idx == -1)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("can't get geo col idx", K(ret), K(is_update_), K(geo_idx), K(geo_col_id)); + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&allocator_, geo_meta.get_type(), + geo_meta.get_collation_type(), is_update_ ? true : geo_meta.has_lob_header(), geo_wkb))) { + LOG_WARN("fail to get real geo data", K(ret)); + } else if (OB_FAIL(ObDASDomainUtils::generate_spatial_index_rows(allocator_, *das_ctdef_, geo_wkb, + *row_projector_, *store_row, rows_))) { + LOG_WARN("generate spatial_index_rows failed", K(ret), K(geo_col_id), K(geo_wkb), K(geo_idx), + KPC(store_row), K(geo_meta)); + } + } + LOG_DEBUG("generate domain rows", K(ret), K(rows_), KPC(store_row)); + return ret; +} + +int ObSpatialDMLIterator::get_geo_wkb( + const ObChunkDatumStore::StoredRow *store_row, + int64_t &geo_idx, + ObString &geo_wkb, + ObObjMeta &geo_meta) const +{ + int ret = OB_SUCCESS; + const uint64_t geo_col_id = das_ctdef_->table_param_.get_data_table().get_spatial_geo_col_id(); + const bool has_old_row = !main_ctdef_->old_row_projector_.empty(); + geo_idx = -1; + for (int64_t i = 0; OB_SUCC(ret) && i < main_ctdef_->column_ids_.count() && -1 == geo_idx; ++i) { + const int64_t projector_idx = has_old_row ? 
main_ctdef_->old_row_projector_.at(i) : i; + if (geo_col_id == main_ctdef_->column_ids_.at(i)) { + if (projector_idx >= store_row->cnt_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid index for sr", K(ret), KPC(store_row), K(i), K(main_ctdef_->old_row_projector_)); + } else { + geo_idx = projector_idx; + geo_wkb = store_row->cells()[projector_idx].get_string(); + geo_meta = main_ctdef_->column_types_.at(i); + } + } + } + return ret; +} + +int ObSpatialDMLIterator::get_geo_wkb_for_update( + const ObChunkDatumStore::StoredRow *store_row, + int64_t &geo_idx, + ObString &geo_wkb, + ObObjMeta &geo_meta) const +{ + int ret = OB_SUCCESS; + const uint64_t rowkey_num = das_ctdef_->table_param_.get_data_table().get_rowkey_column_num(); + const uint64_t old_proj = das_ctdef_->old_row_projector_.count(); + const uint64_t new_proj = das_ctdef_->new_row_projector_.count(); + geo_idx = -1; + if (OB_UNLIKELY(rowkey_num + 1 != old_proj || rowkey_num + 1 != new_proj)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid project count", K(ret), K(rowkey_num), K(new_proj), K(old_proj)); + } else { + // get full row successfully + geo_idx = row_projector_->at(rowkey_num); + geo_wkb = store_row->cells()[geo_idx].get_string(); + geo_meta.set_type(ObGeometryType); + geo_meta.set_collation_type(CS_TYPE_UTF8MB4_BIN); + } + return ret; +} + +void ObFTDMLIterator::reset() +{ + is_inited_ = false; + ft_parse_helper_.reset(); + ObDomainDMLIterator::reset(); +} + +int ObFTDMLIterator::rewind() +{ + int ret = OB_SUCCESS; + row_idx_ = 0; + if (das_ctdef_->table_param_.get_data_table().is_fts_index()) { + if (OB_FAIL(ObDomainDMLIterator::rewind())) { + LOG_WARN("fail to ObDomainDMLIterator::rewind", K(ret)); + } else if (OB_ISNULL(das_ctdef_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, das_ctdef is nullptr", K(ret), KP(das_ctdef_)); + } else { + storage::ObFTParser parser_name; + const common::ObString parser_str = das_ctdef_->table_param_.get_data_table().get_fts_parser_name(); + if 
(OB_FAIL(parser_name.parse_from_str(parser_str.ptr(), parser_str.length()))) { + LOG_WARN("fail to parse name from cstring", K(ret), K(parser_str)); + } else if (parser_name == ft_parse_helper_.get_parser_name()) { + // This is the same as the parser name of the previous index. + // nothing to do, just skip. + } else if (FALSE_IT(ft_parse_helper_.reset())) { + } else if (OB_FAIL(ft_parse_helper_.init(&allocator_, parser_str))) { + LOG_WARN("fail to init fulltext parse helper", K(ret), K(parser_str)); + } + } + } + return ret; +} + +int ObFTDMLIterator::init(const common::ObString &parser_name) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init fulltext dml iterator twice", K(ret), K(is_inited_)); + } else if (OB_FAIL(ft_parse_helper_.init(&allocator_, parser_name))) { + LOG_WARN("fail to init fulltext parse helper", K(ret), K(parser_name)); + } else { + is_inited_ = true; + } + if (OB_UNLIKELY(!is_inited_)) { + reset(); + } + return ret; +} + +int ObFTDMLIterator::generate_domain_rows(const ObChunkDatumStore::StoredRow *store_row) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(store_row)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(store_row)); + } else if (OB_UNLIKELY(!das_ctdef_->table_param_.get_data_table().is_fts_index())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, it isn't fulltext index", K(ret), K(das_ctdef_->table_param_.get_data_table())); + } else { + ObString doc_id; + ObString ft; + common::ObObjMeta ft_meta; + const bool is_fts_index_aux = das_ctdef_->table_param_.get_data_table().is_fts_index_aux(); + if (!is_update_ && OB_FAIL(get_ft_and_doc_id(store_row, doc_id, ft, ft_meta))) { + LOG_WARN("fail to get fulltext and doc id", K(ret), KPC(store_row)); + } else if (is_update_ && OB_FAIL(get_ft_and_doc_id_for_update(store_row, doc_id, ft, ft_meta))) { + LOG_WARN("fail to get fulltext and doc id for update", K(ret), KPC(store_row)); + } else if 
(OB_FAIL(ObDASDomainUtils::generate_fulltext_word_rows(allocator_, &ft_parse_helper_, ft_meta, + doc_id, ft, is_fts_index_aux, rows_))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("fail to generate fulltext word rows", K(ret), K(doc_id), K(ft_parse_helper_), + K(ft_meta), K(ft), KPC(store_row), K(is_fts_index_aux), K(rows_), KPC(main_ctdef_)); + } + } + } + STORAGE_FTS_LOG(DEBUG, "generate domain rows", K(ret), K(rows_), KPC(store_row)); + return ret; +} + +int ObFTDMLIterator::get_ft_and_doc_id( + const ObChunkDatumStore::StoredRow *store_row, + ObString &doc_id, + ObString &ft, + common::ObObjMeta &ft_meta) const +{ + int ret = OB_SUCCESS; + const uint64_t doc_id_col_id = das_ctdef_->table_param_.get_data_table().get_doc_id_col_id(); + const uint64_t fts_col_id = das_ctdef_->table_param_.get_data_table().get_fulltext_col_id(); + if (OB_UNLIKELY(OB_INVALID_ID == doc_id_col_id || OB_INVALID_ID == fts_col_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid doc id or fulltext column id", K(ret), K(doc_id_col_id), K(fts_col_id)); + } else { + const bool is_fts_index_aux = das_ctdef_->table_param_.get_data_table().is_fts_index_aux(); + const int64_t doc_id_idx = !is_fts_index_aux ? 0 : 1; + const int64_t ft_idx = !is_fts_index_aux ? 
1 : 0; + doc_id = store_row->cells()[row_projector_->at(doc_id_idx)].get_string(); + ft = store_row->cells()[row_projector_->at(ft_idx)].get_string(); + ft_meta = das_ctdef_->column_types_.at(ft_idx); + if (OB_FAIL(ret)) { + } else if (OB_UNLIKELY(doc_id.length() != sizeof(ObDocId)) || OB_ISNULL(doc_id.ptr())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid binary document id", K(ret), K(doc_id)); + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&allocator_, + ft_meta.get_type(), + ft_meta.get_collation_type(), + ft_meta.has_lob_header(), + ft))) { + LOG_WARN("fail to get real text data", K(ret)); + } else { + STORAGE_FTS_LOG(DEBUG, "succeed to get fulltext and doc id", K(doc_id), K(ft_meta), K(ft)); + } + } + return ret; +} + +int ObFTDMLIterator::get_ft_and_doc_id_for_update( + const ObChunkDatumStore::StoredRow *store_row, + ObString &doc_id, + ObString &ft, + common::ObObjMeta &ft_meta) const +{ + int ret = OB_SUCCESS; + const uint64_t rowkey_col_cnt = das_ctdef_->table_param_.get_data_table().get_rowkey_column_num(); + const uint64_t old_proj_cnt = das_ctdef_->old_row_projector_.count(); + const uint64_t new_proj_cnt = das_ctdef_->new_row_projector_.count(); + if (OB_UNLIKELY(rowkey_col_cnt + 2 != old_proj_cnt || rowkey_col_cnt + 2 != new_proj_cnt)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid project count", K(ret), K(rowkey_col_cnt), K(old_proj_cnt), K(new_proj_cnt)); + } else { + const bool is_fts_index_aux = das_ctdef_->table_param_.get_data_table().is_fts_index_aux(); + const int64_t doc_id_idx = !is_fts_index_aux ? 0 : 1; + const int64_t ft_idx = !is_fts_index_aux ? 
1 : 0; + doc_id = store_row->cells()[row_projector_->at(doc_id_idx)].get_string(); + ft = store_row->cells()[row_projector_->at(ft_idx)].get_string(); + ft_meta = das_ctdef_->column_types_.at(ft_idx); + if (OB_FAIL(ret)) { + } else if (OB_UNLIKELY(doc_id.length() != sizeof(ObDocId)) || OB_ISNULL(doc_id.ptr())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid binary document id", K(ret), K(doc_id)); + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&allocator_, + ft_meta.get_type(), + ft_meta.get_collation_type(), + true /* has lob header */, + ft))) { + LOG_WARN("fail to get real text data", K(ret)); + } else { + STORAGE_FTS_LOG(DEBUG, "succeed to get fulltext and doc id", K(doc_id), K(ft_meta), K(ft)); + } + } + return ret; +} + +int ObMultivalueDMLIterator::generate_domain_rows(const ObChunkDatumStore::StoredRow *store_row) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(store_row)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(store_row)); + } else if (OB_UNLIKELY(!das_ctdef_->table_param_.get_data_table().is_multivalue_index_aux())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, it isn't multivalue index", K(ret), K(das_ctdef_->table_param_.get_data_table())); + } else { + int64_t multivalue_idx = OB_INVALID_ID; + int64_t multivalue_arr_idx = OB_INVALID_ID; + ObString multivalue_data; + if (!is_update_ && OB_FAIL(get_multivlaue_json_data( + store_row, multivalue_idx, multivalue_arr_idx, multivalue_data))) { + LOG_WARN("fail to get json data.", K(ret), KPC(store_row)); + } else if (is_update_ && OB_FAIL(get_multivlaue_json_data_for_update( + store_row, multivalue_idx, multivalue_arr_idx, multivalue_data))) { + LOG_WARN("fail to get json data for update.", K(ret), KPC(store_row)); + } else if (OB_FAIL(ObDASDomainUtils::generate_multivalue_index_rows( + allocator_, *das_ctdef_, multivalue_idx, multivalue_arr_idx, + multivalue_data, *row_projector_, *store_row, rows_))) { + if (ret != OB_ITER_END) { + 
LOG_WARN("generate multi value index_rows failed", K(ret), K(multivalue_idx), + KPC(store_row), K(multivalue_data)); + } + } + } + LOG_DEBUG("generate domain rows", K(ret), K(rows_), KPC(store_row)); + return ret; +} + +int ObMultivalueDMLIterator::get_multivlaue_json_data( + const ObChunkDatumStore::StoredRow *store_row, + int64_t& multivalue_idx, + int64_t& multivalue_arr_idx, + ObString &multivalue_data) const +{ + int ret = OB_SUCCESS; + multivalue_idx = OB_INVALID_ID; + + const uint64_t multivalue_col_id = das_ctdef_->table_param_.get_data_table().get_multivalue_col_id(); + bool found = false; + + if (OB_INVALID_ID == multivalue_col_id) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid doc id or multivalue column id", K(ret), + K(multivalue_col_id), K(das_ctdef_->table_param_.get_data_table())); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < das_ctdef_->column_ids_.count() && !found; ++i) { + const int64_t column_id = das_ctdef_->column_ids_.at(i); + const int64_t projector_idx = row_projector_->at(i); + if (OB_UNLIKELY(projector_idx >= store_row->cnt_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid index for sr", K(ret), KPC(store_row), K(i), K(main_ctdef_->column_ids_)); + } else if (multivalue_col_id == column_id) { + found = true; + multivalue_idx = projector_idx; + multivalue_arr_idx = multivalue_idx + 1; + multivalue_data = store_row->cells()[multivalue_arr_idx].get_string(); + + if (OB_FAIL(ObTextStringHelper::read_real_string_data(&allocator_, + ObJsonType, + CS_TYPE_UTF8MB4_BIN, + true, multivalue_data))) { + LOG_WARN("fail to get real json data", K(ret)); + } + } + } + + if (OB_SUCC(ret) && !found) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("can't get multivalue col idx, or get doc id column", K(ret), K(multivalue_idx)); + } + } + + return ret; +} + +int ObMultivalueDMLIterator::get_multivlaue_json_data_for_update( + const ObChunkDatumStore::StoredRow *store_row, + int64_t& multivalue_idx, + int64_t& multivalue_arr_idx, + ObString 
&multivalue_data) const +{ + int ret = OB_SUCCESS; + bool found = false; + + multivalue_idx = OB_INVALID_ID; + multivalue_arr_idx = OB_INVALID_ID; + + const uint64_t multivalue_col_id = das_ctdef_->table_param_.get_data_table().get_multivalue_col_id(); + + for (int64_t i = 0; OB_SUCC(ret) && i < das_ctdef_->column_ids_.count() && !found; ++i) { + const int64_t projector_idx = row_projector_->at(i); + if (multivalue_col_id == das_ctdef_->column_ids_.at(i)) { + if (projector_idx >= store_row->cnt_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid index for sr", K(ret), KPC(store_row), K(i), K(projector_idx)); + } else { + found = true; + multivalue_idx = projector_idx; + + if (projector_idx >= store_row->cnt_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid index for sr", K(ret), KPC(store_row), K(i), K(projector_idx)); + } else { + multivalue_arr_idx = projector_idx + 1; + multivalue_data = store_row->cells()[multivalue_arr_idx].get_string(); + + if (OB_FAIL(ObTextStringHelper::read_real_string_data(&allocator_, + ObJsonType, + CS_TYPE_UTF8MB4_BIN, + true, multivalue_data))) { + LOG_WARN("fail to get real json data", K(ret)); + } + } + } + } + } + + if (OB_SUCC(ret) && !found) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("can't get multivalue col idx, or get doc id column", K(ret), K(multivalue_idx)); + } + + return ret; +} + + +} // end namespace storage +} // end namespace oceanbase diff --git a/src/sql/das/ob_das_domain_utils.h b/src/sql/das/ob_das_domain_utils.h new file mode 100644 index 0000000000..dfb2e5c0ad --- /dev/null +++ b/src/sql/das/ob_das_domain_utils.h @@ -0,0 +1,230 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_DAS_DOMAIN_UTILS_H +#define OCEANBASE_DAS_DOMAIN_UTILS_H + +#include "lib/hash/ob_hashset.h" +#include "sql/das/ob_das_dml_ctx_define.h" +#include "storage/fts/ob_fts_plugin_helper.h" + +namespace oceanbase +{ +namespace sql +{ + +class ObDASDomainUtils final +{ +public: + + static const uint64_t MVI_FULL_ROWKEY_THRESHOLD = 6; + static const uint64_t MVI_ROWKEY_SIZE_THRESHOLD = 48; + + ObDASDomainUtils() = default; + ~ObDASDomainUtils() = default; + + static int generate_spatial_index_rows( + ObIAllocator &allocator, + const ObDASDMLBaseCtDef &das_ctdef, + const ObString &wkb_str, + const IntFixedArray &row_projector, + const ObDASWriteBuffer::DmlRow &dml_row, + ObDomainIndexRow &spat_rows); + static int generate_fulltext_word_rows( + common::ObIAllocator &allocator, + storage::ObFTParseHelper *helper, + const common::ObObjMeta &ft_obj_meta, + const ObString &doc_id, + const ObString &fulltext, + const bool is_fts_index_aux, + ObDomainIndexRow &word_rows); + static int generate_multivalue_index_rows( + ObIAllocator &allocator, + const ObDASDMLBaseCtDef &das_ctdef, + int64_t mvi_idx, + int64_t mvi_arr_idx, + const ObString &json_data, + const IntFixedArray &row_projector, + const ObDASWriteBuffer::DmlRow &dml_row, + ObDomainIndexRow &domain_rows); +private: + typedef common::hash::ObHashMap ObFTWordMap; +private: + static int segment_and_calc_word_count( + common::ObIAllocator &allocator, + storage::ObFTParseHelper *helper, + const common::ObCollationType &type, + const ObString &fulltext, + int64_t &doc_length, + ObFTWordMap &words_count); + static int calc_save_rowkey_policy( + ObIAllocator 
&allocator, + const ObDASDMLBaseCtDef &das_ctdef, + const IntFixedArray &row_projector, + const ObDASWriteBuffer::DmlRow &dml_row, + const int64_t record_cnt, + bool& is_save_rowkey); + + static int get_pure_mutivalue_data( + const ObString &json_str, + const char*& data, + int64_t& data_len, + uint32_t& record_num); +}; + +class ObDomainDMLIterator +{ +public: + static const int64_t DEFAULT_DOMAIN_ROW_COUNT = 32; + typedef common::ObSEArray ObDomainIndexRow; + + static int create_domain_dml_iterator( + common::ObIAllocator &allocator, + const IntFixedArray *row_projector, + ObDASWriteBuffer::Iterator &write_iter, + const ObDASDMLBaseCtDef *das_ctdef, + const ObDASDMLBaseCtDef *main_ctdef, + ObDomainDMLIterator *&domain_iter); + +public: + virtual ~ObDomainDMLIterator(); + virtual void reset(); + virtual int rewind() + { + row_idx_ = 0; + return common::OB_SUCCESS; + } + void set_ctdef(const ObDASDMLBaseCtDef *das_ctdef, const IntFixedArray *row_projector); + void set_row_projector(const IntFixedArray *row_projector) { row_projector_ = row_projector; } + int get_next_domain_row(ObNewRow *&row); + + TO_STRING_KV(K_(row_idx), K_(rows), KPC_(row_projector), KPC_(das_ctdef), K_(main_ctdef)); +protected: + ObDomainDMLIterator( + common::ObIAllocator &allocator, + const IntFixedArray *row_projector, + ObDASWriteBuffer::Iterator &write_iter, + const ObDASDMLBaseCtDef *das_ctdef, + const ObDASDMLBaseCtDef *main_ctdef); + virtual int generate_domain_rows(const ObChunkDatumStore::StoredRow *store_row) = 0; + +protected: + uint32_t row_idx_; + ObDomainIndexRow rows_; + const IntFixedArray *row_projector_; + ObDASWriteBuffer::Iterator &write_iter_; + const ObDASDMLBaseCtDef *das_ctdef_; + const ObDASDMLBaseCtDef *main_ctdef_; + common::ObIAllocator &allocator_; + bool is_update_; +private: + DISALLOW_COPY_AND_ASSIGN(ObDomainDMLIterator); +}; + +class ObSpatialDMLIterator final : public ObDomainDMLIterator +{ +public: + ObSpatialDMLIterator( + common::ObIAllocator &allocator, + 
const IntFixedArray *row_projector, + ObDASWriteBuffer::Iterator &write_iter, + const ObDASDMLBaseCtDef *das_ctdef, + const ObDASDMLBaseCtDef *main_ctdef) + : ObDomainDMLIterator(allocator, row_projector, write_iter, das_ctdef, main_ctdef) + {} + virtual ~ObSpatialDMLIterator() = default; + +private: + virtual int generate_domain_rows(const ObChunkDatumStore::StoredRow *store_row) override; + int get_geo_wkb( + const ObChunkDatumStore::StoredRow *store_row, + int64_t &geo_idx, + ObString &geo_wkb, + ObObjMeta &geo_meta) const; + int get_geo_wkb_for_update( + const ObChunkDatumStore::StoredRow *store_row, + int64_t &geo_idx, + ObString &geo_wkb, + ObObjMeta &geo_meta) const; +}; + +class ObMultivalueDMLIterator final : public ObDomainDMLIterator +{ +public: + ObMultivalueDMLIterator( + common::ObIAllocator &allocator, + const IntFixedArray *row_projector, + ObDASWriteBuffer::Iterator &write_iter, + const ObDASDMLBaseCtDef *das_ctdef, + const ObDASDMLBaseCtDef *main_ctdef) + : ObDomainDMLIterator(allocator, row_projector, write_iter, das_ctdef, main_ctdef) + {} + virtual ~ObMultivalueDMLIterator() = default; + +private: + virtual int generate_domain_rows(const ObChunkDatumStore::StoredRow *store_row) override; + int get_multivlaue_json_data( + const ObChunkDatumStore::StoredRow *store_row, + int64_t& multivalue_idx, + int64_t& multivalue_arr_idx, + ObString &multivalue_data) const; + + int get_multivlaue_json_data_for_update( + const ObChunkDatumStore::StoredRow *store_row, + int64_t& multivalue_idx, + int64_t& multivalue_arr_idx, + ObString &multivalue_data) const; +}; + + +class ObFTDMLIterator final : public ObDomainDMLIterator +{ +public: + ObFTDMLIterator( + common::ObIAllocator &allocator, + const IntFixedArray *row_projector, + ObDASWriteBuffer::Iterator &write_iter, + const ObDASDMLBaseCtDef *das_ctdef, + const ObDASDMLBaseCtDef *main_ctdef) + : ObDomainDMLIterator(allocator, row_projector, write_iter, das_ctdef, main_ctdef), + ft_parse_helper_(), + 
is_inited_(false) + {} + virtual ~ObFTDMLIterator() = default; + + virtual void reset() override; + virtual int rewind() override; + int init(const common::ObString &parser_name); + + INHERIT_TO_STRING_KV("ObDomainDMLIterator", ObDomainDMLIterator, K_(ft_parse_helper), K_(is_inited)); +protected: + virtual int generate_domain_rows(const ObChunkDatumStore::StoredRow *store_row) override; + int get_ft_and_doc_id( + const ObChunkDatumStore::StoredRow *store_row, + ObString &doc_id, + ObString &ft, + common::ObObjMeta &ft_meta) const; + int get_ft_and_doc_id_for_update( + const ObChunkDatumStore::StoredRow *store_row, + ObString &doc_id, + ObString &ft, + common::ObObjMeta &ft_meta) const; + +private: + storage::ObFTParseHelper ft_parse_helper_; + bool is_inited_; +}; + +} // end namespace sql +} // end namespace oceanbase + +#endif // OCEANBASE_DAS_DOMAIN_UTILS_H diff --git a/src/sql/das/ob_das_factory.cpp b/src/sql/das/ob_das_factory.cpp index 7c78fa6968..9753737e45 100644 --- a/src/sql/das/ob_das_factory.cpp +++ b/src/sql/das/ob_das_factory.cpp @@ -13,7 +13,6 @@ #define USING_LOG_PREFIX SQL_DAS #include "sql/das/ob_das_factory.h" #include "sql/das/ob_das_scan_op.h" -#include "sql/das/ob_das_group_scan_op.h" #include "sql/das/ob_das_insert_op.h" #include "sql/das/ob_das_delete_op.h" #include "sql/das/ob_das_update_op.h" @@ -23,6 +22,8 @@ #include "sql/das/ob_das_def_reg.h" #include "sql/das/ob_das_rpc_processor.h" #include "sql/das/ob_das_ref.h" +#include "sql/das/ob_das_attach_define.h" +#include "sql/das/ob_text_retrieval_op.h" #include "share/datum/ob_datum_util.h" #define STORE_DAS_OBJ(obj_store, das_obj, class_name) \ @@ -168,9 +169,10 @@ struct DASInitAllocFunc static void init_array() { static constexpr int registered = das_reg::ObDASOpTypeTraits::registered_; + static constexpr int attached = das_reg::ObDASOpTypeTraits::attached_; G_DAS_ALLOC_FUNCTION_ARRAY[N] = ObDASTaskFactory::AllocFun { - (registered ? &AllocDASOpHelper::alloc : NULL), - (registered ? 
&AllocDASOpResultHelper::alloc : NULL), + ((registered && !attached) ? &AllocDASOpHelper::alloc : NULL), + ((registered && !attached) ? &AllocDASOpResultHelper::alloc : NULL), (registered ? &AllocDASCtDefHelper::alloc : NULL), (registered ? &AllocDASRtDefHelper::alloc : NULL) }; diff --git a/src/sql/das/ob_das_factory.h b/src/sql/das/ob_das_factory.h index 0aa5fba237..32788eab3c 100644 --- a/src/sql/das/ob_das_factory.h +++ b/src/sql/das/ob_das_factory.h @@ -52,7 +52,12 @@ public: static inline bool is_registered(const ObDASOpType op_type) { - return op_type >= 0 && op_type < DAS_OP_MAX && NULL != G_DAS_ALLOC_FUNS_[op_type].op_func_; + return op_type >= 0 && op_type < DAS_OP_MAX && NULL != G_DAS_ALLOC_FUNS_[op_type].ctdef_func_; + } + + static inline bool is_attached(const ObDASOpType op_type) + { + return op_type >= 0 && op_type < DAS_OP_MAX && NULL == G_DAS_ALLOC_FUNS_[op_type].op_func_; } struct AllocFun diff --git a/src/sql/das/ob_das_group_scan_op.h b/src/sql/das/ob_das_group_scan_op.h deleted file mode 100644 index 30d39b895e..0000000000 --- a/src/sql/das/ob_das_group_scan_op.h +++ /dev/null @@ -1,122 +0,0 @@ -/** - * Copyright (c) 2021 OceanBase - * OceanBase CE is licensed under Mulan PubL v2. - * You can use this software according to the terms and conditions of the Mulan PubL v2. - * You may obtain a copy of Mulan PubL v2 at: - * http://license.coscl.org.cn/MulanPubL-2.0 - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, - * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, - * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. - * See the Mulan PubL v2 for more details. 
- */ - -#ifndef OBDEV_SRC_SQL_DAS_OB_DAS_GROUP_SCAN_OP_H_ -#define OBDEV_SRC_SQL_DAS_OB_DAS_GROUP_SCAN_OP_H_ -#include "sql/das/ob_das_scan_op.h" -#include "sql/das/ob_group_scan_iter.h" -namespace oceanbase -{ -namespace sql -{ -class ObGroupLookupOp : public ObLocalIndexLookupOp -{ -public: - ObGroupLookupOp() : ObLocalIndexLookupOp(ObNewRowIterator::IterType::ObGroupLookupOp), - group_iter_() - { - lookup_iter_ = &group_iter_; - } - virtual ~ObGroupLookupOp(); - virtual void reset() override - { - ObLocalIndexLookupOp::reset(); - index_group_cnt_ = 1; - lookup_group_cnt_ = 1; - } - virtual int64_t get_index_group_cnt() const override { return index_group_cnt_; } - virtual void set_index_group_cnt(int64_t group_cnt_) override {index_group_cnt_ = group_cnt_;} - virtual void inc_index_group_cnt() override { ++index_group_cnt_; } - virtual int64_t get_lookup_group_cnt() const override { return lookup_group_cnt_; } - virtual void inc_lookup_group_cnt() override { ++lookup_group_cnt_; } - virtual int switch_rowkey_scan_group() override - { - return static_cast(rowkey_iter_)->switch_scan_group(); - } - virtual int set_rowkey_scan_group(int64_t group_id) override - { - return static_cast(rowkey_iter_)->set_scan_group(group_id); - } - virtual ObNewRowIterator *&get_lookup_storage_iter() override; - int init_group_range(int64_t cur_group_idx, int64_t group_size) override; - virtual bool need_next_index_batch() const override; - virtual int init_group_scan_iter(int64_t cur_group_idx, - int64_t group_size, - ObExpr *group_id_expr); - virtual int switch_lookup_scan_group() override; - virtual int set_lookup_scan_group(int64_t group_id) override; - - int revert_iter(); -public: - ObGroupScanIter group_iter_; -}; - -class ObDASGroupScanOp : public ObDASScanOp -{ - OB_UNIS_VERSION(1); -public: - ObDASGroupScanOp(common::ObIAllocator &op_alloc); - virtual ~ObDASGroupScanOp(); - int open_op() override; - int release_op() override; - virtual int rescan() override; - virtual int 
switch_scan_group() override; - virtual int set_scan_group(int64_t group_id) override; - int64_t get_cur_group_idx() const { return iter_.get_cur_group_idx(); } - void init_group_range(int64_t cur_group_idx, int64_t group_size); - virtual ObLocalIndexLookupOp *get_lookup_op() override - { return group_lookup_op_; } - ObNewRowIterator *get_storage_scan_iter() override; - int do_local_index_lookup() override; - int decode_task_result(ObIDASTaskResult *task_result) override; - int fill_task_result(ObIDASTaskResult &task_result, bool &has_more, int64_t &memory_limit) override; - void set_is_exec_remote(bool v) { is_exec_remote_ = v; } - virtual bool need_all_output() override { return is_exec_remote_; } - TO_STRING_KV(K(iter_), KP(group_lookup_op_), K(group_size_), K(cur_group_idx_)); -private: - common::ObNewRowIterator *&get_scan_result() { return result_; } - ObNewRowIterator *get_output_result_iter() override - { - return result_iter_; - }; -private: - // can't set group_lookup_op_ to ObDASScanOp::result_ - // because may circular dependencies: - // ObDASGroupScanOp::result_ -> ObGroupLookupOp - // ObGroupLookupOp::rowkey_iter_ -> ObDASGroupScanOp::ObGroupScanIter - // ObDASGroupScanOp::ObGroupScanIter -> ObDASGroupScanOp::result_ - ObGroupLookupOp *group_lookup_op_; - ObGroupScanIter iter_; - // for normal group scan: - // local das: - // result_iter_ is &group_scan_iter_ - // remote das: - // local server: result_iter_ is &group_scan_iter_ and - // the input of group_scan_iter_ is ObDASScanResult - // remote server: result_iter_ is storage_iter - // for local index lookup group scan: - // local das: - // result_iter_ is group_lookup_op_ - // remote das: - // local server: result_iter_ is group_scan_iter of group_lookup_op_ and - // the input of this group_scan_iter is ObGroupScanIter - // remote server: result_iter_ is group_lookup_op_ and indicate the group_lookup_op is exec remote - // which will not need switch iter when lookup, and output the result of all 
group - ObNewRowIterator *result_iter_; - bool is_exec_remote_; - int64_t cur_group_idx_; - int64_t group_size_; -}; - -} // namespace sql -} // namespace oceanbase -#endif /* OBDEV_SRC_SQL_DAS_OB_DAS_BATCH_SCAN_OP_H_ */ diff --git a/src/sql/das/ob_das_insert_op.cpp b/src/sql/das/ob_das_insert_op.cpp index 8c247112fc..cb0dfbae9b 100644 --- a/src/sql/das/ob_das_insert_op.cpp +++ b/src/sql/das/ob_das_insert_op.cpp @@ -89,7 +89,7 @@ int ObDASIndexDMLAdaptor::write_rows(cons if (OB_TRY_LOCK_ROW_CONFLICT != ret) { LOG_WARN("insert rows to access service failed", K(ret)); } - } else if (!(ctdef.is_ignore_ || ctdef.table_param_.get_data_table().is_spatial_index()) + } else if (!(ctdef.is_ignore_ || ctdef.table_param_.get_data_table().is_domain_index()) && 0 == affected_rows) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected affected_rows after do insert", K(affected_rows), K(ret)); diff --git a/src/sql/das/ob_das_scan_op.cpp b/src/sql/das/ob_das_scan_op.cpp index 0a751cc1f9..cc9b5b6a6c 100644 --- a/src/sql/das/ob_das_scan_op.cpp +++ b/src/sql/das/ob_das_scan_op.cpp @@ -14,6 +14,8 @@ #include "sql/das/ob_das_scan_op.h" #include "sql/das/ob_das_extra_data.h" #include "sql/das/ob_das_spatial_index_lookup_op.h" +#include "sql/das/ob_domain_index_lookup_op.h" +#include "sql/das/ob_das_utils.h" #include "sql/engine/table/ob_table_scan_op.h" #include "sql/engine/px/ob_px_util.h" #include "sql/engine/ob_des_exec_context.h" @@ -201,16 +203,20 @@ int ObDASScanOp::swizzling_remote_task(ObDASRemoteInfo *remote_info) scan_rtdef_->p_pd_expr_op_->get_eval_ctx() .set_max_batch_size(scan_ctdef_->pd_expr_spec_.max_batch_size_); } - if (OB_SUCC(ret) && get_lookup_rtdef() != nullptr) { - const ObDASScanCtDef *lookup_ctdef = get_lookup_ctdef(); - ObDASScanRtDef *lookup_rtdef = get_lookup_rtdef(); - lookup_rtdef->stmt_allocator_.set_alloc(&CURRENT_CONTEXT->get_arena_allocator()); - lookup_rtdef->scan_allocator_.set_alloc(&CURRENT_CONTEXT->get_arena_allocator()); - if 
(OB_FAIL(lookup_rtdef->init_pd_op(*remote_info->exec_ctx_, *lookup_ctdef))) { - LOG_WARN("init lookup pushdown operator failed", K(ret)); - } else { - lookup_rtdef->p_pd_expr_op_->get_eval_ctx() - .set_max_batch_size(lookup_ctdef->pd_expr_spec_.max_batch_size_); + for (int i = 0; OB_SUCC(ret) && i < related_rtdefs_.count(); ++i) { + if (OB_NOT_NULL(related_rtdefs_.at(i)) && + (DAS_OP_TABLE_SCAN == related_rtdefs_.at(i)->op_type_ || + DAS_OP_TABLE_BATCH_SCAN == related_rtdefs_.at(i)->op_type_)) { + const ObDASScanCtDef *related_ctdef = static_cast(related_ctdefs_.at(i)); + ObDASScanRtDef *related_rtdef = static_cast(related_rtdefs_.at(i)); + related_rtdef->stmt_allocator_.set_alloc(&CURRENT_CONTEXT->get_arena_allocator()); + related_rtdef->scan_allocator_.set_alloc(&CURRENT_CONTEXT->get_arena_allocator()); + if (OB_FAIL(related_rtdef->init_pd_op(*remote_info->exec_ctx_, *related_ctdef))) { + LOG_WARN("init related rtdef pushdown operator failed", K(ret)); + } else { + related_rtdef->p_pd_expr_op_->get_eval_ctx() + .set_max_batch_size(related_ctdef->pd_expr_spec_.max_batch_size_); + } } } } @@ -314,7 +320,6 @@ ObITabletScan &ObDASScanOp::get_tsc_service() int ObDASScanOp::open_op() { int ret = OB_SUCCESS; - ObITabletScan &tsc_service = get_tsc_service(); //Retry may be called many times. //Only for DASScanOp now, we add a retry alloc to avoid //memory expansion. 
@@ -324,7 +329,7 @@ int ObDASScanOp::open_op() reset_access_datums_ptr(); if (OB_FAIL(init_scan_param())) { LOG_WARN("init scan param failed", K(ret)); - } else if (OB_FAIL(tsc_service.table_scan(scan_param_, result_))) { + } else if (OB_FAIL(do_table_scan())) { if (OB_SNAPSHOT_DISCARDED == ret && scan_param_.fb_snapshot_.is_valid()) { ret = OB_INVALID_QUERY_TIMESTAMP; } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) { @@ -366,6 +371,10 @@ int ObDASScanOp::release_op() if (OB_SUCCESS == ret) { ret = lookup_ret; } + } else if (ObNewRowIterator::IterType::ObTextRetrievalOp == result_->get_type()) { + ObTextRetrievalOp *text_retrieval_op = static_cast(result_); + text_retrieval_op->reset(); + result_ = nullptr; } else { if (OB_FAIL(tsc_service.revert_scan_iter(result_))) { LOG_WARN("revert scan iterator failed", K(ret)); @@ -425,10 +434,33 @@ ObLocalIndexLookupOp *ObDASScanOp::get_lookup_op() return res; } +int ObDASScanOp::do_table_scan() +{ + int ret = OB_SUCCESS; + if (scan_param_.table_param_->is_fts_index() && attach_ctdef_ != nullptr) { + // full text index retrieval scan + if (OB_FAIL(do_text_retrieve(result_))) { + LOG_WARN("fail to retrieve token from full text index", K(ret)); + } + } else if (OB_FAIL(get_tsc_service().table_scan(scan_param_, result_))) { + if (OB_SNAPSHOT_DISCARDED == ret && scan_param_.fb_snapshot_.is_valid()) { + ret = OB_TABLE_DEFINITION_CHANGED; + } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("fail to scan table", K(scan_param_), K(ret)); + } + } + return ret; +} + int ObDASScanOp::do_local_index_lookup() { int ret = OB_SUCCESS; - if (scan_param_.table_param_->is_spatial_index()) { + if (scan_param_.table_param_->is_fts_index() || + scan_param_.table_param_->is_multivalue_index()) { + if (OB_FAIL(do_domain_index_lookup())) { + LOG_WARN("failed to do domain index lookup", K(ret)); + } + } else if (scan_param_.table_param_->is_spatial_index()) { void *buf = op_alloc_.alloc(sizeof(ObSpatialIndexLookupOp)); if (OB_ISNULL(buf)) { ret = 
OB_ALLOCATE_MEMORY_FAILED; @@ -441,7 +473,7 @@ int ObDASScanOp::do_local_index_lookup() trans_desc_, snapshot_, scan_param_))) { LOG_WARN("init spatial lookup op failed", K(ret)); } else { - op->set_tablet_id(related_tablet_ids_.at(0)); + op->set_tablet_id(get_table_lookup_tablet_id()); op->set_ls_id(ls_id_); } } @@ -462,7 +494,7 @@ int ObDASScanOp::do_local_index_lookup() snapshot_))) { LOG_WARN("init lookup op failed", K(ret)); } else { - op->set_tablet_id(related_tablet_ids_.at(0)); + op->set_tablet_id(get_table_lookup_tablet_id()); op->set_ls_id(ls_id_); } } @@ -470,16 +502,66 @@ int ObDASScanOp::do_local_index_lookup() return ret; } +int ObDASScanOp::do_domain_index_lookup() +{ + int ret = OB_SUCCESS; + ObTabletID doc_id_idx_tablet_id; + + if (scan_param_.table_param_->is_fts_index()) { + ObFullTextIndexLookupOp *op = nullptr; + ObTabletID doc_id_idx_tablet_id; + const ObDASTableLookupCtDef *table_lookup_ctdef = nullptr; + ObDASTableLookupRtDef *table_lookup_rtdef = nullptr; + if (OB_FAIL(get_aux_lookup_tablet_id(doc_id_idx_tablet_id))) { + LOG_WARN("failed to get doc id idx tablet id", K(ret), K_(related_tablet_ids)); + } else if (OB_ISNULL(op = OB_NEWx(ObFullTextIndexLookupOp, &op_alloc_, op_alloc_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate full text index lookup op", K(ret)); + } else if (FALSE_IT(op->set_text_retrieval_iter(result_))) { + } else if (FALSE_IT(result_ = op)) { + } else if (OB_FAIL(op->init(attach_ctdef_, attach_rtdef_, trans_desc_, snapshot_, scan_param_))) { + LOG_WARN("failed to init full text index lookup op", K(ret)); + } else { + op->set_tablet_id(get_table_lookup_tablet_id()); + op->set_doc_id_idx_tablet_id(doc_id_idx_tablet_id); + op->set_ls_id(ls_id_); + } + } else if (scan_param_.table_param_->is_multivalue_index()) { + ObMulValueIndexLookupOp* op = nullptr; + if (OB_FAIL(get_aux_lookup_tablet_id(doc_id_idx_tablet_id))) { + LOG_WARN("failed to get doc id idx tablet id", K(ret), K_(related_tablet_ids)); + 
} else if (OB_ISNULL(op = OB_NEWx(ObMulValueIndexLookupOp, &op_alloc_, op_alloc_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate full text index lookup op", K(ret)); + } else if (FALSE_IT(op->set_rowkey_iter(result_))) { + } else if (FALSE_IT(result_ = op)) { + } else if (OB_FAIL(op->init(attach_ctdef_, attach_rtdef_, trans_desc_, snapshot_, scan_param_))) { + LOG_WARN("failed to init multivalue index lookup op", K(ret)); + } else { + op->set_tablet_id(get_table_lookup_tablet_id()); + op->set_doc_id_idx_tablet_id(doc_id_idx_tablet_id); + op->set_ls_id(ls_id_); + } + + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("unsupported domain index type", K(ret)); + } + return ret; +} + //output row from remote DAS task //maybe change the expr datum ptr to its RPC datum store //if we need to fetch next row from the next das task, //must reset the datum ptr to expr preallocate frame buffer //otherwise, get_next_row in the local das task maybe has a wrong status -void ObDASScanOp::reset_access_datums_ptr() +void ObDASScanOp::reset_access_datums_ptr(int64_t capacity) { if (scan_rtdef_->p_pd_expr_op_->is_vectorized()) { + int64_t reset_batch_size = capacity > 0 ? 
capacity : scan_rtdef_->eval_ctx_->max_batch_size_; + reset_batch_size = min(reset_batch_size, scan_rtdef_->eval_ctx_->max_batch_size_); FOREACH_CNT(e, scan_ctdef_->pd_expr_spec_.access_exprs_) { - (*e)->locate_datums_for_update(*scan_rtdef_->eval_ctx_, scan_rtdef_->eval_ctx_->max_batch_size_); + (*e)->locate_datums_for_update(*scan_rtdef_->eval_ctx_, reset_batch_size); ObEvalInfo &info = (*e)->get_eval_info(*scan_rtdef_->eval_ctx_); info.point_to_frame_ = true; } @@ -496,22 +578,22 @@ void ObDASScanOp::reset_access_datums_ptr() } if (OB_NOT_NULL(scan_ctdef_->trans_info_expr_)) { ObExpr *trans_expr = scan_ctdef_->trans_info_expr_; - trans_expr->locate_datums_for_update(*scan_rtdef_->eval_ctx_, scan_rtdef_->eval_ctx_->max_batch_size_); + trans_expr->locate_datums_for_update(*scan_rtdef_->eval_ctx_, reset_batch_size); ObEvalInfo &info = trans_expr->get_eval_info(*scan_rtdef_->eval_ctx_); info.point_to_frame_ = true; } } if (get_lookup_rtdef() != nullptr && get_lookup_rtdef()->p_pd_expr_op_->is_vectorized()) { + int64_t reset_batch_size = capacity > 0 ? 
capacity : scan_rtdef_->eval_ctx_->max_batch_size_; + reset_batch_size = min(reset_batch_size, scan_rtdef_->eval_ctx_->max_batch_size_); FOREACH_CNT(e, get_lookup_ctdef()->pd_expr_spec_.access_exprs_) { - (*e)->locate_datums_for_update(*get_lookup_rtdef()->eval_ctx_, - get_lookup_rtdef()->eval_ctx_->max_batch_size_); + (*e)->locate_datums_for_update(*get_lookup_rtdef()->eval_ctx_, reset_batch_size); ObEvalInfo &info = (*e)->get_eval_info(*get_lookup_rtdef()->eval_ctx_); info.point_to_frame_ = true; } if (OB_NOT_NULL(get_lookup_ctdef()->trans_info_expr_)) { ObExpr *trans_expr = get_lookup_ctdef()->trans_info_expr_; - trans_expr->locate_datums_for_update(*get_lookup_rtdef()->eval_ctx_, - get_lookup_rtdef()->eval_ctx_->max_batch_size_); + trans_expr->locate_datums_for_update(*get_lookup_rtdef()->eval_ctx_, reset_batch_size); ObEvalInfo &info = trans_expr->get_eval_info(*scan_rtdef_->eval_ctx_); info.point_to_frame_ = true; } @@ -561,11 +643,6 @@ int ObDASScanOp::fill_task_result(ObIDASTaskResult &task_result, bool &has_more, if (OB_FAIL(get_output_result_iter()->get_next_row())) { if (OB_ITER_END != ret) { LOG_WARN("get next row from result failed", K(ret)); - } else if (need_all_output()) { - ret = switch_scan_group(); - if (OB_SUCC(ret)) { - continue; - } } } else if (OB_UNLIKELY(simulate_row_cnt > 0 && datum_store.get_row_cnt() >= simulate_row_cnt)) { @@ -596,16 +673,8 @@ int ObDASScanOp::fill_task_result(ObIDASTaskResult &task_result, bool &has_more, if (OB_ITER_END != ret) { LOG_WARN("get next batch from result failed", K(ret)); } else { - if (need_all_output()) { - ret = switch_scan_group(); - if (OB_SUCC(ret)) { - continue; - } - } - if (OB_ITER_END == ret) { - iter_end = true; - ret = OB_SUCCESS; - } + iter_end = true; + ret = OB_SUCCESS; } } if (enable_rich_format()) { @@ -698,10 +767,14 @@ int ObDASScanOp::rescan() "scan_range", scan_param_.key_ranges_, "range_pos", scan_param_.range_array_pos_); ObLocalIndexLookupOp *lookup_op = get_lookup_op(); - if 
(OB_FAIL(tsc_service.table_rescan(scan_param_, get_storage_scan_iter()))) { + if (scan_param_.table_param_->is_fts_index() && attach_ctdef_ != nullptr) { + if (OB_FAIL(do_text_retrieve_rescan())) { + LOG_WARN("failed to do text retrieval rescan", K(ret)); + } + } else if (OB_FAIL(tsc_service.table_rescan(scan_param_, get_storage_scan_iter()))) { LOG_WARN("rescan the table iterator failed", K(ret)); } else if (lookup_op != nullptr) { - lookup_op->set_tablet_id(related_tablet_ids_.at(0)); + lookup_op->set_tablet_id(get_table_lookup_tablet_id()); lookup_op->set_ls_id(ls_id_); //lookup op's table_rescan will be drive by its get_next_row() //so will can not call it here @@ -720,7 +793,12 @@ int ObDASScanOp::reuse_iter() ObLocalIndexLookupOp *lookup_op = get_lookup_op(); const ObTabletID &storage_tablet_id = scan_param_.tablet_id_; scan_param_.need_switch_param_ = (storage_tablet_id.is_valid() && storage_tablet_id != tablet_id_ ? true : false); - if (OB_FAIL(tsc_service.reuse_scan_iter(scan_param_.need_switch_param_, get_storage_scan_iter()))) { + if (scan_param_.table_param_->is_fts_index() && attach_ctdef_ != nullptr) { + if (nullptr != lookup_op + && OB_FAIL(static_cast(lookup_op)->reuse_scan_iter())) { + LOG_WARN("failed to reuse text lookup iters", K(ret)); + } + } else if (OB_FAIL(tsc_service.reuse_scan_iter(scan_param_.need_switch_param_, get_storage_scan_iter()))) { LOG_WARN("reuse scan iterator failed", K(ret)); } else if (lookup_op != nullptr && OB_FAIL(lookup_op->reset_lookup_state())) { @@ -733,14 +811,273 @@ int ObDASScanOp::reuse_iter() return ret; } -int ObDASScanOp::set_lookup_tablet_id(const ObTabletID &tablet_id) +const ExprFixedArray &ObDASScanOp::get_result_outputs() const +{ + const ExprFixedArray *result_output = nullptr; + if (attach_ctdef_ != nullptr) { + OB_ASSERT(ObDASTaskFactory::is_attached(attach_ctdef_->op_type_)); + result_output = &(static_cast(attach_ctdef_)->result_output_); + } else if (get_lookup_ctdef() != nullptr) { + result_output = 
&get_lookup_ctdef()->result_output_; + } else { + result_output = &scan_ctdef_->result_output_; + } + return *result_output; +} + +const ObDASScanCtDef *ObDASScanOp::get_lookup_ctdef() const +{ + const ObDASScanCtDef *lookup_ctdef = nullptr; + if (nullptr == attach_ctdef_) { + if (!related_ctdefs_.empty()) { + OB_ASSERT(related_ctdefs_.count() == 1); + OB_ASSERT(related_ctdefs_.at(0)->op_type_ == DAS_OP_TABLE_SCAN); + lookup_ctdef = static_cast(related_ctdefs_.at(0)); + } + } else { + const ObDASTableLookupCtDef *table_lookup_ctdef = nullptr; + if (DAS_OP_TABLE_LOOKUP == attach_ctdef_->op_type_) { + table_lookup_ctdef = static_cast(attach_ctdef_); + lookup_ctdef = table_lookup_ctdef->get_lookup_scan_ctdef(); + } + } + return lookup_ctdef; +} + +ObDASScanRtDef *ObDASScanOp::get_lookup_rtdef() +{ + ObDASScanRtDef *lookup_rtdef = nullptr; + if (nullptr == attach_rtdef_) { + if (!related_rtdefs_.empty()) { + OB_ASSERT(related_rtdefs_.count() == 1); + OB_ASSERT(related_rtdefs_.at(0)->op_type_ == DAS_OP_TABLE_SCAN); + lookup_rtdef = static_cast(related_rtdefs_.at(0)); + } + } else { + ObDASTableLookupRtDef *table_lookup_rtdef = nullptr; + if (DAS_OP_TABLE_LOOKUP == attach_rtdef_->op_type_) { + table_lookup_rtdef = static_cast(attach_rtdef_); + lookup_rtdef = table_lookup_rtdef->get_lookup_scan_rtdef(); + } + } + return lookup_rtdef; +} + +int ObDASScanOp::reserve_related_buffer(const int64_t related_scan_cnt) { int ret = OB_SUCCESS; - if (related_tablet_ids_.empty()) { - related_tablet_ids_.set_capacity(1); - ret = related_tablet_ids_.push_back(tablet_id); + related_ctdefs_.clear(); + related_rtdefs_.clear(); + related_tablet_ids_.clear(); + OZ(related_ctdefs_.reserve(related_scan_cnt)); + OZ(related_rtdefs_.reserve(related_scan_cnt)); + OZ(related_tablet_ids_.reserve(related_scan_cnt)); + return ret; +} + +int ObDASScanOp::set_related_task_info(const ObDASBaseCtDef *lookup_ctdef, + ObDASBaseRtDef *lookup_rtdef, + const ObTabletID &tablet_id) +{ + int ret = OB_SUCCESS; + 
OZ(related_ctdefs_.push_back(lookup_ctdef)); + OZ(related_rtdefs_.push_back(lookup_rtdef)); + OZ(related_tablet_ids_.push_back(tablet_id)); + return ret; +} + +int ObDASScanOp::get_aux_lookup_tablet_id(common::ObTabletID &tablet_id) const +{ + int ret = OB_SUCCESS; + tablet_id.reset(); + const ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr; + ObDASIRAuxLookupRtDef *aux_lookup_rtdef = nullptr; + if (OB_FAIL(ObDASUtils::find_target_das_def(attach_ctdef_, + attach_rtdef_, + DAS_OP_IR_AUX_LOOKUP, + aux_lookup_ctdef, + aux_lookup_rtdef))) { + LOG_WARN("find aux lookup definition failed", K(ret)); } else { - related_tablet_ids_.at(0) = tablet_id; + for (int i = 0; !tablet_id.is_valid() && i < related_ctdefs_.count(); ++i) { + if (aux_lookup_ctdef->get_lookup_scan_ctdef() == related_ctdefs_.at(i)) { + tablet_id = related_tablet_ids_.at(i); + } + } + } + return ret; +} + +ObTabletID ObDASScanOp::get_table_lookup_tablet_id() const +{ + ObTabletID tablet_id; + if (get_lookup_ctdef() != nullptr) { + for (int i = 0; !tablet_id.is_valid() && i < related_ctdefs_.count(); ++i) { + if (get_lookup_ctdef() == related_ctdefs_.at(i)) { + tablet_id = related_tablet_ids_.at(i); + } + } + } + return tablet_id; +} + +int ObDASScanOp::do_text_retrieve(common::ObNewRowIterator *&retrieval_iter) +{ + int ret = OB_SUCCESS; + retrieval_iter = nullptr; + ObTextRetrievalOp *retrieval_op = nullptr; + ObTabletID inv_idx_tablet_id; + ObTabletID fwd_idx_tablet_id; + ObTabletID doc_id_idx_tablet_id; + const ObDASIRScanCtDef *ir_scan_ctdef = nullptr; + ObDASIRScanRtDef *ir_scan_rtdef = nullptr; + const ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr; + ObDASIRAuxLookupRtDef *aux_lookup_rtdef = nullptr; + const ObDASSortCtDef *sort_ctdef = nullptr; + ObDASSortRtDef *sort_rtdef = nullptr; + if (OB_ISNULL(retrieval_op = OB_NEWx(ObTextRetrievalOp, &op_alloc_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate text retrieval op", K(ret)); + } else if (FALSE_IT(retrieval_iter = 
retrieval_op)) { + } else if (OB_FAIL(get_text_ir_tablet_ids(inv_idx_tablet_id, fwd_idx_tablet_id, doc_id_idx_tablet_id))) { + LOG_WARN("failed to get text ir tablet ids", K(ret)); + } else if (OB_FAIL(ObDASUtils::find_target_das_def(attach_ctdef_, + attach_rtdef_, + DAS_OP_IR_SCAN, + ir_scan_ctdef, + ir_scan_rtdef))) { + LOG_WARN("find ir scan definition failed", K(ret)); + } else if (OB_FAIL(ObDASUtils::find_target_das_def(attach_ctdef_, + attach_rtdef_, + DAS_OP_IR_AUX_LOOKUP, + aux_lookup_ctdef, + aux_lookup_rtdef))) { + LOG_WARN("find aux lookup definition failed", K(ret)); + } else if (DAS_OP_SORT == aux_lookup_ctdef->get_doc_id_scan_ctdef()->op_type_) { + sort_ctdef = static_cast(aux_lookup_ctdef->get_doc_id_scan_ctdef()); + sort_rtdef = static_cast(aux_lookup_rtdef->get_doc_id_scan_rtdef()); + } + + if (FAILEDx(retrieval_op->init(ls_id_, + inv_idx_tablet_id, + fwd_idx_tablet_id, + doc_id_idx_tablet_id, + ir_scan_ctdef, + ir_scan_rtdef, + sort_ctdef, + sort_rtdef, + trans_desc_, + snapshot_))) { + LOG_WARN("failed to init text retrieval op", K(ret)); + } + return ret; +} + +int ObDASScanOp::do_text_retrieve_rescan() +{ + int ret = OB_SUCCESS; + ObTextRetrievalOp *text_retrieval_op = nullptr; + if (nullptr == result_ || result_->get_type() != ObNewRowIterator::IterType::ObLocalIndexLookupIterator) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected text retrieve rescan status", K(ret), KP_(result)); + } else { + ObTabletID inv_idx_tablet_id; + ObTabletID fwd_idx_tablet_id; + ObTabletID doc_id_idx_tablet_id; + ObTabletID aux_lookup_tablet_id; + const ObDASIRScanCtDef *ir_scan_ctdef = nullptr; + ObDASIRScanRtDef *ir_scan_rtdef = nullptr; + const ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr; + ObDASIRAuxLookupRtDef *aux_lookup_rtdef = nullptr; + const ObDASSortCtDef *sort_ctdef = nullptr; + ObDASSortRtDef *sort_rtdef = nullptr; + ObFullTextIndexLookupOp *text_lookup_op = static_cast(result_); + if (OB_FAIL(get_aux_lookup_tablet_id(aux_lookup_tablet_id))) { 
+ LOG_WARN("failed to get doc id idx tablet id", K(ret), K_(related_tablet_ids)); + } else if (FALSE_IT(text_retrieval_op = static_cast( + text_lookup_op->get_text_retrieval_iter()))) { + } else if (OB_FAIL(get_text_ir_tablet_ids(inv_idx_tablet_id, fwd_idx_tablet_id, doc_id_idx_tablet_id))) { + LOG_WARN("failed to get text ir tablet ids", K(ret)); + } else if (OB_UNLIKELY(doc_id_idx_tablet_id != aux_lookup_tablet_id)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected aux lookup tablet id is not doc id idx tablet id", K(ret), + K(doc_id_idx_tablet_id), K(aux_lookup_tablet_id)); + } else if (OB_FAIL(ObDASUtils::find_target_das_def(attach_ctdef_, + attach_rtdef_, + DAS_OP_IR_SCAN, + ir_scan_ctdef, + ir_scan_rtdef))) { + LOG_WARN("find ir scan definition failed", K(ret)); + } else if (OB_FAIL(ObDASUtils::find_target_das_def(attach_ctdef_, + attach_rtdef_, + DAS_OP_IR_AUX_LOOKUP, + aux_lookup_ctdef, + aux_lookup_rtdef))) { + LOG_WARN("find aux lookup definition failed", K(ret)); + } else if (DAS_OP_SORT == aux_lookup_ctdef->get_doc_id_scan_ctdef()->op_type_) { + sort_ctdef = static_cast(aux_lookup_ctdef->get_doc_id_scan_ctdef()); + sort_rtdef = static_cast(aux_lookup_rtdef->get_doc_id_scan_rtdef()); + } + + if (OB_FAIL(ret)) { + } else if (nullptr != text_retrieval_op + && OB_FAIL(text_retrieval_op->rescan(ls_id_, + inv_idx_tablet_id, + fwd_idx_tablet_id, + doc_id_idx_tablet_id, + ir_scan_ctdef, + ir_scan_rtdef, + sort_ctdef, + sort_rtdef, + trans_desc_, + snapshot_))) { + LOG_WARN("failed to do text retrieval rescan", K(ret)); + } else { + text_lookup_op->set_tablet_id(get_table_lookup_tablet_id()); + text_lookup_op->set_ls_id(ls_id_); + text_lookup_op->set_doc_id_idx_tablet_id(aux_lookup_tablet_id); + } + } + return ret; +} + +int ObDASScanOp::get_text_ir_tablet_ids( + common::ObTabletID &inv_idx_tablet_id, + common::ObTabletID &fwd_idx_tablet_id, + common::ObTabletID &doc_id_idx_tablet_id) +{ + int ret = OB_SUCCESS; + inv_idx_tablet_id.reset(); + 
fwd_idx_tablet_id.reset(); + doc_id_idx_tablet_id.reset(); + if (OB_UNLIKELY(related_ctdefs_.count() != related_tablet_ids_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected related scan array not match", K(ret), K_(related_ctdefs), K_(related_tablet_ids)); + } + for (int64_t i= 0; OB_SUCC(ret) && i < related_ctdefs_.count(); ++i) { + const ObDASScanCtDef *ctdef = static_cast(related_ctdefs_.at(i)); + switch (ctdef->ir_scan_type_) { + case ObTSCIRScanType::OB_NOT_A_SPEC_SCAN: { + break; + } + case ObTSCIRScanType::OB_IR_INV_IDX_SCAN: + case ObTSCIRScanType::OB_IR_INV_IDX_AGG: { + inv_idx_tablet_id = related_tablet_ids_.at(i); + break; + } + case ObTSCIRScanType::OB_IR_DOC_ID_IDX_AGG: { + doc_id_idx_tablet_id = related_tablet_ids_.at(i); + break; + } + case ObTSCIRScanType::OB_IR_FWD_IDX_AGG: { + fwd_idx_tablet_id = related_tablet_ids_.at(i); + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted ir scan type", K(ret), KPC(ctdef)); + } + } } return ret; } @@ -754,6 +1091,18 @@ OB_SERIALIZE_MEMBER((ObDASScanOp, ObIDASTaskOp), OB_SERIALIZE_MEMBER(ObDASObsoletedObj, flag_); +ObDASGroupScanOp::ObDASGroupScanOp(ObIAllocator &op_alloc) + : ObDASScanOp(op_alloc) +{ + +} + +ObDASGroupScanOp::~ObDASGroupScanOp() +{ +} + +OB_SERIALIZE_MEMBER((ObDASGroupScanOp, ObDASScanOp)); + ObDASScanResult::ObDASScanResult() : ObIDASTaskResult(), ObNewRowIterator(), @@ -985,57 +1334,51 @@ int ObLocalIndexLookupOp::get_next_rows(int64_t &count, int64_t capacity) int ObLocalIndexLookupOp::get_next_row_from_index_table() { int ret = OB_SUCCESS; - bool got_row = false; - do { - if (OB_FAIL(rowkey_iter_->get_next_row())) { - if (OB_ITER_END != ret) { - LOG_WARN("get next row from index scan failed", K(ret)); - } else if (is_group_scan()) { - // switch to next index group - if (OB_FAIL(switch_rowkey_scan_group())) { - if (OB_ITER_END != ret) { - LOG_WARN("rescan index operator failed", K(ret)); - } else { - LOG_DEBUG("switch group end",K(get_index_group_cnt()), 
K(lookup_rowkey_cnt_), KP(this)); - } - } else { - inc_index_group_cnt(); - LOG_DEBUG("switch to next index batch to fetch rowkey",K(get_index_group_cnt()), K(lookup_rowkey_cnt_), KP(this)); - } - } - } else { - got_row = true; + if (OB_FAIL(rowkey_iter_->get_next_row())) { + if (OB_ITER_END != ret) { + LOG_WARN("get next row from index scan failed", K(ret)); } - } while (OB_SUCC(ret)&& !got_row); + } + return ret; +} + +int ObLocalIndexLookupOp::get_next_rows_from_index_table(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(rowkey_iter_->get_next_rows(count, capacity))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next row from index scan failed", K(ret)); + } + } return ret; } int ObLocalIndexLookupOp::process_data_table_rowkey() { int ret = OB_SUCCESS; - // for group scan lookup, das result output of index - // contain rowkey and group_idx_expr, so when build rowkey range, - // need remove group_idx_expr - int64_t rowkey_cnt = is_group_scan() ? index_ctdef_->result_output_.count() - 1 - : index_ctdef_->result_output_.count(); + int64_t rowkey_cnt = index_ctdef_->result_output_.count(); + if (nullptr != index_ctdef_->group_id_expr_) { + rowkey_cnt -= 1; + } + if (nullptr != index_ctdef_->trans_info_expr_) { + rowkey_cnt -= 1; + } + int64_t group_idx = 0; ObObj *obj_ptr = nullptr; void *buf = nullptr; common::ObArenaAllocator& lookup_alloc = lookup_memctx_->get_arena_allocator(); ObNewRange lookup_range; - if (index_ctdef_->trans_info_expr_ != nullptr) { - rowkey_cnt = rowkey_cnt - 1; - } if (OB_ISNULL(buf = lookup_alloc.alloc(sizeof(ObObj) * rowkey_cnt))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate buffer failed", K(ret), K(rowkey_cnt)); } else { - obj_ptr = new(buf) ObObj[rowkey_cnt]; + obj_ptr = new (buf) ObObj[rowkey_cnt]; } - for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_cnt; ++i) { + for (int64_t i = 0; OB_SUCC(ret) && i < index_ctdef_->result_output_.count(); ++i) { ObObj tmp_obj; ObExpr *expr = 
index_ctdef_->result_output_.at(i); if (T_PSEUDO_GROUP_ID == expr->type_) { - // do nothing + group_idx = expr->locate_expr_datum(*lookup_rtdef_->eval_ctx_).get_int(); } else if (T_PSEUDO_ROW_TRANS_INFO_COLUMN == expr->type_) { // do nothing } else { @@ -1069,7 +1412,7 @@ int ObLocalIndexLookupOp::process_data_table_rowkey() uint64_t ref_table_id = lookup_ctdef_->ref_table_id_; if (OB_FAIL(lookup_range.build_range(ref_table_id, table_rowkey))) { LOG_WARN("build lookup range failed", K(ret), K(ref_table_id), K(table_rowkey)); - } else if (FALSE_IT(lookup_range.group_idx_ = get_index_group_cnt() - 1)) { + } else if (FALSE_IT(lookup_range.group_idx_ = group_idx)) { } else if (OB_FAIL(scan_param_.key_ranges_.push_back(lookup_range))) { LOG_WARN("store lookup key range failed", K(ret), K(scan_param_)); } @@ -1146,7 +1489,6 @@ int ObLocalIndexLookupOp::get_next_row_from_data_table() int ObLocalIndexLookupOp::get_next_rows_from_data_table(int64_t &count, int64_t capacity) { int ret = OB_SUCCESS; - LOG_DEBUG("local index lookup output rows", K(lookup_row_cnt_), K(get_index_group_cnt()), K(get_lookup_group_cnt()), K(lookup_rowkey_cnt_)); lookup_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); if (scan_param_.key_ranges_.empty()) { ret = OB_ITER_END; @@ -1160,57 +1502,18 @@ int ObLocalIndexLookupOp::get_next_rows_from_data_table(int64_t &count, int64_t return ret; } -int ObLocalIndexLookupOp::process_next_index_batch_for_row() -{ - int ret = OB_SUCCESS; - if (need_next_index_batch()) { - reset_lookup_state(); - index_end_ = false; - state_ = INDEX_SCAN; - } else { - state_ = FINISHED; - } - return ret; - -} - -int ObLocalIndexLookupOp::process_next_index_batch_for_rows(int64_t &count) -{ - int ret = OB_SUCCESS; - if (OB_FAIL(check_lookup_row_cnt())) { - LOG_WARN("check lookup row cnt failed", K(ret)); - } else if (need_next_index_batch()) { - reset_lookup_state(); - index_end_ = false; - state_ = INDEX_SCAN; - ret = OB_SUCCESS; - } else { - state_ = FINISHED; - } - return ret; 
-} - -bool ObLocalIndexLookupOp::need_next_index_batch() const -{ - return !index_end_; -} - int ObLocalIndexLookupOp::check_lookup_row_cnt() { int ret = OB_SUCCESS; - //In group scan the jump read may happend, so the lookup_group_cnt and lookup_rowkey_cnt_ mismatch. if (GCONF.enable_defensive_check() && !is_group_scan_ && lookup_ctdef_->pd_expr_spec_.pushdown_filters_.empty()) { - if (OB_UNLIKELY(lookup_rowkey_cnt_ != lookup_row_cnt_) - && get_index_group_cnt() == get_lookup_group_cnt()) { + if (OB_UNLIKELY(lookup_rowkey_cnt_ != lookup_row_cnt_)) { ret = OB_ERR_DEFENSIVE_CHECK; - ObString func_name = ObString::make_string("check_lookup_row_cnt"); + ObString func_name = ObString::make_string("check_local_lookup_row_cnt"); LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr()); LOG_ERROR("Fatal Error!!! Catch a defensive error!", K(ret), K_(lookup_rowkey_cnt), K_(lookup_row_cnt), - "index_group_cnt", get_index_group_cnt(), - "lookup_group_cnt", get_lookup_group_cnt(), "index_table_id", index_ctdef_->ref_table_id_ , "data_table_tablet_id", tablet_id_ , KPC_(snapshot), @@ -1240,59 +1543,6 @@ int ObLocalIndexLookupOp::check_lookup_row_cnt() return ret; } -int ObLocalIndexLookupOp::do_index_table_scan_for_rows(const int64_t max_row_cnt, - const int64_t start_group_idx, - const int64_t default_row_batch_cnt) -{ - int ret = OB_SUCCESS; - int64_t rowkey_count = 0; - while (OB_SUCC(ret) && lookup_rowkey_cnt_ < default_row_batch_cnt) { - int64_t batch_size = min(max_row_cnt, default_row_batch_cnt - lookup_rowkey_cnt_); - do_clear_evaluated_flag(); - ret = rowkey_iter_->get_next_rows(rowkey_count, batch_size); - if (OB_ITER_END == ret && rowkey_count > 0) { - ret = OB_SUCCESS; - } - if (OB_UNLIKELY(OB_SUCCESS != ret)) { - if (OB_ITER_END != ret) { - LOG_WARN("get next batch from index scan failed", K(ret)); - } else if (is_group_scan()) { - //switch to next index iterator, call child's rescan - if (OB_FAIL(switch_rowkey_scan_group())) { - if (OB_ITER_END 
!= ret) { - LOG_WARN("rescan index operator failed", K(ret)); - } - } else { - inc_index_group_cnt(); - LOG_DEBUG("switch to next index batch to fetch rowkey", K(get_index_group_cnt()), K(lookup_rowkey_cnt_)); - } - } - } else if (OB_FAIL(process_data_table_rowkeys(rowkey_count, nullptr/*skip*/))) { - LOG_WARN("process data table rowkey with das failed", K(ret)); - } else { - lookup_rowkey_cnt_ += rowkey_count; - } - } - if (OB_SUCC(ret) || OB_ITER_END == ret) { - state_ = DO_LOOKUP; - index_end_ = (OB_ITER_END == ret); - ret = OB_SUCCESS; - if (is_group_scan()) { - OZ(init_group_range(start_group_idx, get_index_group_cnt())); - } - } - LOG_DEBUG("index scan end", K(state_), K(index_end_),K(start_group_idx), K(get_index_group_cnt()), K(ret)); - return ret; -} - -void ObLocalIndexLookupOp::update_state_in_output_rows_state(int64_t &count) -{ - lookup_row_cnt_ += count; -} - -void ObLocalIndexLookupOp::update_states_in_finish_state() -{ } - OB_INLINE ObITabletScan &ObLocalIndexLookupOp::get_tsc_service() { return is_virtual_table(lookup_ctdef_->ref_table_id_) ? @@ -1378,6 +1628,8 @@ int ObLocalIndexLookupOp::reset_lookup_state() int ret = OB_SUCCESS; state_ = INDEX_SCAN; index_end_ = false; + lookup_row_cnt_ = 0; + lookup_rowkey_cnt_ = 0; trans_info_array_.reuse(); lookup_rtdef_->stmt_allocator_.set_alloc(index_rtdef_->stmt_allocator_.get_alloc()); // Keep lookup_rtdef_->stmt_allocator_.alloc_ consistent with index_rtdef_->stmt_allocator_.alloc_ @@ -1415,26 +1667,5 @@ int ObLocalIndexLookupOp::revert_iter() return ret; } -int ObLocalIndexLookupOp::switch_index_table_and_rowkey_group_id() -{ - int ret = OB_SUCCESS; - if (is_group_scan_) { - //Do the group scan jump read. - //Now we support jump read in GroupScan iter. - //Some of row read from index maybe jump. - //We need to sync index_group_cnt with lookup_group_cnt. - //Because in the rescan we manipulate the lookup_group_cnt. 
- set_index_group_cnt(get_lookup_group_cnt()); - ret = set_rowkey_scan_group(get_lookup_group_cnt() - 1); - if (OB_SUCCESS != ret) { - LOG_WARN("set_rowkey_scan_group fail",K(get_lookup_group_cnt() - 1),K(ret)); - if (OB_ITER_END == ret) { - ret = OB_ERR_UNEXPECTED; - } - } - } - return ret; -} - } // namespace sql } // namespace oceanbase diff --git a/src/sql/das/ob_das_scan_op.h b/src/sql/das/ob_das_scan_op.h index 76485c1727..aeeaf633d3 100644 --- a/src/sql/das/ob_das_scan_op.h +++ b/src/sql/das/ob_das_scan_op.h @@ -16,14 +16,13 @@ #include "storage/access/ob_dml_param.h" #include "sql/engine/basic/ob_chunk_datum_store.h" #include "sql/engine/table/ob_index_lookup_op_impl.h" + namespace oceanbase { namespace sql { class ObDASExtraData; class ObLocalIndexLookupOp; -struct ObDASIRCtDef; -struct ObDASIRRtDef; struct ObDASScanCtDef : ObDASBaseCtDef { @@ -185,6 +184,7 @@ public: class ObDASScanOp : public ObIDASTaskOp { friend class DASOpResultIter; + friend class ObDASMergeIter; OB_UNIS_VERSION(1); public: ObDASScanOp(common::ObIAllocator &op_alloc); @@ -203,33 +203,33 @@ public: virtual const ObDASBaseCtDef *get_ctdef() const override { return scan_ctdef_; } virtual ObDASBaseRtDef *get_rtdef() override { return scan_rtdef_; } bool need_check_output_datum() const { return scan_rtdef_->need_check_output_datum_; } - virtual const ExprFixedArray &get_result_outputs() const - { return get_lookup_ctdef() != nullptr ? 
get_lookup_ctdef()->result_output_ : scan_ctdef_->result_output_; } + virtual const ExprFixedArray &get_result_outputs() const; void set_scan_ctdef(const ObDASScanCtDef *scan_ctdef) { scan_ctdef_ = scan_ctdef; } void set_scan_rtdef(ObDASScanRtDef *scan_rtdef) { scan_rtdef_ = scan_rtdef; } + int reserve_related_buffer(const int64_t related_scan_cnt); + int set_related_task_info(const ObDASBaseCtDef *attach_ctdef, + ObDASBaseRtDef *attach_rtdef, + const common::ObTabletID &tablet_id); //only used in local index lookup, it it nullptr when scan data table or scan index table - int set_lookup_ctdef(const ObDASScanCtDef *lookup_ctdef) - { related_ctdefs_.set_capacity(1); return related_ctdefs_.push_back(lookup_ctdef); } - int set_lookup_rtdef(ObDASScanRtDef *lookup_rtdef) - { related_rtdefs_.set_capacity(1); return related_rtdefs_.push_back(lookup_rtdef); } - //only used in local index lookup, it it nullptr when scan data table or scan index table - const ObDASScanCtDef *get_lookup_ctdef() const - { return related_ctdefs_.empty() ? nullptr : static_cast(related_ctdefs_.at(0)); } - ObDASScanRtDef *get_lookup_rtdef() - { return related_rtdefs_.empty() ? 
nullptr : static_cast(related_rtdefs_.at(0)); } - int set_lookup_tablet_id(const common::ObTabletID &tablet_id); + const ObDASScanCtDef *get_lookup_ctdef() const; + ObDASScanRtDef *get_lookup_rtdef(); + int get_aux_lookup_tablet_id(common::ObTabletID &tablet_id) const; + common::ObTabletID get_table_lookup_tablet_id() const; int init_scan_param(); - virtual int rescan(); - virtual int reuse_iter(); - virtual void reset_access_datums_ptr() override; - virtual ObLocalIndexLookupOp *get_lookup_op(); - ObExpr *get_group_id_expr() { return scan_ctdef_->group_id_expr_; } - bool is_group_scan() { return NULL != scan_ctdef_->group_id_expr_; } + int rescan(); + int reuse_iter(); + void reset_access_datums_ptr(int64_t capacity = 0); + ObLocalIndexLookupOp *get_lookup_op(); bool is_contain_trans_info() {return NULL != scan_ctdef_->trans_info_expr_; } + int do_table_scan(); + int do_domain_index_lookup(); + int do_text_retrieve(common::ObNewRowIterator *&retrieval_iter); + int do_text_retrieve_rescan(); + int get_text_ir_tablet_ids( + common::ObTabletID &inv_idx_tablet_id, + common::ObTabletID &fwd_idx_tablet_id, + common::ObTabletID &doc_id_idx_tablet_id); bool enable_rich_format() const { return scan_rtdef_->enable_rich_format(); } - virtual bool need_all_output() { return false; } - virtual int switch_scan_group() { return common::OB_SUCCESS; }; - virtual int set_scan_group(int64_t group_id) { UNUSED(group_id); return common::OB_NOT_IMPLEMENT; }; INHERIT_TO_STRING_KV("parent", ObIDASTaskOp, KPC_(scan_ctdef), KPC_(scan_rtdef), @@ -238,9 +238,9 @@ public: "scan_flag", scan_param_.scan_flag_); protected: common::ObITabletScan &get_tsc_service(); - virtual int do_local_index_lookup(); - virtual common::ObNewRowIterator *get_storage_scan_iter(); - virtual common::ObNewRowIterator *get_output_result_iter() { return result_; } + int do_local_index_lookup(); + common::ObNewRowIterator *get_storage_scan_iter(); + common::ObNewRowIterator *get_output_result_iter() { return result_; } 
public: ObSEArray trans_info_array_; protected: @@ -323,23 +323,6 @@ public: lookup_memctx_(), status_(0) {} - ObLocalIndexLookupOp(const ObNewRowIterator::IterType iter_type) - : ObNewRowIterator(iter_type), - ObIndexLookupOpImpl(LOCAL_INDEX, 1000 /*default_batch_row_count */), - lookup_ctdef_(nullptr), - lookup_rtdef_(nullptr), - index_ctdef_(nullptr), - index_rtdef_(nullptr), - tx_desc_(nullptr), - snapshot_(nullptr), - rowkey_iter_(nullptr), - lookup_iter_(), - tablet_id_(), - ls_id_(), - scan_param_(), - lookup_memctx_(), - status_(0) - {} virtual ~ObLocalIndexLookupOp(); @@ -355,48 +338,27 @@ public: virtual void reset() override { } virtual void do_clear_evaluated_flag() override {index_rtdef_->p_pd_expr_op_->clear_evaluated_flag();} + virtual int reset_lookup_state() override; virtual int get_next_row_from_index_table() override; + virtual int get_next_rows_from_index_table(int64_t &count, int64_t capacity) override; virtual int process_data_table_rowkey() override; virtual int process_data_table_rowkeys(const int64_t size, const ObBitVector *skip) override; - virtual bool is_group_scan() const override { return is_group_scan_; } - virtual int init_group_range(int64_t cur_group_idx, int64_t group_size) override { return common::OB_NOT_IMPLEMENT; } virtual int do_index_lookup() override; virtual int get_next_row_from_data_table() override; virtual int get_next_rows_from_data_table(int64_t &count, int64_t capacity) override; - virtual int process_next_index_batch_for_row() override; - virtual int process_next_index_batch_for_rows(int64_t &count) override; - virtual bool need_next_index_batch() const override; virtual int check_lookup_row_cnt() override; - virtual int do_index_table_scan_for_rows(const int64_t max_row_cnt, - const int64_t start_group_idx, - const int64_t default_row_batch_cnt) override; - virtual void update_state_in_output_rows_state(int64_t &count) override; - virtual void update_states_in_finish_state() override; - virtual void 
update_states_after_finish_state() override {} + virtual ObEvalCtx & get_eval_ctx() override {return *(lookup_rtdef_->eval_ctx_);} virtual const ExprFixedArray & get_output_expr() override {return lookup_ctdef_->pd_expr_spec_.access_exprs_; } - // for lookup group scan - virtual int64_t get_index_group_cnt() const override { return 0; } - virtual int64_t get_lookup_group_cnt() const override { return 0; } - virtual void set_index_group_cnt(int64_t group_cnt_) { UNUSED(group_cnt_); /*do nothing*/ } - virtual void inc_index_group_cnt() { /*do nothing*/ } - virtual void inc_lookup_group_cnt() { /*do nothing*/ } - virtual int switch_rowkey_scan_group() { return common::OB_NOT_IMPLEMENT; } - virtual int set_rowkey_scan_group(int64_t group_id) { UNUSED(group_id); return common::OB_NOT_IMPLEMENT; } - virtual int switch_lookup_scan_group() { return common::OB_NOT_IMPLEMENT; } - virtual int set_lookup_scan_group(int64_t group_id) { UNUSED(group_id); return common::OB_NOT_IMPLEMENT; } - virtual ObNewRowIterator *&get_lookup_storage_iter() { return lookup_iter_; } - virtual ObNewRowIterator *get_lookup_iter() { return lookup_iter_; } - virtual int switch_index_table_and_rowkey_group_id() override; + ObNewRowIterator *&get_lookup_storage_iter() { return lookup_iter_; } + ObNewRowIterator *get_lookup_iter() { return lookup_iter_; } void set_is_group_scan(bool v) { is_group_scan_ = v; } - // for lookup group scan end - + bool is_group_scan() const { return is_group_scan_; } void set_tablet_id(const common::ObTabletID &tablet_id) { tablet_id_ = tablet_id; } void set_ls_id(const share::ObLSID &ls_id) { ls_id_ = ls_id; } void set_rowkey_iter(common::ObNewRowIterator *rowkey_iter) {rowkey_iter_ = rowkey_iter;} common::ObNewRowIterator *get_rowkey_iter() { return rowkey_iter_; } int reuse_iter(); - virtual int reset_lookup_state(); virtual int revert_iter(); VIRTUAL_TO_STRING_KV(KPC_(lookup_ctdef), KPC_(lookup_rtdef), @@ -406,9 +368,9 @@ public: K_(ls_id), K_(state), K_(index_end)); 
-private: - int init_scan_param(); common::ObITabletScan &get_tsc_service(); +protected: + virtual int init_scan_param(); protected: const ObDASScanCtDef *lookup_ctdef_; //lookup ctdef ObDASScanRtDef *lookup_rtdef_; //lookup rtdef @@ -416,33 +378,9 @@ protected: ObDASScanRtDef *index_rtdef_; transaction::ObTxDesc *tx_desc_; transaction::ObTxReadSnapshot *snapshot_; - // for group scan: - // local das : - // rowkey_iter_ is ObGroupScanIter - // remote das : - // local server: rowkey_iter_ not used - // remote server: rowkey_iter_ is ObGroupScanIter - // for normal scan: - // local das : - // rowkey_iter_ is storage_iter - // remote das: - // local server: rowkey_iter_ not used - // remote server: rowkey_iter_ is storage_iter + // Local index lookup is executed within a DAS task, whether executed locally or remotely, + // both index scan and lookup are completed on the same machine. common::ObNewRowIterator *rowkey_iter_; - // for group scan: - // local das: - // lookup_iter_ is ObGroupScanIter - // remote das: - // local server: - // lookup_iter_ is ObGroupScanIter, and the input of ObGroupScanIter is ObDASScanResult - // remote server: - // lookup_iter_ is ObGroupScanIter, and the input of ObGroupScanIter is storage iter, - // Here, TODO shengle: the lookup_iter_ can use storage iter directly for opt; - // for normal scan, - // local das: lookup_iter_ is storage_iter - // remote das: - // local server: lookup_iter_ not used - // remote server: lookup_iter_ is storage_iter common::ObNewRowIterator *lookup_iter_; common::ObTabletID tablet_id_; share::ObLSID ls_id_; @@ -458,6 +396,19 @@ protected: }; }; }; + +// NOTE: ObDASGroupScanOp defined here is For cross-version compatibility, and it will be removed in future barrier-version; +// For das remote execution in upgrade stage, ctrl(4.2.1) -> executor (4.2.3) +// the executor will execute das group-rescan op as the logic of das-scan op, and return the result to ctr; +class ObDASGroupScanOp : public ObDASScanOp +{ + 
OB_UNIS_VERSION(1); +public: + ObDASGroupScanOp(common::ObIAllocator &op_alloc); + virtual ~ObDASGroupScanOp(); +}; + + } // namespace sql } // namespace oceanbase #endif /* OBDEV_SRC_SQL_DAS_OB_DAS_SCAN_OP_H_ */ diff --git a/src/sql/das/ob_das_simple_op.h b/src/sql/das/ob_das_simple_op.h index 87b4eabcee..62e2fc7f3a 100644 --- a/src/sql/das/ob_das_simple_op.h +++ b/src/sql/das/ob_das_simple_op.h @@ -38,6 +38,43 @@ public: virtual int swizzling_remote_task(ObDASRemoteInfo *remote_info) override; }; +class ObDASEmptyOp : public ObDASSimpleOp +{ +public: + ObDASEmptyOp(common::ObIAllocator &op_alloc) + : ObDASSimpleOp(op_alloc) + { + } + virtual ~ObDASEmptyOp() = default; + virtual int open_op() override { return common::OB_NOT_IMPLEMENT; } + virtual int fill_task_result(ObIDASTaskResult &task_result, bool &has_more, int64_t &memory_limit) override + { + UNUSEDx(task_result, has_more, memory_limit); + return common::OB_NOT_IMPLEMENT; + } + virtual int decode_task_result(ObIDASTaskResult *task_result) override + { + UNUSEDx(task_result); + return common::OB_NOT_IMPLEMENT; + } +}; + +class ObDASEmptyResult : public ObIDASTaskResult +{ +public: + ObDASEmptyResult() {} + virtual ~ObDASEmptyResult() {} + virtual int init(const ObIDASTaskOp &op, common::ObIAllocator &alloc) override + { + UNUSEDx(op, alloc); + return common::OB_NOT_IMPLEMENT; + } + virtual int reuse() override + { + return common::OB_NOT_IMPLEMENT; + } +}; + struct ObDASEmptyCtDef : ObDASBaseCtDef { OB_UNIS_VERSION(1); diff --git a/src/sql/das/ob_das_spatial_index_lookup_op.cpp b/src/sql/das/ob_das_spatial_index_lookup_op.cpp index a14c1028c6..5a0a78f416 100644 --- a/src/sql/das/ob_das_spatial_index_lookup_op.cpp +++ b/src/sql/das/ob_das_spatial_index_lookup_op.cpp @@ -93,7 +93,7 @@ int ObSpatialIndexLookupOp::filter_by_mbr(const ObObj &mbr_obj, bool &pass_throu ObSpatialMBR idx_spa_mbr; bool is_point = (WKB_POINT_DATA_SIZE == mbr_str.length()); - if (OB_FAIL(ObSpatialMBR::from_string(mbr_str, 
ObGeoRelationType::T_INVALID, idx_spa_mbr, is_point))) { + if (OB_FAIL(ObSpatialMBR::from_string(mbr_str, ObDomainOpType::T_INVALID, idx_spa_mbr, is_point))) { LOG_WARN("fail to create index spatial mbr", K(ret), K(mbr_obj)); } else { idx_spa_mbr.is_point_ = is_point; @@ -125,9 +125,17 @@ int ObSpatialIndexLookupOp::save_rowkeys() } else if (last_rowkey_ != *idx_row) { ObNewRange lookup_range; uint64_t ref_table_id = lookup_ctdef_->ref_table_id_; + int64_t group_idx = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < index_ctdef_->result_output_.count(); ++i) { + ObObj tmp_obj; + ObExpr *expr = index_ctdef_->result_output_.at(i); + if (T_PSEUDO_GROUP_ID == expr->type_) { + group_idx = expr->locate_expr_datum(*lookup_rtdef_->eval_ctx_).get_int(); + } + } if (OB_FAIL(lookup_range.build_range(ref_table_id, *idx_row))) { LOG_WARN("build lookup range failed", K(ret), K(ref_table_id), K(*idx_row)); - } else if (FALSE_IT(lookup_range.group_idx_ = get_index_group_cnt() - 1)) { + } else if (FALSE_IT(lookup_range.group_idx_ = group_idx)) { } else if (OB_FAIL(scan_param_.key_ranges_.push_back(lookup_range))) { LOG_WARN("store lookup key range failed", K(ret), K(scan_param_)); } @@ -159,20 +167,6 @@ int ObSpatialIndexLookupOp::get_next_row() if (OB_FAIL(rowkey_iter_->get_next_row())) { if (OB_ITER_END != ret) { LOG_WARN("get next row from index scan failed", K(ret)); - } else if (is_group_scan_) { - // switch to next index group - if (OB_FAIL(switch_rowkey_scan_group())) { - if (OB_ITER_END != ret) { - LOG_WARN("rescan index operator failed", K(ret)); - } else { - LOG_DEBUG("switch group end", - K(get_index_group_cnt()), K(lookup_rowkey_cnt_), KP(this)); - } - } else { - inc_index_group_cnt(); - LOG_DEBUG("switch to next index batch to fetch rowkey", - K(get_index_group_cnt()), K(lookup_rowkey_cnt_), KP(this)); - } } } else if (OB_FAIL(process_data_table_rowkey())) { LOG_WARN("process data table rowkey with das failed", K(ret)); @@ -222,7 +216,7 @@ int 
ObSpatialIndexLookupOp::get_next_row() } else if (OB_FAIL(lookup_iter_->get_next_row())) { if (OB_ITER_END == ret) { ret = OB_SUCCESS; - if (need_next_index_batch()) { + if (!index_end_) { // reuse lookup_iter_ only ObLocalIndexLookupOp::reset_lookup_state(); index_end_ = false; diff --git a/src/sql/das/ob_das_task.cpp b/src/sql/das/ob_das_task.cpp index d89cc8be65..a88e06968c 100644 --- a/src/sql/das/ob_das_task.cpp +++ b/src/sql/das/ob_das_task.cpp @@ -83,10 +83,24 @@ OB_DEF_SERIALIZE(ObDASRemoteInfo) OB_UNIS_ENCODE(session_id_); OB_UNIS_ENCODE(plan_id_); OB_UNIS_ENCODE(plan_hash_); - - // placeholder for serialize the reference relationship between ctdefs and rtdefs. - // Full logic here requires some complicated data structure refactor on fts branch for ver 4.3.1. - // double check compatiblity before merge to master + //Serializing the reference relationship between ctdefs and rtdefs. + for (int i = 0; OB_SUCC(ret) && i < ctdefs_.count(); ++i) { + const ObDASBaseCtDef *ctdef = ctdefs_.at(i); + OB_UNIS_ENCODE(ctdef->children_cnt_); + for (int j = 0; OB_SUCC(ret) && j < ctdef->children_cnt_; ++j) { + const ObDASBaseCtDef *child_ctdef = ctdef->children_[j]; + OB_UNIS_ENCODE(child_ctdef); + } + } + for (int i = 0; OB_SUCC(ret) && i < rtdefs_.count(); ++i) { + ObDASBaseRtDef *rtdef = rtdefs_.at(i); + OB_UNIS_ENCODE(rtdef->ctdef_); + OB_UNIS_ENCODE(rtdef->children_cnt_); + for (int j = 0; OB_SUCC(ret) && j < rtdef->children_cnt_; ++j) { + ObDASBaseRtDef *child_rtdef = rtdef->children_[j]; + OB_UNIS_ENCODE(child_rtdef); + } + } return ret; } @@ -191,10 +205,42 @@ OB_DEF_DESERIALIZE(ObDASRemoteInfo) OB_UNIS_DECODE(session_id_); OB_UNIS_DECODE(plan_id_); OB_UNIS_DECODE(plan_hash_); - - // placeholder for serialize the reference relationship between ctdefs and rtdefs. - // Full logic here requires some complicated data structure refactor on fts branch for ver 4.3.1. 
- // double check compatiblity before merge to master + //rebuilding the reference relationship between ctdefs and rtdefs after deserialization. + for (int i = 0; OB_SUCC(ret) && i < ctdefs_.count(); ++i) { + ObDASBaseCtDef *ctdef = const_cast(ctdefs_.at(i)); + OB_UNIS_DECODE(ctdef->children_cnt_); + if (OB_SUCC(ret) && ctdef->children_cnt_ > 0) { + if (OB_ISNULL(ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &exec_ctx_->get_allocator(), ctdef->children_cnt_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate ctdef children_ failed", K(ret), K(ctdef->children_cnt_)); + } + } + for (int j = 0; OB_SUCC(ret) && j < ctdef->children_cnt_; ++j) { + const ObDASBaseCtDef *child_ctdef = nullptr; + OB_UNIS_DECODE(child_ctdef); + if (OB_SUCC(ret)) { + ctdef->children_[j] = const_cast(child_ctdef); + } + } + } + for (int i = 0; OB_SUCC(ret) && i < rtdefs_.count(); ++i) { + ObDASBaseRtDef *rtdef = rtdefs_.at(i); + OB_UNIS_DECODE(rtdef->ctdef_); + OB_UNIS_DECODE(rtdef->children_cnt_); + if (OB_SUCC(ret) && rtdef->children_cnt_ > 0) { + if (OB_ISNULL(rtdef->children_ = OB_NEW_ARRAY(ObDASBaseRtDef*, &exec_ctx_->get_allocator(), rtdef->children_cnt_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate rtdef children_ failed", K(ret), K(rtdef->children_cnt_)); + } + } + for (int j = 0; OB_SUCC(ret) && j < rtdef->children_cnt_; ++j) { + ObDASBaseRtDef *child_rtdef = nullptr; + OB_UNIS_DECODE(child_rtdef); + if (OB_SUCC(ret)) { + rtdef->children_[j] = child_rtdef; + } + } + } return ret; } @@ -234,10 +280,24 @@ OB_DEF_SERIALIZE_SIZE(ObDASRemoteInfo) OB_UNIS_ADD_LEN(session_id_); OB_UNIS_ADD_LEN(plan_id_); OB_UNIS_ADD_LEN(plan_hash_); - - // placeholder for serialize the reference relationship between ctdefs and rtdefs. - // Full logic here requires some complicated data structure refactor on fts branch for ver 4.3.1. - // double check compatiblity before merge to master + //Serializing the reference relationship between ctdefs and rtdefs. 
+ for (int i = 0; i < ctdefs_.count(); ++i) { + const ObDASBaseCtDef *ctdef = ctdefs_.at(i); + OB_UNIS_ADD_LEN(ctdef->children_cnt_); + for (int j = 0; j < ctdef->children_cnt_; ++j) { + const ObDASBaseCtDef *child_ctdef = ctdef->children_[j]; + OB_UNIS_ADD_LEN(child_ctdef); + } + } + for (int i = 0; i < rtdefs_.count(); ++i) { + ObDASBaseRtDef *rtdef = rtdefs_.at(i); + OB_UNIS_ADD_LEN(rtdef->ctdef_); + OB_UNIS_ADD_LEN(rtdef->children_cnt_); + for (int j = 0; j < rtdef->children_cnt_; ++j) { + ObDASBaseRtDef *child_rtdef = rtdef->children_[j]; + OB_UNIS_ADD_LEN(child_rtdef); + } + } return len; } diff --git a/src/sql/das/ob_das_task.h b/src/sql/das/ob_das_task.h index de1bf4f03b..56ec19994f 100644 --- a/src/sql/das/ob_das_task.h +++ b/src/sql/das/ob_das_task.h @@ -139,6 +139,8 @@ public: void set_ls_id(const share::ObLSID &ls_id) { ls_id_ = ls_id; } const share::ObLSID &get_ls_id() const { return ls_id_; } void set_tablet_loc(const ObDASTabletLoc *tablet_loc) { tablet_loc_ = tablet_loc; } + // tablet_loc_ will not be serialized, therefore it cannot be accessed during the execution phase + // of DASTaskOp. It can only be touched through das_ref and data_access_service layer. 
const ObDASTabletLoc *get_tablet_loc() const { return tablet_loc_; } inline int64_t get_ref_table_id() const { return tablet_loc_->loc_meta_->ref_table_id_; } virtual int decode_task_result(ObIDASTaskResult *task_result) = 0; @@ -168,6 +170,9 @@ public: DasTaskNode &get_node() { return das_task_node_; } int get_errcode() const { return errcode_; } void set_errcode(int errcode) { errcode_ = errcode; } + void set_attach_ctdef(const ObDASBaseCtDef *attach_ctdef) { attach_ctdef_ = attach_ctdef; } + void set_attach_rtdef(ObDASBaseRtDef *attach_rtdef) { attach_rtdef_ = attach_rtdef; } + ObDASBaseRtDef *get_attach_rtdef() { return attach_rtdef_; } VIRTUAL_TO_STRING_KV(K_(tenant_id), K_(task_id), K_(op_type), @@ -254,17 +259,26 @@ protected: int16_t write_branch_id_; // branch id for parallel write, required for partially rollback common::ObTabletID tablet_id_; share::ObLSID ls_id_; - const ObDASTabletLoc *tablet_loc_; //does not need serialize it + // tablet_loc_ will not be serialized, therefore it cannot be accessed during the execution phase + // of DASTaskOp. It can only be touched through das_ref and data_access_service layer. + const ObDASTabletLoc *tablet_loc_; common::ObIAllocator &op_alloc_; - //in DML DAS Task,related_ctdefs_ means related local index ctdefs - //in Scan DAS Task, related_ctdefs_ have only one element, means the lookup ctdef + //In DML DAS Task,related_ctdefs_ means related local index ctdefs + //In Scan DAS Task for normal secondary index, related_ctdefs_ have only one element, means the lookup ctdef + //In Scan DAS TASK for domain index, related_ctdefs_ means related local index scan ctdefs, + //For detailed arrangement information, please refer to the description in ObDASScanOp. + //The related_ctdef is used solely to retain the fundamental computational information executed with the data table and its index table, + //such as insert_ctdef, scan_ctdef, etc. 
+ //It does not include other pushed-down operations bound and executed with the task, + //such as aux lookup ctdef, etc. DASCtDefFixedArray related_ctdefs_; DASRtDefFixedArray related_rtdefs_; + //The related_tablet_ids_ usually correspond to the related_ctdefs information. ObTabletIDFixedArray related_tablet_ids_; ObDasTaskStatus task_status_; // do not serialize DasTaskNode das_task_node_; // tasks's linked list node, do not serialize - ObDasAggregatedTasks *agg_tasks_; // task's agg task, do not serialize - DasTaskLinkedList *cur_agg_list_; // task's agg_list, do not serialize + ObDasAggregatedTasks *agg_tasks_; //task's agg task, do not serialize + DasTaskLinkedList *cur_agg_list_; //task's agg_list, do not serialize ObIDASTaskResult *op_result_; //The attach_ctdef describes the computations that are pushed down and executed as an attachment to the ObDASTaskOp, //such as the back table operation for full-text indexes, diff --git a/src/sql/das/ob_das_task_result.cpp b/src/sql/das/ob_das_task_result.cpp index dd6235e08c..b94dd13c29 100644 --- a/src/sql/das/ob_das_task_result.cpp +++ b/src/sql/das/ob_das_task_result.cpp @@ -228,11 +228,6 @@ int ObDASTaskResultMgr::save_task_result(int64_t task_id, if (OB_FAIL(result.get_next_row())) { if (OB_ITER_END != ret) { LOG_WARN("get next row from result failed", KR(ret)); - } else if (scan_op.need_all_output()) { - ret = scan_op.switch_scan_group(); - if (OB_SUCC(ret)) { - continue; - } } } else { if (OB_FAIL(datum_store.try_add_row(*output_exprs, @@ -254,16 +249,8 @@ int ObDASTaskResultMgr::save_task_result(int64_t task_id, if (OB_ITER_END != ret) { LOG_WARN("get next rows from result failed", KR(ret)); } else { - if (scan_op.need_all_output()) { - ret = scan_op.switch_scan_group(); - if (OB_SUCC(ret)) { - continue; - } - } - if (OB_ITER_END == ret) { - iter_end = true; - ret = OB_SUCCESS; - } + iter_end = true; + ret = OB_SUCCESS; } } if (OB_FAIL(ret) || 0 == read_rows) { @@ -377,16 +364,8 @@ int 
ObDASTaskResultMgr::save_task_result_by_vector(int64_t &read_rows, if (OB_ITER_END != ret) { LOG_WARN("get next rows from result failed", KR(ret)); } else { - if (scan_op.need_all_output()) { - ret = scan_op.switch_scan_group(); - if (OB_SUCC(ret)) { - continue; - } - } - if (OB_ITER_END == ret) { - iter_end = true; - ret = OB_SUCCESS; - } + iter_end = true; + ret = OB_SUCCESS; } } if (OB_FAIL(ret) || 0 == read_rows) { diff --git a/src/sql/das/ob_das_update_op.cpp b/src/sql/das/ob_das_update_op.cpp index 4a4a173874..e854e9b15c 100644 --- a/src/sql/das/ob_das_update_op.cpp +++ b/src/sql/das/ob_das_update_op.cpp @@ -16,6 +16,7 @@ #include "sql/engine/px/ob_px_util.h" #include "sql/engine/dml/ob_dml_service.h" #include "sql/das/ob_das_utils.h" +#include "sql/das/ob_das_domain_utils.h" #include "storage/tx_storage/ob_access_service.h" #include "sql/engine/expr/ob_expr_lob_utils.h" namespace oceanbase @@ -52,9 +53,9 @@ public: write_buffer_(write_buffer), old_row_(nullptr), new_row_(nullptr), + domain_iter_(nullptr), got_old_row_(false), - spat_rows_(nullptr), - spatial_row_idx_(0), + iter_has_built_(false), allocator_(alloc) { } @@ -65,26 +66,33 @@ public: virtual void reset() override { } int rewind(const ObDASDMLBaseCtDef *das_ctdef) { + int ret = common::OB_SUCCESS; old_row_ = nullptr; new_row_ = nullptr; got_old_row_ = false; - spatial_row_idx_ = 0; + iter_has_built_ = false; das_ctdef_ = static_cast(das_ctdef); - return OB_SUCCESS; + if (OB_NOT_NULL(domain_iter_)) { + domain_iter_->set_ctdef(das_ctdef_, &(got_old_row_ ? 
das_ctdef_->new_row_projector_ + : das_ctdef_->old_row_projector_)); + if (OB_FAIL(domain_iter_->rewind())) { + LOG_WARN("fail to rewind for domain iterator", K(ret)); + } + } + return ret; } private: - ObSpatIndexRow *get_spatial_index_rows() { return spat_rows_; } - int create_spatial_index_store(); - int get_next_spatial_index_row(ObNewRow *&row); + // domain index + int get_next_domain_index_row(ObNewRow *&row); private: const ObDASUpdCtDef *das_ctdef_; ObDASWriteBuffer &write_buffer_; ObNewRow *old_row_; ObNewRow *new_row_; ObDASWriteBuffer::Iterator result_iter_; + ObDomainDMLIterator *domain_iter_; bool got_old_row_; - ObSpatIndexRow *spat_rows_; - uint32_t spatial_row_idx_; + bool iter_has_built_; common::ObIAllocator &allocator_; }; @@ -93,10 +101,10 @@ int ObDASUpdIterator::get_next_row(ObNewRow *&row) int ret = OB_SUCCESS; const ObChunkDatumStore::StoredRow *sr = NULL; - if (OB_UNLIKELY(das_ctdef_->table_param_.get_data_table().is_spatial_index())) { - if (OB_FAIL(get_next_spatial_index_row(row))) { + if (OB_UNLIKELY(das_ctdef_->table_param_.get_data_table().is_domain_index())) { + if (OB_FAIL(get_next_domain_index_row(row))) { if (OB_ITER_END != ret) { - LOG_WARN("get next spatial index row failed", K(ret)); + LOG_WARN("get next domain index row failed", K(ret), K(das_ctdef_->table_param_.get_data_table())); } } } else if (!got_old_row_) { @@ -159,91 +167,40 @@ int ObDASUpdIterator::get_next_row(ObNewRow *&row) ObDASUpdIterator::~ObDASUpdIterator() { - if (spat_rows_ != nullptr) { - spat_rows_->~ObSEArray(); - spat_rows_ = nullptr; + if (nullptr != domain_iter_) { + domain_iter_->~ObDomainDMLIterator(); + domain_iter_ = nullptr; } } -int ObDASUpdIterator::create_spatial_index_store() +int ObDASUpdIterator::get_next_domain_index_row(ObNewRow *&row) { int ret = OB_SUCCESS; - void *buf = allocator_.alloc(sizeof(ObSpatIndexRow)); - if (OB_ISNULL(buf)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate spatial row store failed", K(ret)); - } else { - 
spat_rows_ = new(buf) ObSpatIndexRow(); - } - return ret; -} - -int ObDASUpdIterator::get_next_spatial_index_row(ObNewRow *&row) -{ - int ret = OB_SUCCESS; - const ObChunkDatumStore::StoredRow *sr = NULL; - uint64_t rowkey_num = das_ctdef_->table_param_.get_data_table().get_rowkey_column_num(); - uint64_t old_proj = das_ctdef_->old_row_projector_.count(); - uint64_t new_proj = das_ctdef_->new_row_projector_.count(); - if (rowkey_num + 1 != old_proj || rowkey_num + 1 != new_proj) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid project count", K(ret), K(rowkey_num), K(new_proj), K(old_proj)); - } else if (OB_ISNULL(old_row_)) { + if (!iter_has_built_) { if (OB_FAIL(write_buffer_.begin(result_iter_))) { LOG_WARN("begin write iterator failed", K(ret)); + } else { + iter_has_built_ = true; } } - if (OB_SUCC(ret)) { - ObDASWriteBuffer &write_buffer = get_write_buffer(); - ObSpatIndexRow *spatial_rows = get_spatial_index_rows(); - bool got_row = false; - while (OB_SUCC(ret) && ! got_row) { - if (OB_ISNULL(spatial_rows) || spatial_row_idx_ >= spatial_rows->count()) { - const ObChunkDatumStore::StoredRow *sr = nullptr; - spatial_row_idx_ = 0; - if (OB_FAIL(result_iter_.get_next_row(sr))) { - if (OB_ITER_END != ret) { - LOG_WARN("get next row from result iterator failed", K(ret)); - } else if (!got_old_row_) { - // ret == OB_ITER_END, old row is finished, get next new row - old_row_ = NULL; - got_old_row_ = true; - } - } else if (OB_ISNULL(spatial_rows)) { - if (OB_FAIL(create_spatial_index_store())) { - LOG_WARN("create spatial index rows store failed", K(ret)); - } else { - spatial_rows = get_spatial_index_rows(); - } - } - if (OB_NOT_NULL(spatial_rows)) { - spatial_rows->reuse(); - } - - if(OB_SUCC(ret)) { - // get full row successfully - const IntFixedArray &cur_proj = got_old_row_ ? 
das_ctdef_->new_row_projector_ : das_ctdef_->old_row_projector_; - int64_t geo_idx = cur_proj.at(rowkey_num); - ObString geo_wkb = sr->cells()[geo_idx].get_string(); - if (OB_FAIL(ObTextStringHelper::read_real_string_data(&allocator_, ObGeometryType, - CS_TYPE_UTF8MB4_BIN, true, geo_wkb))) { - LOG_WARN("fail to get real string data", K(ret), K(geo_wkb)); - } else if (OB_FAIL(ObDASUtils::generate_spatial_index_rows(allocator_, *das_ctdef_, geo_wkb, - cur_proj, *sr, *spatial_rows))) { - LOG_WARN("generate spatial_index_rows failed", K(ret), K(geo_idx), K(geo_wkb), K(rowkey_num)); - } - } - } - - if (OB_SUCC(ret) && spatial_row_idx_ < spatial_rows->count()) { - row = &(*spatial_rows)[spatial_row_idx_]; - old_row_ = row; - spatial_row_idx_++; - got_row = true; + const IntFixedArray &cur_proj = got_old_row_ ? das_ctdef_->new_row_projector_ : das_ctdef_->old_row_projector_; + if (OB_ISNULL(domain_iter_) && OB_FAIL(ObDomainDMLIterator::create_domain_dml_iterator( + allocator_, &cur_proj, result_iter_, das_ctdef_, nullptr/*main_ctdef*/, domain_iter_))) { + LOG_WARN("fail to create domain index dml iterator", K(ret)); + } else if (OB_FAIL(domain_iter_->get_next_domain_row(row))) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to get next domain row", K(ret), KPC(domain_iter_)); + } else if (!got_old_row_) { + // ret == OB_ITER_END, old row is finished, get next new row + iter_has_built_ = false; + got_old_row_ = true; + domain_iter_->set_row_projector(&(das_ctdef_->new_row_projector_)); } } } + LOG_DEBUG("get next domain index row", K(ret), K(iter_has_built_), K(got_old_row_), + KPC(domain_iter_), KPC(row)); return ret; } @@ -257,16 +214,16 @@ int ObDASIndexDMLAdaptor::write_rows(cons { int ret = OB_SUCCESS; ObAccessService *as = MTL(ObAccessService *); - if (OB_UNLIKELY(ctdef.table_param_.get_data_table().is_spatial_index())) { + if (OB_UNLIKELY(ctdef.table_param_.get_data_table().is_domain_index())) { if (OB_FAIL(as->delete_rows(ls_id, tablet_id, *tx_desc_, dml_param_, 
ctdef.column_ids_, &iter, affected_rows))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { - LOG_WARN("delete rows to access service failed", K(ret)); + LOG_WARN("delete rows to access service failed", K(ret), K(ls_id), K(tablet_id)); } } else if (OB_FAIL(as->insert_rows(ls_id, tablet_id, *tx_desc_, dml_param_, ctdef.column_ids_, &iter, affected_rows))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { - LOG_WARN("insert rows to access service failed", K(ret)); + LOG_WARN("insert rows to access service failed", K(ret), K(ls_id), K(tablet_id)); } } } else if (ctdef.table_param_.get_data_table().is_mlog_table() diff --git a/src/sql/das/ob_das_utils.cpp b/src/sql/das/ob_das_utils.cpp index 1318c64da6..b59394babd 100644 --- a/src/sql/das/ob_das_utils.cpp +++ b/src/sql/das/ob_das_utils.cpp @@ -21,8 +21,6 @@ #include "share/location_cache/ob_location_service.h" #include "observer/ob_server_struct.h" #include "observer/omt/ob_tenant_srs.h" -#include "lib/geo/ob_s2adapter.h" -#include "lib/geo/ob_geo_utils.h" #include "share/ob_tablet_autoincrement_service.h" #include "storage/access/ob_dml_param.h" namespace oceanbase @@ -345,100 +343,6 @@ int ObDASUtils::reshape_datum_value(const ObObjMeta &col_type, return ret; } -int ObDASUtils::generate_spatial_index_rows( - ObIAllocator &allocator, - const ObDASDMLBaseCtDef &das_ctdef, - const ObString &wkb_str, - const IntFixedArray &row_projector, - const ObDASWriteBuffer::DmlRow &dml_row, - ObSpatIndexRow &spat_rows) -{ - int ret = OB_SUCCESS; - omt::ObSrsCacheGuard srs_guard; - const ObSrsItem *srs_item = NULL; - const ObSrsBoundsItem *srs_bound = NULL; - uint32_t srid = UINT32_MAX; - uint64_t rowkey_num = das_ctdef.table_param_.get_data_table().get_rowkey_column_num(); - lib::ObMallocHookAttrGuard malloc_guard(lib::ObMemAttr(MTL_ID(), "S2Adapter")); - - if (OB_FAIL(ObGeoTypeUtil::get_srid_from_wkb(wkb_str, srid))) { - LOG_WARN("failed to get srid", K(ret), K(wkb_str)); - } else if (srid != 0 && - 
OB_FAIL(OTSRS_MGR->get_tenant_srs_guard(srs_guard))) { - LOG_WARN("failed to get srs guard", K(ret), K(MTL_ID()), K(srid)); - } else if (srid != 0 && - OB_FAIL(srs_guard.get_srs_item(srid, srs_item))) { - LOG_WARN("failed to get srs item", K(ret), K(MTL_ID()), K(srid)); - } else if (((srid == 0) || !(srs_item->is_geographical_srs())) && - OB_FAIL(OTSRS_MGR->get_srs_bounds(srid, srs_item, srs_bound))) { - LOG_WARN("failed to get srs bound", K(ret), K(srid)); - } else { - ObS2Adapter s2object(&allocator, srid != 0 ? srs_item->is_geographical_srs() : false); - ObSpatialMBR spa_mbr; - ObObj *obj_arr = NULL; - ObS2Cellids cellids; - char *mbr = NULL; - int64_t mbr_len = 0; - if (OB_FAIL(s2object.init(wkb_str, srs_bound))) { - LOG_WARN("Init s2object failed", K(ret)); - } else if (OB_FAIL(s2object.get_cellids(cellids, false))) { - LOG_WARN("Get cellids from s2object failed", K(ret)); - } else if (OB_FAIL(s2object.get_mbr(spa_mbr))) { - LOG_WARN("Get mbr from s2object failed", K(ret)); - } else if (spa_mbr.is_empty()) { - if (cellids.size() == 0) { - LOG_DEBUG("it's might be empty geometry collection", K(wkb_str)); - } else { - ret = OB_ERR_GIS_INVALID_DATA; - LOG_WARN("invalid geometry", K(ret), K(wkb_str)); - } - } else if (OB_ISNULL(mbr = reinterpret_cast(allocator.alloc(OB_DEFAULT_MBR_SIZE)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to alloc memory for spatial index row mbr", K(ret)); - } else if (OB_FAIL(spa_mbr.to_char(mbr, mbr_len))) { - LOG_WARN("failed transform ObSpatialMBR to string", K(ret)); - } else { - for (uint64_t i = 0; OB_SUCC(ret) && i < cellids.size(); i++) { - if (OB_ISNULL(obj_arr = reinterpret_cast(allocator.alloc(sizeof(ObObj) * rowkey_num)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to alloc memory for spatial index row cells", K(ret)); - } else { - // 索引行[cellid_obj][mbr_obj][rowkey_obj] - for(uint64_t j = 0; OB_SUCC(ret) && j < rowkey_num; j++) { - obj_arr[j].set_nop_value(); - const ObObjMeta &col_type = 
das_ctdef.column_types_.at(j); - const ObAccuracy &col_accuracy = das_ctdef.column_accuracys_.at(j); - int64_t projector_idx = row_projector.at(j); - if (OB_FAIL(dml_row.cells()[projector_idx].to_obj(obj_arr[j], col_type))) { - LOG_WARN("stored row to new row obj failed", K(ret), - K(dml_row.cells()[projector_idx]), K(col_type), K(projector_idx), K(j)); - } else if (OB_FAIL(ObDASUtils::reshape_storage_value(col_type, col_accuracy, allocator, obj_arr[j]))) { - LOG_WARN("reshape storage value failed", K(ret), K(col_type), K(projector_idx), K(j)); - } - } - if (OB_SUCC(ret)) { - int64_t cellid_col_idx = 0; - int64_t mbr_col_idx = 1; - obj_arr[cellid_col_idx].set_uint64(cellids.at(i)); - ObString mbr_val(mbr_len, mbr); - obj_arr[mbr_col_idx].set_varchar(mbr_val); - obj_arr[mbr_col_idx].set_collation_type(CS_TYPE_BINARY); - obj_arr[mbr_col_idx].set_collation_level(CS_LEVEL_IMPLICIT); - ObNewRow row; - row.cells_ = obj_arr; - row.count_ = rowkey_num; - if (OB_FAIL(spat_rows.push_back(row))) { - LOG_WARN("failed to push back spatial index row", K(ret), K(row)); - } - } - } - } - } - } - - return ret; -} - int ObDASUtils::wait_das_retry(int64_t retry_cnt) { int ret = OB_SUCCESS; @@ -459,6 +363,39 @@ int ObDASUtils::wait_das_retry(int64_t retry_cnt) return ret; } +int ObDASUtils::find_child_das_def(const ObDASBaseCtDef *root_ctdef, + ObDASBaseRtDef *root_rtdef, + ObDASOpType op_type, + const ObDASBaseCtDef *&target_ctdef, + ObDASBaseRtDef *&target_rtdef) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(root_ctdef) || OB_ISNULL(root_rtdef)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("root ctdef or rtdef is nullptr", K(ret), KP(root_ctdef), K(root_rtdef)); + } else if (OB_UNLIKELY(root_ctdef->op_type_ != root_rtdef->op_type_ + || root_ctdef->children_cnt_ != root_rtdef->children_cnt_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("the op_type of ctdef and rtdef do not match", K(ret), + K(root_ctdef->op_type_), K(root_rtdef->op_type_), + K(root_ctdef->children_cnt_), 
K(root_rtdef->children_cnt_)); + } else if (root_ctdef->op_type_ == op_type) { + target_ctdef = root_ctdef; + target_rtdef = root_rtdef; + } else { + for (int i = 0; OB_SUCC(ret) && i < root_ctdef->children_cnt_; ++i) { + if (OB_FAIL(find_child_das_def(root_ctdef->children_[i], + root_rtdef->children_[i], + op_type, + target_ctdef, + target_rtdef))) { + LOG_WARN("find child das def failed", K(ret)); + } + } + } + return ret; +} + int ObDASUtils::generate_mlog_row(const ObTabletID &tablet_id, const storage::ObDMLBaseParam &dml_param, ObNewRow &row, diff --git a/src/sql/das/ob_das_utils.h b/src/sql/das/ob_das_utils.h index 16d770be78..da50e6a3f3 100644 --- a/src/sql/das/ob_das_utils.h +++ b/src/sql/das/ob_das_utils.h @@ -21,6 +21,7 @@ #include "rpc/obrpc/ob_rpc_result_code.h" #include "sql/das/ob_das_define.h" #include "sql/das/ob_das_dml_ctx_define.h" +#include "sql/das/ob_das_def_reg.h" namespace oceanbase { namespace sql @@ -63,13 +64,33 @@ public: ObIAllocator &allocator, blocksstable::ObStorageDatum &datum_value); static int padding_fixed_string_value(int64_t max_len, ObIAllocator &alloc, ObObj &value); - static int generate_spatial_index_rows(ObIAllocator &allocator, - const ObDASDMLBaseCtDef &das_ctdef, - const ObString &wkb_str, - const IntFixedArray &row_projector, - const ObDASWriteBuffer::DmlRow &dml_row, - ObSpatIndexRow &spat_rows); static int wait_das_retry(int64_t retry_cnt); + static int find_child_das_def(const ObDASBaseCtDef *root_ctdef, + ObDASBaseRtDef *root_rtdef, + ObDASOpType op_type, + const ObDASBaseCtDef *&target_ctdef, + ObDASBaseRtDef *&target_rtdef); + template + static int find_target_das_def(const ObDASBaseCtDef *root_ctdef, + ObDASBaseRtDef *root_rtdef, + ObDASOpType op_type, + const CtDefType *&target_ctdef, + RtDefType *&target_rtdef) + { + int ret = common::OB_SUCCESS; + const ObDASBaseCtDef *base_ctdef = nullptr; + ObDASBaseRtDef *base_rtdef = nullptr; + if (OB_FAIL(find_child_das_def(root_ctdef, root_rtdef, op_type, base_ctdef, 
base_rtdef))) { + SQL_DAS_LOG(WARN, "find chld das def failed", K(ret)); + } else if (OB_ISNULL(base_ctdef) || OB_ISNULL(base_rtdef)) { + ret = common::OB_ERR_UNEXPECTED; + SQL_DAS_LOG(WARN, "can not find the target op def", K(ret), K(op_type), KP(base_ctdef), KP(base_rtdef)); + } else { + target_ctdef = static_cast(base_ctdef); + target_rtdef = static_cast(base_rtdef); + } + return ret; + } static int generate_mlog_row(const common::ObTabletID &tablet_id, const storage::ObDMLBaseParam &dml_param, common::ObNewRow &row, diff --git a/src/sql/das/ob_data_access_service.cpp b/src/sql/das/ob_data_access_service.cpp index 6f72a08515..5d52b12fbc 100644 --- a/src/sql/das/ob_data_access_service.cpp +++ b/src/sql/das/ob_data_access_service.cpp @@ -723,6 +723,38 @@ int ObDataAccessService::collect_das_task_info(ObDASTaskArg &task_arg, ObDASRemo LOG_WARN("append task op related rtdefs to remote info failed", K(ret)); } } + if (OB_SUCC(ret)) { + if (OB_FAIL(collect_das_task_attach_info(remote_info, task_op->get_attach_rtdef()))) { + LOG_WARN("collect das task attach info failed", K(ret)); + } + } + } + return ret; +} + +int ObDataAccessService::collect_das_task_attach_info(ObDASRemoteInfo &remote_info, + ObDASBaseRtDef *attach_rtdef) +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(attach_rtdef)) { + if (attach_rtdef->ctdef_ != nullptr) { + remote_info.has_expr_ |= attach_rtdef->ctdef_->has_expr(); + remote_info.need_calc_expr_ |= attach_rtdef->ctdef_->has_pdfilter_or_calc_expr(); + remote_info.need_calc_udf_ |= attach_rtdef->ctdef_->has_pl_udf(); + if (OB_FAIL(add_var_to_array_no_dup(remote_info.ctdefs_, attach_rtdef->ctdef_))) { + LOG_WARN("store remote ctdef failed", K(ret)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(add_var_to_array_no_dup(remote_info.rtdefs_, attach_rtdef))) { + LOG_WARN("store remote rtdef failed", K(ret)); + } + } + for (int i = 0; OB_SUCC(ret) && i < attach_rtdef->children_cnt_; ++i) { + if (OB_FAIL(collect_das_task_attach_info(remote_info, 
attach_rtdef->children_[i]))) { + LOG_WARN("recursively collect das task attach info failed", K(ret)); + } + } } return ret; } diff --git a/src/sql/das/ob_data_access_service.h b/src/sql/das/ob_data_access_service.h index 5561ad1351..79be0c11e4 100644 --- a/src/sql/das/ob_data_access_service.h +++ b/src/sql/das/ob_data_access_service.h @@ -63,6 +63,8 @@ private: int do_async_remote_das_task(ObDASRef &das_ref, ObDasAggregatedTasks &aggregated_tasks, ObDASTaskArg &task_arg); int do_sync_remote_das_task(ObDASRef &das_ref, ObDasAggregatedTasks &aggregated_tasks, ObDASTaskArg &task_arg); int collect_das_task_info(ObDASTaskArg &task_arg, ObDASRemoteInfo &remote_info); + int collect_das_task_attach_info(ObDASRemoteInfo &remote_info, + ObDASBaseRtDef *attach_rtdef); void calc_das_task_parallelism(const ObDASRef &das_ref, const ObDasAggregatedTasks &task_ops, int &target_parallelism); private: obrpc::ObDASRpcProxy das_rpc_proxy_; diff --git a/src/sql/das/ob_domain_index_lookup_op.cpp b/src/sql/das/ob_domain_index_lookup_op.cpp new file mode 100644 index 0000000000..df5757a5ca --- /dev/null +++ b/src/sql/das/ob_domain_index_lookup_op.cpp @@ -0,0 +1,1034 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ +#define USING_LOG_PREFIX SQL_DAS +#include "sql/das/ob_das_scan_op.h" +#include "sql/das/ob_text_retrieval_op.h" +#include "sql/das/ob_domain_index_lookup_op.h" +#include "sql/das/ob_das_utils.h" +#include "sql/engine/ob_exec_context.h" +#include "storage/access/ob_dml_param.h" +namespace oceanbase +{ +using namespace common; +using namespace storage; +using namespace transaction; +namespace sql +{ + +int ObDomainIndexLookupOp::init( + const ObDASScanCtDef *lookup_ctdef, + ObDASScanRtDef *lookup_rtdef, + const ObDASScanCtDef *index_ctdef, + ObDASScanRtDef *index_rtdef, + const ObDASScanCtDef *doc_id_lookup_ctdef, + ObDASScanRtDef *doc_id_lookup_rtdef, + ObTxDesc *tx_desc, + ObTxReadSnapshot *snapshot, + ObTableScanParam &scan_param) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObLocalIndexLookupOp::init( + lookup_ctdef, lookup_rtdef, index_ctdef, index_rtdef, tx_desc, snapshot))) { + LOG_WARN("ObLocalIndexLookupOp init failed", K(ret)); + } else { + doc_id_lookup_ctdef_ = doc_id_lookup_ctdef; + doc_id_lookup_rtdef_ = doc_id_lookup_rtdef; + need_scan_aux_ = (doc_id_lookup_ctdef_ != nullptr); + } + return ret; +} + +int ObDomainIndexLookupOp::reset_lookup_state() +{ + INIT_SUCC(ret); + if (OB_FAIL(ObLocalIndexLookupOp::reset_lookup_state())) { + LOG_WARN("reset domain lookup state failed", K(ret)); + } else if (nullptr != lookup_iter_) { + doc_id_scan_param_.key_ranges_.reuse(); + doc_id_scan_param_.ss_key_ranges_.reuse(); + } + return ret; +} + +int ObDomainIndexLookupOp::next_state() +{ + INIT_SUCC(ret); + if (state_ == INDEX_SCAN) { + if (0 == lookup_rowkey_cnt_) { + state_ = LookupState::FINISHED; + } else if (need_scan_aux_) { + state_ = LookupState::AUX_LOOKUP; + } else { + state_ = LookupState::DO_LOOKUP; + } + } else if (state_ == LookupState::AUX_LOOKUP) { + state_ = LookupState::DO_LOOKUP; + } else if (state_ == LookupState::DO_LOOKUP) { + state_ = LookupState::OUTPUT_ROWS; + } else if (state_ == LookupState::OUTPUT_ROWS) { + state_ = 
LookupState::INDEX_SCAN; + } else { + ret = OB_ERR_UNEXPECTED; + } + LOG_DEBUG("domain index to next state", K(ret), K(state_)); + return ret; +} + +int ObDomainIndexLookupOp::get_next_row() +{ + int ret = OB_SUCCESS; + bool got_next_row = false; + while (OB_SUCC(ret) && !got_next_row) { + switch (state_) { + case INDEX_SCAN: { + reset_lookup_state(); + if (OB_FAIL(fetch_index_table_rowkey())) { + if (OB_UNLIKELY(ret != OB_ITER_END)) { + LOG_WARN("failed get index table rowkey", K(ret)); + } else { + index_end_ = true; + ret = OB_SUCCESS; + } + } else { + ++lookup_rowkey_cnt_; + } + + if (FAILEDx(next_state())) { + LOG_WARN("failed to switch to next lookup state", K(ret), K(state_)); + } + break; + } + case AUX_LOOKUP: { + if (OB_FAIL(get_aux_table_rowkey())) { + if (ret != OB_ITER_END) { + LOG_WARN("do aux index lookup failed", K(ret)); + } + } else { + // ++lookup_rowkey_cnt_; + if (OB_FAIL(next_state())) { + LOG_WARN("failed to switch to next lookup state", K(ret), K(state_)); + } + } + break; + } + case DO_LOOKUP: { + if (OB_FAIL(do_index_lookup())) { + LOG_WARN("do index lookup failed", K(ret)); + } else if (OB_FAIL(next_state())) { + LOG_WARN("failed to switch to next lookup state", K(ret), K(state_)); + } + break; + } + case OUTPUT_ROWS: { + if (OB_FAIL(get_next_row_from_data_table())) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + if (OB_FAIL(check_lookup_row_cnt())) { + LOG_WARN("failed to check lookup row cnt", K(ret)); + } else if (OB_FAIL(next_state())) { + LOG_WARN("failed to switch to next lookup state", K(ret)); + } + } else { + LOG_WARN("look up get next row failed", K(ret)); + } + } else { + got_next_row = true; + ++lookup_row_cnt_; + LOG_DEBUG("got next row from table lookup", K(ret), K(lookup_row_cnt_), K(lookup_rowkey_cnt_), "main table output", ROWEXPR2STR(get_eval_ctx(), get_output_expr()) ); + } + break; + } + case FINISHED: { + ret = OB_ITER_END; + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected state", 
K(state_)); + } + } + } + + return ret; +} + +int ObDomainIndexLookupOp::get_next_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + bool got_next_row = false; + while (OB_SUCC(ret) && !got_next_row) { + switch (state_) { + case INDEX_SCAN: { + reset_lookup_state(); + int64_t rowkey_count = 0; + lookup_row_cnt_ = 0; + lookup_row_cnt_ = 0; + if (OB_FAIL(fetch_index_table_rowkeys(rowkey_count, capacity))) { + LOG_WARN("failed get rowkeys from index table", K(ret)); + } else if (0 == rowkey_count) { + index_end_ = true; + } + if (OB_SUCC(ret)) { + if (rowkey_count > 0) { + lookup_rowkey_cnt_ += rowkey_count; + } + if (OB_FAIL(next_state())) { + LOG_WARN("failed to switch to next lookup state", K(ret), K(state_)); + } + } + break; + } + case AUX_LOOKUP: { + if (OB_FAIL(get_aux_table_rowkeys(lookup_rowkey_cnt_))) { + if (ret != OB_ITER_END) { + LOG_WARN("do aux index lookup failed", K(ret)); + } + } else if (OB_FAIL(next_state())) { + LOG_WARN("failed to switch to next lookup state", K(ret), K(state_)); + } + break; + } + case DO_LOOKUP: { + lookup_row_cnt_ = 0; + if (OB_FAIL(do_index_lookup())) { + LOG_WARN("do index lookup failed", K(ret)); + } else if (OB_FAIL(next_state())) { + LOG_WARN("failed to switch to next lookup state", K(ret), K(state_)); + } + break; + } + case OUTPUT_ROWS: { + if (OB_FAIL(get_next_rows_from_data_table(count, capacity))) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + if (count > 0) { + lookup_row_cnt_ += count; + got_next_row = true; + } else if (OB_FAIL(check_lookup_row_cnt())) { + LOG_WARN("failed to check table lookup", K(ret)); + } else if (OB_FAIL(next_state())) { + LOG_WARN("failed to switch to next lookup state", K(ret), K_(state)); + } + } else { + LOG_WARN("look up get next row failed", K(ret)); + } + } else { + got_next_row = true; + lookup_row_cnt_ += count; + const ObBitVector *skip = nullptr; + PRINT_VECTORIZED_ROWS(SQL, DEBUG, get_eval_ctx(), get_output_expr(), count, skip, + K(ret), K(lookup_row_cnt_), 
K(lookup_rowkey_cnt_)); + } + break; + } + case FINISHED: { + ret = OB_ITER_END; + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected state", K(state_)); + } + } + } + + return ret; +} + +int ObDomainIndexLookupOp::set_lookup_doc_id_key(ObExpr *doc_id_expr, ObEvalCtx *eval_ctx_) +{ + int ret = OB_SUCCESS; + common::ObArenaAllocator &lookup_alloc = lookup_memctx_->get_arena_allocator(); + ObNewRange doc_id_range; + ObDatum &doc_id_datum = doc_id_expr->locate_expr_datum(*eval_ctx_); + if (OB_FAIL(doc_id_datum.to_obj(doc_id_key_obj_, doc_id_expr->obj_meta_, doc_id_expr->obj_datum_map_))) { + LOG_WARN("failed to cast datum to obj", K(ret), K(doc_id_key_obj_)); + } else { + ObRowkey doc_id_rowkey(&doc_id_key_obj_, 1); + uint64_t ref_table_id = doc_id_lookup_ctdef_->ref_table_id_; + if (OB_FAIL(doc_id_range.build_range(ref_table_id, doc_id_rowkey))) { + LOG_WARN("build doc id lookup range failed", K(ret)); + } else if (OB_FAIL(doc_id_scan_param_.key_ranges_.push_back(doc_id_range))) { + LOG_WARN("store lookup key range failed", K(ret)); + } else { + LOG_DEBUG("generate doc id scan range", K(ret), K(doc_id_range)); + } + } + return ret; +} + +int ObDomainIndexLookupOp::set_doc_id_idx_lookup_param( + const ObDASScanCtDef *aux_lookup_ctdef, + ObDASScanRtDef *aux_lookup_rtdef, + storage::ObTableScanParam& aux_scan_param, + common::ObTabletID tablet_id, + share::ObLSID ls_id) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(aux_lookup_ctdef) + || OB_ISNULL(aux_lookup_rtdef) + || OB_UNLIKELY(!tablet_id.is_valid() || !ls_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(aux_lookup_ctdef), KP(aux_lookup_rtdef), K(tablet_id), K(ls_id)); + } else { + aux_scan_param.tenant_id_ = MTL_ID(); + aux_scan_param.tx_lock_timeout_ = aux_lookup_rtdef->tx_lock_timeout_; + aux_scan_param.index_id_ = aux_lookup_ctdef->ref_table_id_; + aux_scan_param.is_get_ = aux_lookup_ctdef->is_get_; + aux_scan_param.is_for_foreign_check_ = 
aux_lookup_rtdef->is_for_foreign_check_; + aux_scan_param.timeout_ = aux_lookup_rtdef->timeout_ts_; + aux_scan_param.scan_flag_ = aux_lookup_rtdef->scan_flag_; + aux_scan_param.reserved_cell_count_ = aux_lookup_ctdef->access_column_ids_.count(); + aux_scan_param.allocator_ = &aux_lookup_rtdef->stmt_allocator_; + aux_scan_param.scan_allocator_ = &aux_lookup_rtdef->scan_allocator_; + aux_scan_param.sql_mode_ = aux_lookup_rtdef->sql_mode_; + aux_scan_param.frozen_version_ = aux_lookup_rtdef->frozen_version_; + aux_scan_param.force_refresh_lc_ = aux_lookup_rtdef->force_refresh_lc_; + aux_scan_param.output_exprs_ = &(aux_lookup_ctdef->pd_expr_spec_.access_exprs_); + aux_scan_param.ext_file_column_exprs_ = &(aux_lookup_ctdef->pd_expr_spec_.ext_file_column_exprs_); + aux_scan_param.ext_column_convert_exprs_ = &(aux_lookup_ctdef->pd_expr_spec_.ext_column_convert_exprs_); + aux_scan_param.calc_exprs_ = &(aux_lookup_ctdef->pd_expr_spec_.calc_exprs_); + aux_scan_param.aggregate_exprs_ = &(aux_lookup_ctdef->pd_expr_spec_.pd_storage_aggregate_output_); + aux_scan_param.table_param_ = &(aux_lookup_ctdef->table_param_); + aux_scan_param.op_ = aux_lookup_rtdef->p_pd_expr_op_; + aux_scan_param.row2exprs_projector_ = aux_lookup_rtdef->p_row2exprs_projector_; + aux_scan_param.schema_version_ = aux_lookup_ctdef->schema_version_; + aux_scan_param.tenant_schema_version_ = aux_lookup_rtdef->tenant_schema_version_; + aux_scan_param.limit_param_ = aux_lookup_rtdef->limit_param_; + aux_scan_param.need_scn_ = aux_lookup_rtdef->need_scn_; + aux_scan_param.pd_storage_flag_ = aux_lookup_ctdef->pd_expr_spec_.pd_storage_flag_.pd_flag_; + aux_scan_param.fb_snapshot_ = aux_lookup_rtdef->fb_snapshot_; + aux_scan_param.fb_read_tx_uncommitted_ = aux_lookup_rtdef->fb_read_tx_uncommitted_; + if (aux_lookup_rtdef->is_for_foreign_check_) { + aux_scan_param.trans_desc_ = tx_desc_; + } + aux_scan_param.ls_id_ = ls_id; + aux_scan_param.tablet_id_ = tablet_id; + if (aux_lookup_rtdef->sample_info_ != nullptr) 
{ + aux_scan_param.sample_info_ = *aux_lookup_rtdef->sample_info_; + } + if (OB_NOT_NULL(snapshot_)) { + aux_scan_param.snapshot_ = *snapshot_; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("snapshot is null", K(ret), KPC(this)); + } + if (OB_NOT_NULL(tx_desc_)) { + aux_scan_param.tx_id_ = tx_desc_->get_tx_id(); + } else { + aux_scan_param.tx_id_.reset(); + } + if (!aux_lookup_ctdef->pd_expr_spec_.pushdown_filters_.empty()) { + aux_scan_param.op_filters_ = &aux_lookup_ctdef->pd_expr_spec_.pushdown_filters_; + } + aux_scan_param.pd_storage_filters_ = aux_lookup_rtdef->p_pd_expr_op_->pd_storage_filters_; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(aux_scan_param.column_ids_.assign(aux_lookup_ctdef->access_column_ids_))) { + LOG_WARN("init column ids failed", K(ret)); + } + //external table scan params + if (OB_SUCC(ret) && aux_lookup_ctdef->is_external_table_) { + aux_scan_param.external_file_access_info_ = aux_lookup_ctdef->external_file_access_info_.str_; + aux_scan_param.external_file_location_ = aux_lookup_ctdef->external_file_location_.str_; + if (OB_FAIL(aux_scan_param.external_file_format_.load_from_string(aux_lookup_ctdef->external_file_format_str_.str_, *aux_scan_param.allocator_))) { + LOG_WARN("fail to load from string", K(ret)); + } else { + uint64_t max_idx = 0; + for (int i = 0; i < aux_scan_param.ext_file_column_exprs_->count(); i++) { + max_idx = std::max(max_idx, aux_scan_param.ext_file_column_exprs_->at(i)->extra_); + } + aux_scan_param.external_file_format_.csv_format_.file_column_nums_ = static_cast(max_idx); + } + } + if (OB_SUCC(ret)) { + LOG_DEBUG("init scan param", K(aux_scan_param)); + } + } + return ret; +} + +void ObDomainIndexLookupOp::do_clear_evaluated_flag() +{ + ObLocalIndexLookupOp::do_clear_evaluated_flag(); + if (OB_NOT_NULL(doc_id_lookup_rtdef_)) { + doc_id_lookup_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + } +} + +int ObDomainIndexLookupOp::revert_iter() +{ + int ret = OB_SUCCESS; + // rowkey_iter is reverted at 
ObLocalIndexLookupOp + if (OB_NOT_NULL(doc_id_lookup_rtdef_)) { + doc_id_scan_param_.need_switch_param_ = false; + doc_id_scan_param_.destroy_schema_guard(); + } + + if (OB_FAIL(ObLocalIndexLookupOp::revert_iter())) { + LOG_WARN("failed to revert local index lookup op iter", K(ret)); + } + return ret; +} + +int ObDomainIndexLookupOp::reuse_scan_iter() +{ + int ret = OB_SUCCESS; + + reset_lookup_state(); + + if (OB_NOT_NULL(doc_id_lookup_rtdef_)) { + ObITabletScan &tsc_service = get_tsc_service(); + const ObTabletID &scan_tablet_id = doc_id_scan_param_.tablet_id_; + doc_id_scan_param_.need_switch_param_ = scan_tablet_id.is_valid() && scan_tablet_id != doc_id_idx_tablet_id_; + if (OB_FAIL(tsc_service.reuse_scan_iter(doc_id_scan_param_.need_switch_param_, rowkey_iter_))) { + LOG_WARN("failed to reuse scan iter", K(ret)); + } else if (nullptr != lookup_iter_) { + doc_id_scan_param_.key_ranges_.reuse(); + doc_id_scan_param_.ss_key_ranges_.reuse(); + } + } + return ret; +} + +int ObFullTextIndexLookupOp::init(const ObDASBaseCtDef *table_lookup_ctdef, + ObDASBaseRtDef *table_lookup_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + storage::ObTableScanParam &scan_param) +{ + int ret = OB_SUCCESS; + const ObDASTableLookupCtDef *tbl_lookup_ctdef = nullptr; + ObDASTableLookupRtDef *tbl_lookup_rtdef = nullptr; + const ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr; + ObDASIRAuxLookupRtDef *aux_lookup_rtdef = nullptr; + const ObDASIRScanCtDef *ir_scan_ctdef = nullptr; + ObDASIRScanRtDef *ir_scan_rtdef = nullptr; + if (OB_ISNULL(table_lookup_ctdef) || OB_ISNULL(table_lookup_rtdef)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table lookup param is nullptr", KP(table_lookup_ctdef), KP(table_lookup_rtdef)); + } else if (OB_FAIL(ObDASUtils::find_target_das_def(table_lookup_ctdef, + table_lookup_rtdef, + DAS_OP_TABLE_LOOKUP, + tbl_lookup_ctdef, + tbl_lookup_rtdef))) { + LOG_WARN("find data table lookup def failed", K(ret)); + } else if 
(OB_FAIL(ObDASUtils::find_target_das_def(tbl_lookup_ctdef, + tbl_lookup_rtdef, + DAS_OP_IR_AUX_LOOKUP, + aux_lookup_ctdef, + aux_lookup_rtdef))) { + LOG_WARN("find ir aux lookup def failed", K(ret)); + } else if (OB_FAIL(ObDASUtils::find_target_das_def(aux_lookup_ctdef, + aux_lookup_rtdef, + DAS_OP_IR_SCAN, + ir_scan_ctdef, + ir_scan_rtdef))) { + LOG_WARN("find ir scan def failed", K(ret)); + } else { + if (OB_FAIL(ObDomainIndexLookupOp::init(tbl_lookup_ctdef->get_lookup_scan_ctdef(), + tbl_lookup_rtdef->get_lookup_scan_rtdef(), + ir_scan_ctdef->get_inv_idx_scan_ctdef(), + ir_scan_rtdef->get_inv_idx_scan_rtdef(), + aux_lookup_ctdef->get_lookup_scan_ctdef(), + aux_lookup_rtdef->get_lookup_scan_rtdef(), + tx_desc, + snapshot, + scan_param))) { + LOG_WARN("failed to init domain index lookup op", K(ret)); + } else { + need_scan_aux_ = true; + doc_id_lookup_ctdef_ = aux_lookup_ctdef->get_lookup_scan_ctdef(); + doc_id_lookup_rtdef_ = aux_lookup_rtdef->get_lookup_scan_rtdef(); + doc_id_expr_ = ir_scan_ctdef->inv_scan_doc_id_col_; + retrieval_ctx_ = ir_scan_rtdef->eval_ctx_; + } + } + return ret; +} + +int ObFullTextIndexLookupOp::reset_lookup_state() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObDomainIndexLookupOp::reset_lookup_state())) { + LOG_WARN("failed to reset lookup state for domain index lookup op", K(ret)); + } else { + if (nullptr != lookup_iter_) { + doc_id_scan_param_.key_ranges_.reuse(); + doc_id_scan_param_.ss_key_ranges_.reuse(); + } + } + return ret; +} + +void ObFullTextIndexLookupOp::do_clear_evaluated_flag() +{ + return ObDomainIndexLookupOp::do_clear_evaluated_flag(); +} + +int ObFullTextIndexLookupOp::revert_iter() +{ + int ret = OB_SUCCESS; + if (nullptr != text_retrieval_iter_) { + text_retrieval_iter_->reset(); + text_retrieval_iter_->~ObNewRowIterator(); + if (nullptr != allocator_) { + allocator_->free(text_retrieval_iter_); + } + text_retrieval_iter_ = nullptr; + } + + if (OB_FAIL(ObDomainIndexLookupOp::revert_iter())) { + LOG_WARN("failed to 
revert local index lookup op iter", K(ret)); + } + return ret; +} + +int ObFullTextIndexLookupOp::fetch_index_table_rowkey() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(text_retrieval_iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null text retrieval iterator for index lookup", K(ret), KP(text_retrieval_iter_)); + } else if (OB_FAIL(text_retrieval_iter_->get_next_row())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next next row from text retrieval iter", K(ret)); + } + } else if (OB_FAIL(set_lookup_doc_id_key(doc_id_expr_, retrieval_ctx_))) { + LOG_WARN("failed to set lookup doc id query key", K(ret)); + } + return ret; +} + +int ObFullTextIndexLookupOp::fetch_index_table_rowkeys(int64_t &count, const int64_t capacity) +{ + int ret = OB_SUCCESS; + int64_t index_scan_row_cnt = 0; + if (OB_ISNULL(text_retrieval_iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null text retrieval iterator for index lookup", K(ret), KP(text_retrieval_iter_)); + } else if (OB_FAIL(text_retrieval_iter_->get_next_rows(index_scan_row_cnt, capacity))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next next row from text retrieval iter", K(ret)); + } else { + ret = OB_SUCCESS; + } + } + if (OB_SUCC(ret) && index_scan_row_cnt > 0) { + if (OB_FAIL(set_lookup_doc_id_keys(index_scan_row_cnt))) { + LOG_WARN("failed to set lookup doc id query key", K(ret)); + } else { + count += index_scan_row_cnt; + } + } + return ret; +} + +int ObFullTextIndexLookupOp::do_aux_table_lookup() +{ + int ret = OB_SUCCESS; + ObITabletScan &tsc_service = get_tsc_service(); + if (nullptr == rowkey_iter_) { + // init doc_id -> rowkey table iterator as rowkey iter + if (OB_FAIL(set_doc_id_idx_lookup_param( + doc_id_lookup_ctdef_, doc_id_lookup_rtdef_, doc_id_scan_param_, doc_id_idx_tablet_id_, ls_id_))) { + LOG_WARN("failed to init doc id lookup scan param", K(ret)); + } else if (tsc_service.table_scan(doc_id_scan_param_, rowkey_iter_)) { + if 
(OB_SNAPSHOT_DISCARDED == ret && scan_param_.fb_snapshot_.is_valid()) { + ret = OB_INVALID_QUERY_TIMESTAMP; + } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("fail to scan table", K(scan_param_), K(ret)); + } + } + } else { + const ObTabletID &scan_tablet_id = doc_id_scan_param_.tablet_id_; + doc_id_scan_param_.need_switch_param_ = scan_tablet_id.is_valid() && (doc_id_idx_tablet_id_ != scan_tablet_id); + doc_id_scan_param_.tablet_id_ = doc_id_idx_tablet_id_; + doc_id_scan_param_.ls_id_ = ls_id_; + if (OB_FAIL(tsc_service.reuse_scan_iter(doc_id_scan_param_.need_switch_param_, rowkey_iter_))) { + LOG_WARN("failed to reuse doc id iterator", K(ret)); + } else if (OB_FAIL(tsc_service.table_rescan(doc_id_scan_param_, rowkey_iter_))) { + LOG_WARN("failed to rescan doc id rowkey table", K(ret), K_(doc_id_idx_tablet_id), K(scan_tablet_id)); + } + } + return ret; +} + +int ObFullTextIndexLookupOp::get_aux_table_rowkey() +{ + int ret = OB_SUCCESS; + doc_id_lookup_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + if (index_end_ && doc_id_scan_param_.key_ranges_.empty()) { + ret = OB_ITER_END; + } else if (OB_FAIL(do_aux_table_lookup())) { + LOG_WARN("failed to do aux table lookup", K(ret)); + } else if (OB_FAIL(rowkey_iter_->get_next_row())) { + LOG_WARN("failed to get rowkey by doc id", K(ret)); + } else if (OB_FAIL(set_main_table_lookup_key())) { + LOG_WARN("failed to set main table lookup key", K(ret)); + } + return ret; +} + +int ObFullTextIndexLookupOp::get_aux_table_rowkeys(const int64_t lookup_row_cnt) +{ + int ret = OB_SUCCESS; + doc_id_lookup_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + int64_t rowkey_cnt = 0; + if (index_end_ && doc_id_scan_param_.key_ranges_.empty()) { + ret = OB_ITER_END; + } else if (OB_FAIL(do_aux_table_lookup())) { + LOG_WARN("failed to do aux table lookup", K(ret)); + } else if (OB_FAIL(rowkey_iter_->get_next_rows(rowkey_cnt, lookup_row_cnt))) { + LOG_WARN("failed to get rowkey by doc id", K(ret), K(doc_id_scan_param_.key_ranges_)); 
+ } else if (OB_UNLIKELY(lookup_row_cnt != rowkey_cnt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected aux lookup row count not match", K(ret), K(rowkey_cnt), K(lookup_row_cnt)); + } else { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*doc_id_lookup_rtdef_->eval_ctx_); + batch_info_guard.set_batch_size(lookup_row_cnt); + for (int64_t i = 0; OB_SUCC(ret) && i < lookup_row_cnt; ++i) { + batch_info_guard.set_batch_idx(i); + if (OB_FAIL(set_main_table_lookup_key())) { + LOG_WARN("failed to set main table lookup key", K(ret)); + } + } + + } + return ret; +} + +void ObMulValueIndexLookupOp::do_clear_evaluated_flag() +{ + lookup_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + return ObDomainIndexLookupOp::do_clear_evaluated_flag(); +} + +int ObMulValueIndexLookupOp::init_scan_param() +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(ObDomainIndexLookupOp::init_scan_param())) { + LOG_WARN("failed to init scan param", K(ret)); + } + + return ret; +} + +int ObMulValueIndexLookupOp::init(const ObDASBaseCtDef *table_lookup_ctdef, + ObDASBaseRtDef *table_lookup_rtdef, + ObTxDesc *tx_desc, + ObTxReadSnapshot *snapshot, + ObTableScanParam &scan_param) +{ + int ret = OB_SUCCESS; + + const ObDASTableLookupCtDef *tbl_lookup_ctdef = nullptr; + ObDASTableLookupRtDef *tbl_lookup_rtdef = nullptr; + const ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr; + ObDASIRAuxLookupRtDef *aux_lookup_rtdef = nullptr; + + if (OB_ISNULL(table_lookup_ctdef) || OB_ISNULL(table_lookup_rtdef)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table lookup param is nullptr", KP(table_lookup_ctdef), KP(table_lookup_rtdef)); + } else if (OB_FAIL(ObDASUtils::find_target_das_def(table_lookup_ctdef, + table_lookup_rtdef, + DAS_OP_TABLE_LOOKUP, + tbl_lookup_ctdef, + tbl_lookup_rtdef))) { + LOG_WARN("find data table lookup def failed", K(ret)); + } else if (OB_FAIL(ObDASUtils::find_target_das_def(table_lookup_ctdef, + table_lookup_rtdef, + DAS_OP_IR_AUX_LOOKUP, + aux_lookup_ctdef, + aux_lookup_rtdef))) { + 
LOG_WARN("find ir aux lookup def failed", K(ret)); + } else if (aux_lookup_ctdef->children_cnt_ != 2) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("find index def failed", K(ret), K(aux_lookup_ctdef->children_cnt_)); + } else { + const ObDASScanCtDef* index_ctdef = static_cast(aux_lookup_ctdef->children_[0]); + ObDASScanRtDef * index_rtdef = static_cast(aux_lookup_rtdef->children_[0]); + + if (OB_FAIL(ObDomainIndexLookupOp::init(tbl_lookup_ctdef->get_lookup_scan_ctdef(), + tbl_lookup_rtdef->get_lookup_scan_rtdef(), + index_ctdef, + index_rtdef, + aux_lookup_ctdef->get_lookup_scan_ctdef(), + aux_lookup_rtdef->get_lookup_scan_rtdef(), + tx_desc, snapshot, scan_param))) { + LOG_WARN("ObLocalIndexLookupOp init failed", K(ret)); + } + } + return ret; +} + +int ObMulValueIndexLookupOp::init_sort() +{ + int ret = OB_SUCCESS; + cmp_ret_ = OB_SUCCESS; + aux_cmp_ret_ = OB_SUCCESS; + + new (&comparer_) ObDomainRowkeyComp(cmp_ret_); + new (&aux_comparer_) ObDomainRowkeyComp(aux_cmp_ret_); + const int64_t file_buf_size = ObExternalSortConstant::DEFAULT_FILE_READ_WRITE_BUFFER; + const int64_t expire_timestamp = 0; + const int64_t buf_limit = SORT_MEMORY_LIMIT; + const uint64_t tenant_id = MTL_ID(); + sorter_.clean_up(); + aux_sorter_.clean_up(); + if (OB_FAIL(sorter_.init(buf_limit, file_buf_size, expire_timestamp, tenant_id, &comparer_))) { + LOG_WARN("fail to init sorter", K(ret)); + } else if (OB_FAIL(aux_sorter_.init(buf_limit, file_buf_size, expire_timestamp, tenant_id, &aux_comparer_))) { + LOG_WARN("fail to init aux sorter", K(ret)); + } + + return ret; +} + +int ObMulValueIndexLookupOp::save_doc_id_and_rowkey() +{ + int ret = OB_SUCCESS; + + int64_t index_column_cnt = index_ctdef_->result_output_.count(); + ObObj *obj_ptr = nullptr; + + ObIAllocator &allocator = lookup_memctx_->get_arena_allocator(); + + if (OB_ISNULL(obj_ptr = static_cast(allocator.alloc(sizeof(ObObj) * index_column_cnt)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate buffer failed", K(ret), 
K(index_column_cnt)); + } else { + obj_ptr = new(obj_ptr) ObObj[index_column_cnt]; + } + + int64_t rowkey_null_count = 0; + + for (int64_t i = 0; OB_SUCC(ret) && i < index_column_cnt - 1; ++i) { + ObObj tmp_obj; + ObExpr *expr = index_ctdef_->result_output_.at(i); + if (T_PSEUDO_GROUP_ID == expr->type_) { + // do nothing + } else { + ObDatum &col_datum = expr->locate_expr_datum(*lookup_rtdef_->eval_ctx_); + if (OB_FAIL(col_datum.to_obj(tmp_obj, expr->obj_meta_, expr->obj_datum_map_))) { + LOG_WARN("convert datum to obj failed", K(ret)); + } else if (OB_FAIL(ob_write_obj(allocator, tmp_obj, obj_ptr[i]))) { + LOG_WARN("deep copy rowkey value failed", K(ret), K(tmp_obj)); + } else if (col_datum.is_null()) { + rowkey_null_count++; + } + } + } + + if (OB_FAIL(ret)) { + } else if (rowkey_null_count != index_column_cnt - 1) { + ++index_rowkey_cnt_; + ++lookup_rowkey_cnt_; + ObRowkey main_rowkey(obj_ptr, index_column_cnt - 1); + if (OB_FAIL(sorter_.add_item(main_rowkey))) { + LOG_WARN("filter mbr failed", K(ret)); + } + } else { + ++aux_key_count_; + ++lookup_rowkey_cnt_; + // last column is doc-id + int64_t doc_id_idx = index_column_cnt - 1; + ObExpr* doc_id_expr = index_ctdef_->result_output_.at(doc_id_idx); + ObDatum& doc_id_datum = doc_id_expr->locate_expr_datum(*lookup_rtdef_->eval_ctx_); + ObObj tmp_obj; + if (OB_FAIL(doc_id_datum.to_obj(tmp_obj, doc_id_expr->obj_meta_, doc_id_expr->obj_datum_map_))) { + LOG_WARN("convert datum to obj failed", K(ret)); + } else if (OB_FAIL(ob_write_obj(allocator, tmp_obj, obj_ptr[0]))) { + LOG_WARN("deep copy rowkey value failed", K(ret), K(tmp_obj)); + } else if (doc_id_datum.is_null()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("docid and rowkey can't both be null", K(ret)); + } else { + ObRowkey table_rowkey(obj_ptr, 1); + if (OB_FAIL(aux_sorter_.add_item(table_rowkey))) { + LOG_WARN("filter mbr failed", K(ret)); + } + } + } + + return ret; +} + +int ObMulValueIndexLookupOp::fetch_index_table_rowkey() +{ + int ret = OB_SUCCESS; + 
ObITabletScan &tsc_service = get_tsc_service(); + + if (OB_FAIL(init_sort())) { + LOG_WARN("fail to init sorter", K(ret)); + } else { + while (OB_SUCC(ret)) { + index_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + if (OB_FAIL(rowkey_iter_->get_next_row())) { + if (OB_ITER_END != ret) { + LOG_WARN("get next row from index scan failed", K(ret)); + } + } else if (OB_FAIL(save_doc_id_and_rowkey())) { + LOG_WARN("process data table rowkey with das failed", K(ret)); + } + } + } + return ret; +} + +int ObMulValueIndexLookupOp::save_aux_rowkeys() +{ + INIT_SUCC(ret); + + doc_id_scan_param_.key_ranges_.reset(); + const ObRowkey *idx_row = nullptr; + + int64_t simulate_batch_row_cnt = - EVENT_CALL(EventTable::EN_TABLE_LOOKUP_BATCH_ROW_COUNT); + int64_t default_row_batch_cnt = simulate_batch_row_cnt > 0 ? simulate_batch_row_cnt : MAX_NUM_PER_BATCH; + + if (OB_FAIL(aux_sorter_.do_sort(true))) { + LOG_WARN("do docid sort failed", K(ret)); + } + + for (int64_t i = 0; OB_SUCC(ret) && i < default_row_batch_cnt; ++i) { + if (OB_FAIL(aux_sorter_.get_next_item(idx_row))) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to get next sorted item", K(ret), K(i)); + } else { + ret = OB_SUCCESS; + } + } else if (aux_last_rowkey_ != *idx_row) { + ObNewRange lookup_range; + uint64_t ref_table_id = doc_id_lookup_ctdef_->ref_table_id_; + if (OB_FAIL(lookup_range.build_range(ref_table_id, *idx_row))) { + LOG_WARN("build lookup range failed", K(ret), K(ref_table_id), K(*idx_row)); + } else if (OB_FAIL(doc_id_scan_param_.key_ranges_.push_back(lookup_range))) { + LOG_WARN("store lookup key range failed", K(ret), K(doc_id_scan_param_)); + } + aux_last_rowkey_ = *idx_row; + LOG_DEBUG("build data table range", K(ret), K(*idx_row), K(lookup_range), K(doc_id_scan_param_.key_ranges_.count())); + } + } + return ret; +} + +int ObMulValueIndexLookupOp::save_rowkeys() +{ + int ret = OB_SUCCESS; + ObStoreRowkey src_key; + const ObRowkey *idx_row = NULL; + int64_t simulate_batch_row_cnt = - 
EVENT_CALL(EventTable::EN_TABLE_LOOKUP_BATCH_ROW_COUNT); + int64_t default_row_batch_cnt = simulate_batch_row_cnt > 0 ? simulate_batch_row_cnt : MAX_NUM_PER_BATCH; + + if (OB_FAIL(sorter_.do_sort(true))) { + LOG_WARN("do rowkey sort failed", K(ret)); + } + + for (int64_t i = 0; OB_SUCC(ret) && i < default_row_batch_cnt; ++i) { + if (OB_FAIL(sorter_.get_next_item(idx_row))) { + if (ret == OB_ITER_END) { + ret = i > 0 ? OB_SUCCESS : ret; + } else if (OB_ITER_END != ret) { + LOG_WARN("fail to get next sorted item", K(ret), K(i)); + } + } else if (last_rowkey_ != *idx_row) { + int64_t group_idx = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < index_ctdef_->result_output_.count(); ++i) { + ObObj tmp_obj; + ObExpr *expr = index_ctdef_->result_output_.at(i); + if (T_PSEUDO_GROUP_ID == expr->type_) { + group_idx = expr->locate_expr_datum(*lookup_rtdef_->eval_ctx_).get_int(); + } + } + + ObNewRange lookup_range; + uint64_t ref_table_id = lookup_ctdef_->ref_table_id_; + if (OB_FAIL(lookup_range.build_range(ref_table_id, *idx_row))) { + LOG_WARN("build lookup range failed", K(ret), K(ref_table_id), K(*idx_row)); + } else if (FALSE_IT(lookup_range.group_idx_ = group_idx)) { + } else if (OB_FAIL(scan_param_.key_ranges_.push_back(lookup_range))) { + LOG_WARN("store lookup key range failed", K(ret), K(scan_param_)); + } + last_rowkey_ = *idx_row; + LOG_DEBUG("build data table range", K(ret), K(*idx_row), K(lookup_range), K(scan_param_.key_ranges_.count())); + } + } + return ret; +} + +int ObFullTextIndexLookupOp::set_lookup_doc_id_keys(const int64_t size) +{ + int ret = OB_SUCCESS; + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*retrieval_ctx_); + batch_info_guard.set_batch_size(size); + for (int64_t i = 0; OB_SUCC(ret) && i < size; ++i) { + batch_info_guard.set_batch_idx(i); + if (OB_FAIL(set_lookup_doc_id_key(doc_id_expr_, retrieval_ctx_))) { + LOG_WARN("failed to set lookup doc id key", K(ret)); + } + } + return ret; +} + +int 
ObFullTextIndexLookupOp::set_main_table_lookup_key() +{ + int ret = OB_SUCCESS; + int64_t rowkey_cnt = doc_id_lookup_ctdef_->result_output_.count(); + void *buf = nullptr; + ObObj *obj_ptr = nullptr; + common::ObArenaAllocator &lookup_alloc = lookup_memctx_->get_arena_allocator(); + ObNewRange lookup_range; + if (nullptr != doc_id_lookup_ctdef_->trans_info_expr_) { + rowkey_cnt = rowkey_cnt - 1; + } + + if (OB_UNLIKELY(rowkey_cnt <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid rowkey cnt", K(ret), KPC(doc_id_lookup_ctdef_)); + } else if (OB_ISNULL(buf = lookup_alloc.alloc(sizeof(ObObj) * rowkey_cnt))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(rowkey_cnt)); + } else { + obj_ptr = new (buf) ObObj[rowkey_cnt]; + } + + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_cnt; ++i) { + ObObj tmp_obj; + ObExpr *expr = doc_id_lookup_ctdef_->result_output_.at(i); + if (T_PSEUDO_GROUP_ID == expr->type_) { + // do nothing + } else { + ObDatum &col_datum = expr->locate_expr_datum(*doc_id_lookup_rtdef_->eval_ctx_); + if (OB_FAIL(col_datum.to_obj(tmp_obj, expr->obj_meta_, expr->obj_datum_map_))) { + LOG_WARN("convert datum to obj failed", K(ret)); + } else if (OB_FAIL(ob_write_obj(lookup_alloc, tmp_obj, obj_ptr[i]))) { + LOG_WARN("deep copy rowkey value failed", K(ret), K(tmp_obj)); + } + } + } + + if (OB_SUCC(ret)) { + ObRowkey table_rowkey(obj_ptr, rowkey_cnt); + if (OB_FAIL(lookup_range.build_range(lookup_ctdef_->ref_table_id_, table_rowkey))) { + LOG_WARN("failed to build lookup range", K(ret), K(table_rowkey)); + } else if (OB_FAIL(scan_param_.key_ranges_.push_back(lookup_range))) { + LOG_WARN("store lookup key range failed", K(ret), K(scan_param_)); + } else { + LOG_DEBUG("get rowkey from docid rowkey table", K(ret), K(table_rowkey), K(lookup_range)); + } + } + return ret; +} + +int ObMulValueIndexLookupOp::get_aux_table_rowkey() +{ + INIT_SUCC(ret); + + if (OB_FAIL(fetch_rowkey_from_aux())) { + LOG_WARN("fetch rowkey from 
doc-rowkey table failed", K(ret)); + } else if (OB_FAIL(save_rowkeys())) { + LOG_WARN("store rowkeys failed", K(ret)); + } + + return ret; +} + + +int ObMulValueIndexLookupOp::fetch_rowkey_from_aux() +{ + INIT_SUCC(ret); + + ObITabletScan &tsc_service = get_tsc_service(); + ObNewRowIterator *&storage_iter = get_aux_lookup_iter(); + + if (aux_key_count_ == 0) { + //do nothing + } else if (storage_iter == nullptr) { + //first index lookup, init scan param and do table scan + if (OB_FAIL(set_doc_id_idx_lookup_param( + doc_id_lookup_ctdef_, doc_id_lookup_rtdef_, doc_id_scan_param_, doc_id_idx_tablet_id_, ls_id_))) { + LOG_WARN("failed to init doc id lookup scan param", K(ret)); + } else if (OB_FAIL(save_aux_rowkeys())) { + LOG_WARN("failed to save aux keys failed", K(ret)); + } else if (OB_FAIL(tsc_service.table_scan(doc_id_scan_param_, + storage_iter))) { + if (OB_SNAPSHOT_DISCARDED == ret && doc_id_scan_param_.fb_snapshot_.is_valid()) { + ret = OB_INVALID_QUERY_TIMESTAMP; + } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("fail to scan table", K(doc_id_scan_param_), K(ret)); + } + } + } else { + const ObTabletID &storage_tablet_id = doc_id_scan_param_.tablet_id_; + doc_id_scan_param_.need_switch_param_ = (storage_tablet_id.is_valid() && storage_tablet_id != tablet_id_ ? 
true : false); + doc_id_scan_param_.tablet_id_ = tablet_id_; + doc_id_scan_param_.ls_id_ = ls_id_; + if (OB_FAIL(save_aux_rowkeys())) { + LOG_WARN("failed to save aux keys failed", K(ret)); + } else if (OB_FAIL(tsc_service.table_rescan(doc_id_scan_param_, storage_iter))) { + LOG_WARN("table_rescan scan iter failed", K(ret)); + } + } + + if (aux_key_count_ > 0) { + while (OB_SUCC(ret)) { + doc_id_lookup_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + if (OB_FAIL(storage_iter->get_next_row())) { + if (OB_ITER_END != ret) { + LOG_WARN("get next row from index scan failed", K(ret)); + } else { + ret = OB_SUCCESS; + break; + } + } else { + int64_t rowkey_colunmn_cnt = doc_id_lookup_ctdef_->result_output_.count(); + ObObj *obj_ptr = nullptr; + ObIAllocator &allocator = lookup_memctx_->get_arena_allocator(); + + if (OB_ISNULL(obj_ptr = static_cast(allocator.alloc(sizeof(ObObj) * rowkey_colunmn_cnt)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate buffer failed", K(ret), K(rowkey_colunmn_cnt)); + } else { + obj_ptr = new(obj_ptr) ObObj[rowkey_colunmn_cnt]; + } + + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_colunmn_cnt; ++i) { + ObObj tmp_obj; + ObExpr *expr = doc_id_lookup_ctdef_->result_output_.at(i); + if (T_PSEUDO_GROUP_ID == expr->type_) { + // do nothing + } else { + ObDatum &rowkey_datum = expr->locate_expr_datum(*doc_id_lookup_rtdef_->eval_ctx_); + if (OB_FAIL(rowkey_datum.to_obj(tmp_obj, expr->obj_meta_, expr->obj_datum_map_))) { + LOG_WARN("convert datum to obj failed", K(ret)); + } else if (OB_FAIL(ob_write_obj(allocator, tmp_obj, obj_ptr[i]))) { + LOG_WARN("deep copy rowkey value failed", K(ret), K(tmp_obj)); + } + } + } + if (OB_SUCC(ret)) { + ObRowkey table_rowkey(obj_ptr, rowkey_colunmn_cnt); + if (OB_FAIL(sorter_.add_item(table_rowkey))) { + LOG_WARN("filter mbr failed", K(ret)); + } else { + LOG_TRACE("add rowkey success", K(table_rowkey), K(obj_ptr), K(obj_ptr[0]), K(rowkey_colunmn_cnt)); + } + } + } + } + } + + return ret; +} + +} // 
namespace sql +} // namespace oceanbase diff --git a/src/sql/das/ob_domain_index_lookup_op.h b/src/sql/das/ob_domain_index_lookup_op.h new file mode 100644 index 0000000000..cdb4417a53 --- /dev/null +++ b/src/sql/das/ob_domain_index_lookup_op.h @@ -0,0 +1,205 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ +#ifndef OBDEV_SRC_SQL_DAS_OB_DOMAIN_INDEX_LOOKUP_OP_H_ +#define OBDEV_SRC_SQL_DAS_OB_DOMAIN_INDEX_LOOKUP_OP_H_ +#include "sql/das/ob_das_scan_op.h" +#include "storage/ob_store_row_comparer.h" +#include "storage/ob_parallel_external_sort.h" +#include "storage/tx_storage/ob_access_service.h" +namespace oceanbase +{ +namespace sql +{ + +class ObDomainRowkeyComp { +public: + ObDomainRowkeyComp(int &sort_ret) : result_code_(sort_ret) {} + + bool operator()(const ObRowkey *left, const ObRowkey *right) + { + bool bool_ret = false; + if (OB_UNLIKELY(common::OB_SUCCESS != result_code_)) { + //do nothing + } else if (OB_UNLIKELY(NULL == left) + || OB_UNLIKELY(NULL == right)) { + result_code_ = common::OB_INVALID_ARGUMENT; + LOG_WARN_RET(result_code_, "Invaid argument, ", KP(left), KP(right), K_(result_code)); + } else { + bool_ret = (*left) < (*right); + } + return bool_ret; + } + + int &result_code_; +}; + +class ObDomainIndexLookupOp : public ObLocalIndexLookupOp +{ +public: + ObDomainIndexLookupOp(ObIAllocator &allocator) : + ObLocalIndexLookupOp(), + allocator_(&allocator), + doc_id_scan_param_(), + doc_id_lookup_ctdef_(nullptr), + doc_id_lookup_rtdef_(nullptr), + doc_id_idx_tablet_id_(), 
+ doc_id_expr_(nullptr), + doc_id_key_obj_(), + cmp_ret_(0), + comparer_(cmp_ret_), + sorter_(allocator), + need_scan_aux_(false) {} + + virtual ~ObDomainIndexLookupOp() {} + + int init(const ObDASScanCtDef *lookup_ctdef, + ObDASScanRtDef *lookup_rtdef, + const ObDASScanCtDef *index_ctdef, + ObDASScanRtDef *index_rtdef, + const ObDASScanCtDef *doc_id_lookup_ctdef, + ObDASScanRtDef *doc_id_lookup_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + storage::ObTableScanParam &scan_param); + + virtual int get_next_row() override; + virtual int get_next_rows(int64_t &count, int64_t capacity) override; + void set_doc_id_idx_tablet_id(const ObTabletID &doc_id_idx_tablet_id) + { doc_id_idx_tablet_id_ = doc_id_idx_tablet_id; } + + virtual int revert_iter() override; + virtual int reuse_scan_iter(); + ObITabletScan& get_tsc_service() { return *(MTL(ObAccessService *)); } +protected: + virtual int init_scan_param() override { return ObLocalIndexLookupOp::init_scan_param(); } +protected: + virtual int reset_lookup_state() override; + virtual int next_state(); + virtual int init_sort() { return OB_SUCCESS; } + // get index table rowkey, add rowkey as scan parameter of maintable / auxiliary lookup on demand + virtual int fetch_index_table_rowkey() { return OB_NOT_IMPLEMENT; } + virtual int fetch_index_table_rowkeys(int64_t &count, const int64_t capacity) { return OB_NOT_IMPLEMENT; } + // get maintable rowkey for index lookup by from auxiliary index table on demand; + virtual int get_aux_table_rowkey() { return OB_NOT_IMPLEMENT; } + virtual int get_aux_table_rowkeys(const int64_t lookup_row_cnt) { return OB_NOT_IMPLEMENT; } + + virtual int do_aux_table_lookup() { return OB_SUCCESS; } + virtual void do_clear_evaluated_flag(); + virtual int set_lookup_doc_id_key(ObExpr *doc_id_expr, ObEvalCtx *eval_ctx_); + int set_doc_id_idx_lookup_param( + const ObDASScanCtDef *aux_lookup_ctdef, + ObDASScanRtDef *aux_lookup_rtdef, + storage::ObTableScanParam& 
aux_scan_param, + common::ObTabletID tablet_id_, + share::ObLSID ls_id_); + +protected: + ObIAllocator *allocator_; + storage::ObTableScanParam doc_id_scan_param_; + const ObDASScanCtDef *doc_id_lookup_ctdef_; + ObDASScanRtDef *doc_id_lookup_rtdef_; + ObTabletID doc_id_idx_tablet_id_; + ObExpr *doc_id_expr_; + ObObj doc_id_key_obj_; + + int cmp_ret_; + ObDomainRowkeyComp comparer_; + ObExternalSort sorter_; // use ObRowKeyCompare to compare rowkey + bool need_scan_aux_; + + static const int64_t SORT_MEMORY_LIMIT = 32L * 1024L * 1024L; + static const int64_t MAX_NUM_PER_BATCH = 1000; +}; + +class ObFullTextIndexLookupOp : public ObDomainIndexLookupOp +{ +public: + explicit ObFullTextIndexLookupOp(ObIAllocator &allocator) + : ObDomainIndexLookupOp(allocator), + text_retrieval_iter_(nullptr), + retrieval_ctx_(nullptr) {} + + virtual ~ObFullTextIndexLookupOp() {} + + int init(const ObDASBaseCtDef *table_lookup_ctdef, + ObDASBaseRtDef *table_lookup_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + storage::ObTableScanParam &scan_param); + void set_text_retrieval_iter(common::ObNewRowIterator *text_retrieval_iter) + { + text_retrieval_iter_ = text_retrieval_iter; + } + common::ObNewRowIterator *get_text_retrieval_iter() { return text_retrieval_iter_; } + virtual int reset_lookup_state() override; + virtual int do_aux_table_lookup(); + virtual int revert_iter() override; + virtual void do_clear_evaluated_flag() override; +protected: + virtual int fetch_index_table_rowkey() override; + virtual int fetch_index_table_rowkeys(int64_t &count, const int64_t capacity) override; + virtual int get_aux_table_rowkey() override; + virtual int get_aux_table_rowkeys(const int64_t lookup_row_cnt) override; +private: + int set_lookup_doc_id_keys(const int64_t size); + int set_main_table_lookup_key(); +private: + ObNewRowIterator *text_retrieval_iter_; + ObEvalCtx *retrieval_ctx_; +}; + +class ObMulValueIndexLookupOp : public ObDomainIndexLookupOp +{ 
+public: + explicit ObMulValueIndexLookupOp(ObIAllocator &allocator) + : ObDomainIndexLookupOp(allocator), + aux_cmp_ret_(0), + aux_key_count_(0), + index_rowkey_cnt_(0), + aux_comparer_(aux_cmp_ret_), + aux_sorter_(allocator), + aux_lookup_iter_(nullptr), + last_rowkey_(), + aux_last_rowkey_() {} + + virtual ~ObMulValueIndexLookupOp() {} + virtual void do_clear_evaluated_flag() override; + int init(const ObDASBaseCtDef *table_lookup_ctdef, + ObDASBaseRtDef *table_lookup_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + storage::ObTableScanParam &scan_param); +protected: + virtual int init_scan_param() override; +protected: + virtual int fetch_index_table_rowkey(); + int init_sort(); + int save_aux_rowkeys(); + int save_rowkeys(); + int save_doc_id_and_rowkey(); + int fetch_rowkey_from_aux(); + virtual int get_aux_table_rowkey() override; + ObNewRowIterator*& get_aux_lookup_iter() { return aux_lookup_iter_; } +private: + int aux_cmp_ret_; + uint32_t aux_key_count_; + int index_rowkey_cnt_; + ObDomainRowkeyComp aux_comparer_; + ObExternalSort aux_sorter_; + common::ObNewRowIterator *aux_lookup_iter_; + ObRowkey last_rowkey_; + ObRowkey aux_last_rowkey_; +}; + +} // namespace sql +} // namespace oceanbase +#endif /* OBDEV_SRC_SQL_DAS_OB_DOMAIN_INDEX_LOOKUP_OP_H_ */ diff --git a/src/sql/das/ob_text_retrieval_op.cpp b/src/sql/das/ob_text_retrieval_op.cpp index 937da52292..e2cff3832c 100644 --- a/src/sql/das/ob_text_retrieval_op.cpp +++ b/src/sql/das/ob_text_retrieval_op.cpp @@ -12,153 +12,974 @@ #define USING_LOG_PREFIX SQL_DAS #include "sql/das/ob_text_retrieval_op.h" +#include "share/text_analysis/ob_text_analyzer.h" +#include "storage/fts/ob_fts_plugin_helper.h" +#include "storage/tx_storage/ob_access_service.h" namespace oceanbase { namespace sql { +OB_SERIALIZE_MEMBER((ObDASIRScanCtDef, ObDASAttachCtDef), + flags_, + search_text_, + inv_scan_doc_id_col_, + inv_scan_doc_length_col_, + match_filter_, + relevance_expr_, + 
relevance_proj_col_, + estimated_total_doc_cnt_); -OB_DEF_SERIALIZE_SIZE(ObDASIRCtDef) +OB_SERIALIZE_MEMBER(ObDASIRScanRtDef); + +OB_SERIALIZE_MEMBER((ObDASIRAuxLookupCtDef, ObDASAttachCtDef), + relevance_proj_col_); + +OB_SERIALIZE_MEMBER((ObDASIRAuxLookupRtDef, ObDASAttachRtDef)); + +ObIRIterLoserTreeItem::ObIRIterLoserTreeItem() + : relevance_(0), doc_id_(), iter_idx_(-1) { - int64_t len = 0; - LST_DO_CODE(OB_UNIS_ADD_LEN, - inv_idx_scan_ctdef_, - search_text_, - inv_scan_doc_id_col_, - match_filter_, - need_relevance_); - if (need_relevance_) { - if (OB_NOT_NULL(inv_idx_agg_ctdef_) - && OB_NOT_NULL(fwd_idx_scan_ctdef_) - && OB_NOT_NULL(doc_id_idx_whole_agg_ctdef_)) { - LST_DO_CODE(OB_UNIS_ADD_LEN, - relevance_expr_, - relevance_proj_col_, - *inv_idx_agg_ctdef_, - *doc_id_idx_whole_agg_ctdef_, - *fwd_idx_scan_ctdef_); +} + +ObIRIterLoserTreeCmp::ObIRIterLoserTreeCmp() + : cmp_func_(), is_inited_(false) +{ +} + +ObIRIterLoserTreeCmp::~ObIRIterLoserTreeCmp() +{ +} + +int ObIRIterLoserTreeCmp::init() +{ + int ret = OB_SUCCESS; + sql::ObExprBasicFuncs *basic_funcs = ObDatumFuncs::get_basic_func(ObVarcharType, CS_TYPE_BINARY); + cmp_func_ = lib::is_oracle_mode() ? 
basic_funcs->null_last_cmp_ : basic_funcs->null_first_cmp_; + if (OB_ISNULL(cmp_func_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to init IRIterLoserTreeCmp", K(ret)); + } else { + is_inited_ = true; + } + return ret; +} + +int ObIRIterLoserTreeCmp::cmp( + const ObIRIterLoserTreeItem &l, + const ObIRIterLoserTreeItem &r, + int64_t &cmp_ret) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else { + ObDatum l_datum; + ObDatum r_datum; + l_datum.set_string(l.doc_id_.get_string()); + r_datum.set_string(r.doc_id_.get_string()); + int tmp_ret = 0; + if (OB_FAIL(cmp_func_(l_datum, r_datum, tmp_ret))) { + LOG_WARN("failed to compare doc id by datum", K(ret)); + } else { + cmp_ret = tmp_ret; + } + } + return ret; +} + +ObTextRetrievalMerge::ObTextRetrievalMerge() + : common::ObNewRowIterator(), + relation_type_(MAX_RELATION_TYPE), + processing_type_(MAX_PROC_TYPE), + allocator_(nullptr), + retrieval_param_(), + query_tokens_(), + loser_tree_cmp_(), + iter_row_heap_(nullptr), + next_batch_iter_idxes_(), + next_batch_cnt_(0), + whole_doc_cnt_iter_(nullptr), + whole_doc_agg_param_(), + doc_cnt_calculated_(false), + is_inited_(false) +{ +} + +ObTextRetrievalMerge::~ObTextRetrievalMerge() +{ + reset(); + ObNewRowIterator::~ObNewRowIterator(); +} + +int ObTextRetrievalMerge::init( + const share::ObLSID &ls_id, + const ObTabletID &inv_idx_tablet_id, + const ObTabletID &fwd_idx_tablet_id, + const ObTabletID &doc_id_idx_tablet_id, + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("double initialization", K(ret)); + } else { + relation_type_ = TokenRelationType::DISJUNCTIVE; + processing_type_ = RetrievalProcType::DAAT; + allocator_ = &allocator; + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(loser_tree_cmp_.init())) { + 
LOG_WARN("failed to init loser tree comparator", K(ret)); + } else if (OB_ISNULL(iter_row_heap_ = OB_NEWx(ObIRIterLoserTree, allocator_, loser_tree_cmp_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate loser tree", K(ret)); + } else if (OB_FAIL(init_iter_params(ls_id, inv_idx_tablet_id, fwd_idx_tablet_id, doc_id_idx_tablet_id, ir_ctdef, ir_rtdef))) { + LOG_WARN("failed to init iter params", K(ret)); + } else if (0 == query_tokens_.count()) { + // empty token set + LOG_DEBUG("empty query token set after tokenization", K(ret), KPC(ir_ctdef)); + is_inited_ = true; + } else if (OB_UNLIKELY(query_tokens_.count() > OB_MAX_TEXT_RETRIEVAL_TOKEN_CNT)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("too many query tokens in a single query not supported", K(ret), K_(query_tokens)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "text retrieval query with token count exceed limit"); + } else if (OB_FAIL(iter_row_heap_->init(query_tokens_.count(), *allocator_))) { + LOG_WARN("failed to init iter loser tree", K(ret)); + } else if (OB_FAIL(init_iters(tx_desc, snapshot, query_tokens_))) { + LOG_WARN("failed to init iterators", K(ret), K_(query_tokens)); + } else if (OB_FAIL(init_total_doc_cnt_param(tx_desc, snapshot))) { + LOG_WARN("failed to do total doc cnt", K(ret)); + } else { + is_inited_ = true; + } + LOG_DEBUG("init text retrieval op", K(ret), K_(retrieval_param)); + } + + return ret; +} + +int ObTextRetrievalMerge::rescan( + const share::ObLSID &ls_id, + const ObTabletID &inv_idx_tablet_id, + const ObTabletID &fwd_idx_tablet_id, + const ObTabletID &doc_id_idx_tablet_id, + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + // TODO: opt rescan + reset(); + if (OB_FAIL(init(ls_id, + inv_idx_tablet_id, + fwd_idx_tablet_id, + doc_id_idx_tablet_id, + ir_ctdef, + ir_rtdef, + tx_desc, + snapshot, + allocator))) { + LOG_WARN("failed to 
re init", K(ret)); + } + return ret; +} + +void ObTextRetrievalMerge::reset() +{ + query_tokens_.reset(); + if (nullptr != iter_row_heap_) { + iter_row_heap_->~ObIRIterLoserTree(); + iter_row_heap_ = nullptr; + } + release_iters(); + next_batch_iter_idxes_.reset(); + next_batch_cnt_ = 0; + allocator_ = nullptr; + doc_cnt_calculated_ = false; + is_inited_ = false; +} + +int ObTextRetrievalMerge::get_next_row(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (0 == query_tokens_.count()) { + ret = OB_ITER_END; + } else if (!doc_cnt_calculated_) { + if (OB_FAIL(do_total_doc_cnt())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to do total document count", K(ret), K_(retrieval_param)); } + } else { + doc_cnt_calculated_ = true; + } } - return len; + + bool got_valid_document = false; + ObExpr *match_filter = retrieval_param_.get_ir_ctdef()->match_filter_; + ObDatum *filter_res = nullptr; + while (OB_SUCC(ret) && !got_valid_document) { + clear_evaluated_infos(); + if (OB_FAIL(pull_next_batch_rows())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to pull next batch rows from iterator", K(ret)); + } + } else if (OB_FAIL(next_disjunctive_document())) { + LOG_WARN("failed to get next document with disjunctive tokens", K(ret)); + } else if (OB_ISNULL(match_filter)) { + got_valid_document = true; + } else if (OB_FAIL(match_filter->eval(*retrieval_param_.get_ir_rtdef()->eval_ctx_, filter_res))) { + LOG_WARN("failed to evaluate match filter", K(ret)); + } else { + got_valid_document = !(filter_res->is_null() || 0 == filter_res->get_int()); + } + } + + return ret; } -OB_DEF_SERIALIZE(ObDASIRCtDef) +int ObTextRetrievalMerge::get_next_rows(int64_t &count, int64_t capacity) +{ + // only one row at a time + // TODO: support batch vectorized execution later + int ret = OB_SUCCESS; + if (OB_FAIL(get_next_row())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed 
to get next row", K(ret)); + } + } else { + count += 1; + } + return ret; +} + +int ObTextRetrievalMerge::init_iter_params( + const share::ObLSID &ls_id, + const ObTabletID &inv_idx_tablet_id, + const ObTabletID &fwd_idx_tablet_id, + const ObTabletID &doc_id_idx_tablet_id, + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef) { int ret = OB_SUCCESS; - LST_DO_CODE(OB_UNIS_ENCODE, - inv_idx_scan_ctdef_, - search_text_, - inv_scan_doc_id_col_, - match_filter_, - need_relevance_); - if (OB_SUCC(ret) && need_relevance_) { - if (OB_ISNULL(inv_idx_agg_ctdef_) - || OB_ISNULL(fwd_idx_scan_ctdef_) - || OB_ISNULL(doc_id_idx_whole_agg_ctdef_)) { + if (OB_FAIL(init_query_tokens(ir_ctdef, ir_rtdef))) { + LOG_WARN("failed to init query tokens", K(ret)); + } else { + retrieval_param_.set_param( + ls_id, inv_idx_tablet_id, fwd_idx_tablet_id, doc_id_idx_tablet_id, ir_ctdef, ir_rtdef); + } + return ret; +} + +int ObTextRetrievalMerge::init_query_tokens(const ObDASIRScanCtDef *ir_ctdef, ObDASIRScanRtDef *ir_rtdef) +{ + int ret = OB_SUCCESS; + ObExpr *search_text = ir_ctdef->search_text_; + ObEvalCtx *eval_ctx = ir_rtdef->eval_ctx_; + ObDatum *search_text_datum = nullptr; + if (OB_ISNULL(search_text) || OB_ISNULL(eval_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), KP(search_text), KP(eval_ctx)); + } else if (OB_FAIL(search_text->eval(*eval_ctx, search_text_datum))) { + LOG_WARN("expr evaluation failed", K(ret)); + } else if (0 == search_text_datum->len_) { + // empty query text + } else { + // TODO: FTParseHelper currently does not support deduplicate tokens + // We should abstract such universal analyse functors into utility structs + const ObString &search_text_string = search_text_datum->get_string(); + const ObString &parser_name = ir_ctdef->get_inv_idx_scan_ctdef()->table_param_.get_parser_name(); + const ObCollationType &cs_type = search_text->datum_meta_.cs_type_; + int64_t doc_length = 0; + storage::ObFTParseHelper tokenize_helper; + 
common::ObSEArray tokens; + hash::ObHashMap token_map; + const int64_t ft_word_bkt_cnt = MAX(search_text_string.length() / 10, 2); + if (OB_FAIL(tokenize_helper.init(allocator_, parser_name))) { + LOG_WARN("failed to init tokenize helper", K(ret)); + } else if (OB_FAIL(token_map.create(ft_word_bkt_cnt, common::ObMemAttr(MTL_ID(), "FTWordMap")))) { + LOG_WARN("failed to create token map", K(ret)); + } else if (OB_FAIL(tokenize_helper.segment( + cs_type, search_text_string.ptr(), search_text_string.length(), doc_length, tokens))) { + LOG_WARN("failed to segment"); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < tokens.count(); ++i) { + const ObFTWord &token = tokens.at(i); + int64_t word_count = 0; + if (OB_FAIL(token_map.get_refactored(token, word_count)) && OB_HASH_NOT_EXIST != ret) { + LOG_WARN("fail to get ft word", K(ret), K(token)); + } else { + word_count = OB_HASH_NOT_EXIST == ret ? 1 : ++word_count; + if (OB_FAIL(token_map.set_refactored(token, word_count, 1/*overwrite*/))) { + LOG_WARN("fail to set ft word and count", K(ret), K(token)); + } + } + } + for (hash::ObHashMap::const_iterator iter = token_map.begin(); + OB_SUCC(ret) && iter != token_map.end(); + ++iter) { + const ObFTWord &token = iter->first; + ObString token_string; + if (OB_FAIL(ob_write_string(*allocator_, token.get_word(), token_string))) { + LOG_WARN("failed to deep copy query token", K(ret)); + } else if (OB_FAIL(query_tokens_.push_back(token_string))) { + LOG_WARN("failed to append query token", K(ret)); + } + } + } + +// TODO: try use this interface instead +/* + share::ObITokenStream *token_stream = nullptr; + share::ObTextAnalysisCtx query_analysis_ctx; + query_analysis_ctx.need_grouping_ = true; + query_analysis_ctx.filter_stopword_ = true; + query_analysis_ctx.cs_ = common::ObCharset::get_charset(search_text->obj_meta_.get_collation_type()); + share::ObEnglishTextAnalyzer query_analyzer; + if (OB_FAIL(query_analyzer.init(query_analysis_ctx, token_analyze_alloc))) { + 
LOG_WARN("failed to init query text analyzer", K(ret)); + } else if (OB_FAIL(query_analyzer.analyze(*search_text_datum, token_stream))) { + LOG_WARN("failed to analyze search text", K(ret), K(query_analysis_ctx), KPC(search_text_datum)); + } + while (OB_SUCC(ret)) { + ObDatum token; + ObString token_string; + if (OB_FAIL(token_stream->get_next(token))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next query token", K(ret)); + } + } else if (OB_FAIL(ob_write_string(token_analyze_alloc, token.get_string(), token_string))) { + LOG_WARN("failed to deep copy query token", K(ret)); + } else if (OB_FAIL(query_tokens_.push_back(token_string))) { + LOG_WARN("failed to append query token", K(ret)); + } + } + + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to init query tokens", K(ret)); + } else { + ret = OB_SUCCESS; + } +*/ + + LOG_DEBUG("tokenized text query:", K(ret), KPC(search_text_datum), K_(query_tokens)); + } + return ret; +} + +int ObTextRetrievalMerge::init_iters( + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + const ObIArray &query_tokens) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(0 == query_tokens.count() || query_tokens.count() > OB_MAX_TEXT_RETRIEVAL_TOKEN_CNT)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid empty query tokens", K(ret), K(query_tokens.count()), K(query_tokens)); + } else if (FALSE_IT(next_batch_iter_idxes_.set_allocator(allocator_))) { + } else if (OB_FAIL(next_batch_iter_idxes_.init(query_tokens_.count()))) { + LOG_WARN("failed to init next batch iter idxes array", K(ret)); + } else { + next_batch_cnt_ = query_tokens.count(); + } + for (int64_t i = 0; OB_SUCC(ret) && i < query_tokens.count(); ++i) { + const ObString &query_token = query_tokens.at(i); + storage::ObTextRetrievalIterator *iter = nullptr; + if (OB_ISNULL(iter = OB_NEWx(storage::ObTextRetrievalIterator, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory for text retrieval 
iterator", K(ret)); + } else if (OB_FAIL(iter->init(retrieval_param_, query_token, tx_desc, snapshot))) { + LOG_WARN("failed to init one single text ir iterator", K(ret), K(i), K(query_token)); + } else if (OB_FAIL(token_iters_.push_back(iter))) { + LOG_WARN("failed to append token iter to array", K(ret)); + } else { + next_batch_iter_idxes_[i] = i; + } + + if (OB_FAIL(ret)) { + if (nullptr != iter) { + iter->~ObTextRetrievalIterator(); + allocator_->free(iter); + } + } + } + + if (OB_FAIL(ret)) { + release_iters(); + } + return ret; +} + +void ObTextRetrievalMerge::release_iters() +{ + int ret = OB_SUCCESS; + if (nullptr != allocator_) { + for (int64_t i = 0; i < token_iters_.count(); ++i) { + storage::ObTextRetrievalIterator *iter = token_iters_.at(i); + if (nullptr != iter) { + iter->reset(); + iter->~ObTextRetrievalIterator(); + allocator_->free(iter); + } + } + token_iters_.reset(); + if (nullptr != whole_doc_cnt_iter_) { + ObITabletScan *tsc_service = MTL(ObAccessService *); + if (nullptr != tsc_service) { + if (OB_FAIL(tsc_service->revert_scan_iter(whole_doc_cnt_iter_))) { + LOG_WARN("failed to revert scan iter", K(ret)); + } + whole_doc_cnt_iter_ = nullptr; + } + } + whole_doc_agg_param_.need_switch_param_ = false; + whole_doc_agg_param_.destroy(); + } +} + +int ObTextRetrievalMerge::pull_next_batch_rows() +{ + int ret = OB_SUCCESS; + ObIRIterLoserTreeItem item; + for (int64_t i = 0; OB_SUCC(ret) && i < next_batch_cnt_; ++i) { + const int64_t iter_idx = next_batch_iter_idxes_[i]; + storage::ObTextRetrievalIterator *iter = nullptr; + if (OB_ISNULL(iter = token_iters_.at(iter_idx))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected", K(ret), KP_(inv_idx_agg_ctdef), KP_(fwd_idx_scan_ctdef), KP_(doc_id_idx_whole_agg_ctdef)); + LOG_WARN("unexpected null token iter ptr", K(ret), K(iter_idx), K(token_iters_.count())); + } else if (OB_FAIL(iter->get_next_row())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get pull next batch rows from 
iterator", K(ret)); + } else { + ret = OB_SUCCESS; + } + } else if (OB_FAIL(fill_loser_tree_item(*iter, iter_idx, item))) { + LOG_WARN("fail to fill loser tree item", K(ret)); + } else if (OB_FAIL(iter_row_heap_->push(item))) { + LOG_WARN("fail to push item to loser tree", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (iter_row_heap_->empty()) { + ret = OB_ITER_END; + } else if (OB_FAIL(iter_row_heap_->rebuild())) { + LOG_WARN("fail to rebuild loser tree", K(ret), K_(next_batch_cnt)); } else { - LST_DO_CODE(OB_UNIS_ENCODE, - relevance_expr_, - relevance_proj_col_, - *inv_idx_agg_ctdef_, - *doc_id_idx_whole_agg_ctdef_, - *fwd_idx_scan_ctdef_); + next_batch_cnt_ = 0; } } return ret; } -OB_DEF_DESERIALIZE(ObDASIRCtDef) +int ObTextRetrievalMerge::next_disjunctive_document() { int ret = OB_SUCCESS; - need_relevance_ = false; - LST_DO_CODE(OB_UNIS_DECODE, - inv_idx_scan_ctdef_, - search_text_, - inv_scan_doc_id_col_, - match_filter_, - need_relevance_); - if (OB_SUCC(ret) && need_relevance_) { - if (OB_ISNULL(inv_idx_agg_ctdef_ = OB_NEWx(ObDASScanCtDef, &allocator_, allocator_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate memory for ctdef", K(ret)); - } else if (OB_ISNULL(fwd_idx_scan_ctdef_ = OB_NEWx(ObDASScanCtDef, &allocator_, allocator_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate memory for ctdef", K(ret)); - } else if (OB_ISNULL(doc_id_idx_whole_agg_ctdef_ = OB_NEWx(ObDASScanCtDef, &allocator_, allocator_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate memory for ctdef", K(ret)); - } else { - LST_DO_CODE(OB_UNIS_DECODE, - relevance_expr_, - relevance_proj_col_, - *inv_idx_agg_ctdef_, - *doc_id_idx_whole_agg_ctdef_, - *fwd_idx_scan_ctdef_); + int64_t doc_cnt = 0; + bool curr_doc_end = false; + const ObIRIterLoserTreeItem *top_item = nullptr; + // Do we need to use ObExpr to collect relevance? 
+ double cur_doc_relevance = 0.0; + while (OB_SUCC(ret) && !iter_row_heap_->empty() && !curr_doc_end) { + if (iter_row_heap_->is_unique_champion()) { + curr_doc_end = true; } + if (OB_FAIL(iter_row_heap_->top(top_item))) { + LOG_WARN("failed to get top item from heap", K(ret)); + } else { + // consider to add an expr for collectiong conjunction result between query tokens here? + cur_doc_relevance += top_item->relevance_; + next_batch_iter_idxes_[next_batch_cnt_++] = top_item->iter_idx_; + if (OB_FAIL(iter_row_heap_->pop())) { + LOG_WARN("failed to pop top item in heap", K(ret)); + } + } + } + + if (OB_SUCC(ret) && retrieval_param_.get_ir_ctdef()->need_proj_relevance_score()) { + if (OB_FAIL(project_result(*top_item, cur_doc_relevance))) { + LOG_WARN("failed to project relevance", K(ret)); + } + } + + return ret; +} + +int ObTextRetrievalMerge::project_result(const ObIRIterLoserTreeItem &item, const double relevance) +{ + int ret = OB_SUCCESS; + ObExpr *relevance_proj_col = retrieval_param_.get_ir_ctdef()->relevance_proj_col_; + // TODO: usage of doc id column is somehow weird here, since in single token retrieval iterators, + // we use doc id expr to scan doc_id column for scan document. But here after DaaT processing, we use this expr + // to record current disjunctive documents. Though current implementation can make sure lifetime is + // safe, but it's tricky and indirect to read. 
+ // P.S we cannot allocate multiple doc id expr at cg for every query token since tokenization now is an runtime operation + ObExpr *doc_id_col = retrieval_param_.get_ir_ctdef()->inv_scan_doc_id_col_; + ObEvalCtx *eval_ctx = retrieval_param_.get_ir_rtdef()->eval_ctx_; + if (OB_ISNULL(relevance_proj_col) || OB_ISNULL(doc_id_col) || OB_ISNULL(eval_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to relevance proejction column", + K(ret), KP(relevance_proj_col), KP(doc_id_col), KP(eval_ctx)); + } else { + ObDatum &relevance_proj_datum = relevance_proj_col->locate_datum_for_write(*eval_ctx); + ObDatum &doc_id_proj_datum = doc_id_col->locate_datum_for_write(*eval_ctx); + relevance_proj_datum.set_double(relevance); + doc_id_proj_datum.set_string(item.doc_id_.get_string()); + LOG_DEBUG("project one fulltext search result", K(ret), K(item)); } return ret; } -OB_DEF_SERIALIZE_SIZE(ObDASIRRtDef) -{ - int64_t len = 0; - LST_DO_CODE(OB_UNIS_ADD_LEN, - inv_idx_scan_rtdef_, - need_relevance_); - if (need_relevance_) { - if (OB_NOT_NULL(inv_idx_agg_rtdef_) && OB_NOT_NULL(doc_id_idx_whole_agg_rtdef_) && OB_NOT_NULL(fwd_idx_rtdef_)) { - LST_DO_CODE(OB_UNIS_ADD_LEN, - *inv_idx_agg_rtdef_, - *doc_id_idx_whole_agg_rtdef_, - *fwd_idx_rtdef_); - } - } - return len; -} - -OB_DEF_SERIALIZE(ObDASIRRtDef) +int ObTextRetrievalMerge::fill_loser_tree_item( + storage::ObTextRetrievalIterator &iter, + const int64_t iter_idx, + ObIRIterLoserTreeItem &item) { int ret = OB_SUCCESS; - LST_DO_CODE(OB_UNIS_ENCODE, - inv_idx_scan_rtdef_, - need_relevance_); - if (OB_SUCC(ret) && need_relevance_) { - if (OB_ISNULL(inv_idx_agg_rtdef_) || OB_ISNULL(doc_id_idx_whole_agg_rtdef_) || OB_ISNULL(fwd_idx_rtdef_)) { + item.iter_idx_ = iter_idx; + ObExpr *relevance_expr = retrieval_param_.get_ir_ctdef()->relevance_expr_; + ObExpr *doc_id_expr = retrieval_param_.get_ir_ctdef()->inv_scan_doc_id_col_; + const ObDatum &relevance_datum = 
relevance_expr->locate_expr_datum(*retrieval_param_.get_ir_rtdef()->eval_ctx_); + const ObDatum &doc_id_datum = doc_id_expr->locate_expr_datum(*retrieval_param_.get_ir_rtdef()->eval_ctx_); + item.relevance_ = relevance_datum.get_double(); + if (OB_FAIL(item.doc_id_.from_string(doc_id_datum.get_string()))) { + LOG_WARN("failed to get ObDocId from string", K(ret)); + } + return ret; +} + +int ObTextRetrievalMerge::init_total_doc_cnt_param( + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot) +{ + int ret = OB_SUCCESS; + const ObDASScanCtDef *ctdef = retrieval_param_.get_doc_id_idx_agg_ctdef(); + ObDASScanRtDef *rtdef = retrieval_param_.get_ir_rtdef()->get_doc_id_idx_agg_rtdef(); + if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected scan descriptor", K(ret)); + } else { + ObTableScanParam &scan_param = whole_doc_agg_param_; + scan_param.tenant_id_ = MTL_ID(); + scan_param.tx_lock_timeout_ = rtdef->tx_lock_timeout_; + scan_param.index_id_ = ctdef->ref_table_id_; + scan_param.is_get_ = false; // scan + scan_param.is_for_foreign_check_ = false; + scan_param.timeout_ = rtdef->timeout_ts_; + scan_param.scan_flag_ = rtdef->scan_flag_; + scan_param.reserved_cell_count_ = ctdef->access_column_ids_.count(); + scan_param.allocator_ = &rtdef->stmt_allocator_; + scan_param.scan_allocator_ = &rtdef->scan_allocator_; + scan_param.sql_mode_ = rtdef->sql_mode_; + scan_param.frozen_version_ = rtdef->frozen_version_; + scan_param.force_refresh_lc_ = rtdef->force_refresh_lc_; + scan_param.output_exprs_ = &(ctdef->pd_expr_spec_.access_exprs_); + scan_param.calc_exprs_ = &(ctdef->pd_expr_spec_.calc_exprs_); + scan_param.aggregate_exprs_ = &(ctdef->pd_expr_spec_.pd_storage_aggregate_output_); + scan_param.table_param_ = &(ctdef->table_param_); + scan_param.op_ = rtdef->p_pd_expr_op_; + scan_param.row2exprs_projector_ = rtdef->p_row2exprs_projector_; + scan_param.schema_version_ = ctdef->schema_version_; + 
scan_param.tenant_schema_version_ = rtdef->tenant_schema_version_; + scan_param.limit_param_ = rtdef->limit_param_; + scan_param.need_scn_ = rtdef->need_scn_; + scan_param.pd_storage_flag_ = ctdef->pd_expr_spec_.pd_storage_flag_.pd_flag_; + scan_param.fb_snapshot_ = rtdef->fb_snapshot_; + scan_param.fb_read_tx_uncommitted_ = rtdef->fb_read_tx_uncommitted_; + scan_param.ls_id_ = retrieval_param_.get_ls_id(); + scan_param.tablet_id_ = retrieval_param_.get_doc_id_idx_tablet_id(); + if (ctdef->pd_expr_spec_.pushdown_filters_.empty()) { + scan_param.op_filters_ = &ctdef->pd_expr_spec_.pushdown_filters_; + } + scan_param.pd_storage_filters_ = rtdef->p_pd_expr_op_->pd_storage_filters_; + if (OB_NOT_NULL(tx_desc)) { + scan_param.tx_id_ = tx_desc->get_tx_id(); + } else { + scan_param.tx_id_.reset(); + } + + if (OB_NOT_NULL(snapshot)) { + scan_param.snapshot_ = *snapshot; + } else { ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null rtdef", K(ret), KP_(inv_idx_agg_rtdef), KP_(fwd_idx_rtdef), KP_(doc_id_idx_whole_agg_rtdef)); - } else { - LST_DO_CODE(OB_UNIS_ENCODE, - *inv_idx_agg_rtdef_, - *doc_id_idx_whole_agg_rtdef_, - *fwd_idx_rtdef_); + LOG_ERROR("null snapshot", K(ret), KP(snapshot)); + } + + if (FAILEDx(scan_param.column_ids_.assign(ctdef->access_column_ids_))) { + LOG_WARN("failed to init column ids", K(ret)); } } return ret; } -OB_DEF_DESERIALIZE(ObDASIRRtDef) +int ObTextRetrievalMerge::do_total_doc_cnt() { int ret = OB_SUCCESS; - LST_DO_CODE(OB_UNIS_DECODE, - inv_idx_scan_rtdef_, - need_relevance_); - if (OB_SUCC(ret) && need_relevance_) { - if (OB_ISNULL(inv_idx_agg_rtdef_ = OB_NEWx(ObDASScanRtDef, &allocator_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate memory for rtdef", K(ret)); - } else if (OB_ISNULL(fwd_idx_rtdef_ = OB_NEWx(ObDASScanRtDef, &allocator_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate memory for rtdef", K(ret)); - } else if (OB_ISNULL(doc_id_idx_whole_agg_rtdef_ = OB_NEWx(ObDASScanRtDef, 
&allocator_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate memory for rtdef", K(ret)); + + if (retrieval_param_.get_ir_ctdef()->need_do_total_doc_cnt()) { + // When estimation info not exist, or we found estimation info not accurate, calculate document count by scan + ObITabletScan *tsc_service = MTL(ObAccessService *); + if (OB_ISNULL(tsc_service)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get table access service", K(ret)); + } else if (OB_FAIL(tsc_service->table_scan(whole_doc_agg_param_, whole_doc_cnt_iter_))) { + if (OB_SNAPSHOT_DISCARDED == ret && whole_doc_agg_param_.fb_snapshot_.is_valid()) { + ret = OB_INVALID_QUERY_TIMESTAMP; + } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("failed to do table scan for document count aggregation", K(ret)); + } } else { - LST_DO_CODE(OB_UNIS_DECODE, - *inv_idx_agg_rtdef_, - *doc_id_idx_whole_agg_rtdef_, - *fwd_idx_rtdef_); + if (OB_UNLIKELY(!static_cast(whole_doc_agg_param_.pd_storage_flag_).is_aggregate_pushdown())) { + ret = OB_NOT_IMPLEMENT; + LOG_ERROR("aggregate without pushdown not implemented", K(ret)); + } else if (OB_FAIL(whole_doc_cnt_iter_->get_next_row())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get aggregated row from iter", K(ret)); + } + } + } + } else { + // use estimated document count for relevance estimation + // Need to note that when total doc count is under estimated too much, the IDF component in BM25 + // would be invalidate and result to token frequence have major influence on final relevance score + ObExpr *total_doc_cnt_expr = whole_doc_agg_param_.aggregate_exprs_->at(0); + if (OB_ISNULL(total_doc_cnt_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null total doc cnt expr", K(ret)); + } else { + ObDatum &total_doc_cnt = total_doc_cnt_expr->locate_datum_for_write(*retrieval_param_.get_ir_rtdef()->eval_ctx_); + total_doc_cnt.set_int(retrieval_param_.get_ir_ctdef()->estimated_total_doc_cnt_); + FLOG_INFO("[Salton] use 
estimated row count as partition document count", K(ret), K(total_doc_cnt)); + } + } + + return ret; +} + +void ObTextRetrievalMerge::clear_evaluated_infos() +{ + ObExpr *match_filter = retrieval_param_.get_ir_ctdef()->match_filter_; + ObEvalCtx *eval_ctx = retrieval_param_.get_ir_rtdef()->eval_ctx_; + if (nullptr != match_filter) { + if (match_filter->is_batch_result()) { + match_filter->get_evaluated_flags(*eval_ctx).unset(eval_ctx->get_batch_idx()); + } else { + match_filter->get_eval_info(*eval_ctx).clear_evaluated_flag(); + } + } +} + + + +ObTextRetrievalOp::ObTextRetrievalOp() + : common::ObNewRowIterator(ObNewRowIterator::IterType::ObTextRetrievalOp), + mem_context_(), + token_merge_(), + limit_param_(), + input_row_cnt_(0), + output_row_cnt_(0), + sort_impl_(nullptr), + sort_row_(), + sort_finished_(false), + is_inited_(false) +{ +} + +ObTextRetrievalOp::~ObTextRetrievalOp() +{ + reset(); + ObNewRowIterator::~ObNewRowIterator(); +} + +int ObTextRetrievalOp::init( + const share::ObLSID &ls_id, + const ObTabletID &inv_idx_tablet_id, + const ObTabletID &fwd_idx_tablet_id, + const ObTabletID &doc_id_idx_tablet_id, + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef, + const ObDASSortCtDef *sort_ctdef, + ObDASSortRtDef *sort_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("double initialization", K(ret)); + } else { + if (OB_ISNULL(mem_context_)) { + lib::ContextParam param; + param.set_mem_attr(MTL_ID(), "TextIROp", ObCtxIds::DEFAULT_CTX_ID); + if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) { + LOG_WARN("failed to create text retrieval operator memory context", K(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(token_merge_.init( + ls_id, + inv_idx_tablet_id, + fwd_idx_tablet_id, + doc_id_idx_tablet_id, + ir_ctdef, + ir_rtdef, + tx_desc, + snapshot, + mem_context_->get_arena_allocator()))) { + 
LOG_WARN("failed to init token merge", K(ret)); + } else if (OB_FAIL(init_limit(ir_ctdef, ir_rtdef, sort_ctdef, sort_rtdef))) { + LOG_WARN("failed to init limit", K(ret), KPC(ir_ctdef), KPC(ir_rtdef)); + } else if (nullptr != sort_ctdef && OB_FAIL(init_sort(ir_ctdef, sort_ctdef, sort_rtdef))) { + LOG_WARN("failed to init sort", K(ret), KPC(ir_ctdef), KPC(ir_rtdef)); + } else { + is_inited_ = true; + } + LOG_DEBUG("init text retrieval op", K(ret)); + } + + return ret; +} + +int ObTextRetrievalOp::rescan( + const share::ObLSID &ls_id, + const ObTabletID &inv_idx_tablet_id, + const ObTabletID &fwd_idx_tablet_id, + const ObTabletID &doc_id_idx_tablet_id, + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef, + const ObDASSortCtDef *sort_ctdef, + ObDASSortRtDef *sort_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot) +{ + int ret = OB_SUCCESS; + // TODO: opt rescan + reset(); + if (OB_FAIL(init(ls_id, + inv_idx_tablet_id, + fwd_idx_tablet_id, + doc_id_idx_tablet_id, + ir_ctdef, + ir_rtdef, + sort_ctdef, + sort_rtdef, + tx_desc, + snapshot))) { + LOG_WARN("failed to re init", K(ret)); + } + return ret; +} + +void ObTextRetrievalOp::reset() +{ + token_merge_.reset(); + if (nullptr != sort_impl_) { + sort_impl_->reset(); + sort_impl_->~ObSortOpImpl(); + sort_impl_ = nullptr; + } + if (nullptr != mem_context_) { + mem_context_->reset_remain_one_page(); + DESTROY_CONTEXT(mem_context_); + mem_context_ = nullptr; + } + sort_row_.reset(); + input_row_cnt_ = 0; + output_row_cnt_ = 0; + limit_param_.offset_ = 0; + limit_param_.limit_ = -1; + sort_finished_ = false; + is_inited_ = false; +} + +int ObTextRetrievalOp::get_next_row(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else { + if (limit_param_.limit_ > 0 && output_row_cnt_ >= limit_param_.limit_) { + ret = OB_ITER_END; + LOG_DEBUG("get row with limit finished", + K(ret), K_(limit_param), 
K_(output_row_cnt), K_(input_row_cnt)); + } + + bool got_valid_document = false; + while (OB_SUCC(ret) && !got_valid_document) { + if (OB_FAIL(inner_get_next_row_for_output())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next row from token merge", K(ret)); + } + } else { + ++input_row_cnt_; + if (input_row_cnt_ > limit_param_.offset_) { + got_valid_document = true; + ++output_row_cnt_; + } + } + } + } + + return ret; +} + +int ObTextRetrievalOp::inner_get_next_row_for_output() +{ + int ret = OB_SUCCESS; + if (nullptr != sort_impl_) { + if (!sort_finished_ && OB_FAIL(do_sort())) { + LOG_WARN("failed to do sort", K(ret)); + } else if (OB_FAIL(sort_impl_->get_next_row(sort_row_))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next row from sort op", K(ret)); + } + } + } else if (OB_FAIL(token_merge_.get_next_row())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next row from token merge", K(ret)); + } + } + return ret; +} + +int ObTextRetrievalOp::get_next_rows(int64_t &count, int64_t capacity) +{ + // only one row at a time + // TODO: support batch vectorized execution later + int ret = OB_SUCCESS; + if (OB_FAIL(get_next_row())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next row", K(ret)); + } + } else { + count += 1; + } + return ret; +} + +int ObTextRetrievalOp::init_sort( + const ObDASIRScanCtDef *ir_ctdef, + const ObDASSortCtDef *sort_ctdef, + ObDASSortRtDef *sort_rtdef) +{ + int ret = OB_SUCCESS; + const int64_t top_k_cnt = limit_param_.is_valid() ? 
(limit_param_.limit_ + limit_param_.offset_) : INT64_MAX; + if (OB_ISNULL(sort_ctdef) || OB_ISNULL(sort_rtdef) || OB_ISNULL(ir_ctdef)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null sort def", K(ret), KPC(sort_rtdef), KPC(sort_ctdef), KPC(ir_ctdef)); + } else if (OB_ISNULL(sort_impl_ = OB_NEWx(ObSortOpImpl, &mem_context_->get_arena_allocator()))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate sort op", K(ret)); + } else if (OB_FAIL(sort_impl_->init( + MTL_ID(), + &sort_ctdef->sort_collations_, + &sort_ctdef->sort_cmp_funcs_, + sort_rtdef->eval_ctx_, + &sort_rtdef->eval_ctx_->exec_ctx_, + false, /* enable encode sort key */ + false, /* local order */ + false, /* need rewind */ + 0, /* part_cnt */ + top_k_cnt, + sort_ctdef->fetch_with_ties_))) { + LOG_WARN("failed to init inner sort op", K(ret)); + } else if (OB_FAIL(append(sort_row_, sort_ctdef->sort_exprs_))) { + LOG_WARN("failed to append sort exprs", K(ret)); + } else { + for (int64_t i = 0; i < ir_ctdef->result_output_.count() && OB_SUCC(ret); ++i) { + ObExpr *expr = ir_ctdef->result_output_.at(i); + if (is_contain(sort_row_, expr)) { + // skip + } else if (OB_FAIL(sort_row_.push_back(expr))) { + LOG_WARN("failed to append sort rows", K(ret)); + } + } + } + return ret; +} + +int ObTextRetrievalOp::init_limit( + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef, + const ObDASSortCtDef *sort_ctdef, + ObDASSortRtDef *sort_rtdef) +{ + int ret = OB_SUCCESS; + if (nullptr != sort_ctdef) { + // try init top-k limits + bool is_null = false; + if (OB_UNLIKELY((nullptr != sort_ctdef->limit_expr_ || nullptr != sort_ctdef->offset_expr_) + && ir_rtdef->get_inv_idx_scan_rtdef()->limit_param_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected top k limit with table scan limit pushdown", K(ret), KPC(ir_ctdef), KPC(ir_rtdef)); + } else if (nullptr != sort_ctdef->limit_expr_) { + ObDatum *limit_datum = nullptr; + if 
(OB_FAIL(sort_ctdef->limit_expr_->eval(*sort_rtdef->eval_ctx_, limit_datum))) { + LOG_WARN("failed to eval limit expr", K(ret)); + } else if (limit_datum->is_null()) { + is_null = true; + limit_param_.limit_ = 0; + } else { + limit_param_.limit_ = limit_datum->get_int() < 0 ? 0 : limit_datum->get_int(); + } + } + + if (OB_SUCC(ret) && !is_null && nullptr != sort_ctdef->offset_expr_) { + ObDatum *offset_datum = nullptr; + if (OB_FAIL(sort_ctdef->offset_expr_->eval(*sort_rtdef->eval_ctx_, offset_datum))) { + LOG_WARN("failed to eval offset expr", K(ret)); + } else if (offset_datum->is_null()) { + limit_param_.offset_ = 0; + limit_param_.limit_ = 0; + } else { + limit_param_.offset_ = offset_datum->get_int() < 0 ? 0 : offset_datum->get_int(); + } + } + } else { + // init with table scan pushdown limit + limit_param_ = ir_rtdef->get_inv_idx_scan_rtdef()->limit_param_; + } + return ret; +} + +int ObTextRetrievalOp::do_sort() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(sort_finished_) || OB_ISNULL(sort_impl_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected sort status", K(ret), K_(sort_finished), K_(token_merge), KP_(sort_impl)); + } else { + while (OB_SUCC(ret)) { + if (OB_FAIL(token_merge_.get_next_row())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next index row", K(ret)); + } + } else if (OB_FAIL(sort_impl_->add_row(sort_row_))) { + LOG_WARN("failed to add to to top k processor", K(ret)); + } + } + + if (OB_LIKELY(OB_ITER_END == ret)) { + ret = OB_SUCCESS; + if (OB_FAIL(sort_impl_->sort())) { + LOG_WARN("failed to do top-k sort", K(ret)); + } else { + sort_finished_ = true; + } } } return ret; diff --git a/src/sql/das/ob_text_retrieval_op.h b/src/sql/das/ob_text_retrieval_op.h index 7f8fe2b597..a1bc85aac7 100644 --- a/src/sql/das/ob_text_retrieval_op.h +++ b/src/sql/das/ob_text_retrieval_op.h @@ -13,94 +13,387 @@ #ifndef OBDEV_SRC_SQL_DAS_OB_TEXT_RETRIEVAL_OP_H_ #define OBDEV_SRC_SQL_DAS_OB_TEXT_RETRIEVAL_OP_H_ +#include 
"lib/container/ob_loser_tree.h" #include "sql/das/ob_das_task.h" #include "sql/das/ob_das_scan_op.h" +#include "sql/das/ob_das_attach_define.h" +#include "sql/engine/sort/ob_sort_op_impl.h" +#include "storage/fts/ob_text_retrieval_iterator.h" namespace oceanbase { namespace sql { -static const int64_t OB_MAX_TEXT_RETRIEVAL_TOKEN_CNT = 64; +static const int64_t OB_MAX_TEXT_RETRIEVAL_TOKEN_CNT = 256; -// Lots of meta data in ctdef / rtdef for inverted index / forward index scan are redundant here -// Should we make constant meta datas shared between iterators ? -struct ObDASIRCtDef +struct ObDASIRScanCtDef : ObDASAttachCtDef { OB_UNIS_VERSION(1); public: - ObDASIRCtDef(common::ObIAllocator &alloc) - : allocator_(alloc), + ObDASIRScanCtDef(common::ObIAllocator &alloc) + : ObDASAttachCtDef(alloc, DAS_OP_IR_SCAN), search_text_(nullptr), inv_scan_doc_id_col_(nullptr), - relevance_expr_(nullptr), + inv_scan_doc_length_col_(nullptr), match_filter_(nullptr), + relevance_expr_(nullptr), relevance_proj_col_(nullptr), - inv_idx_scan_ctdef_(alloc), - inv_idx_loc_meta_(alloc), - inv_idx_agg_ctdef_(nullptr), - doc_id_idx_whole_agg_ctdef_(nullptr), - fwd_idx_scan_ctdef_(nullptr), - fwd_idx_loc_meta_(nullptr), - need_relevance_(false) {} - virtual ~ObDASIRCtDef() {} - ObExpr *get_inv_scan_doc_id() const { return inv_scan_doc_id_col_; } - ObExpr *get_relevance_expr() const { return relevance_expr_; } - ObExpr *get_relevance_proj_expr() const { return relevance_proj_col_; } + estimated_total_doc_cnt_(0), + flags_(0) + { + } + bool need_calc_relevance() const { return nullptr != relevance_expr_; } bool need_proj_relevance_score() const { return nullptr != relevance_proj_col_; } - TO_STRING_KV( - KPC_(search_text), - KPC_(inv_scan_doc_id_col), - KPC_(relevance_expr), - KPC_(match_filter), - KPC_(relevance_proj_col), - K_(inv_idx_scan_ctdef), - K_(inv_idx_loc_meta), - KPC_(inv_idx_agg_ctdef), - KPC_(doc_id_idx_whole_agg_ctdef), - KPC_(fwd_idx_scan_ctdef), - KPC_(fwd_idx_loc_meta), - 
K_(need_relevance)); + const ObDASScanCtDef *get_inv_idx_scan_ctdef() const + { + const ObDASScanCtDef *idx_scan_ctdef = nullptr; + if (children_cnt_ > 0 && children_ != nullptr) { + idx_scan_ctdef = static_cast(children_[get_inv_scan_idx()]); + } + return idx_scan_ctdef; + } + const ObDASScanCtDef *get_inv_idx_agg_ctdef() const + { + const ObDASScanCtDef *idx_agg_ctdef = nullptr; + const int64_t ctdef_idx = get_inv_agg_idx(); + if (children_cnt_ > ctdef_idx && ctdef_idx > 0 && children_ != nullptr) { + const ObDASScanCtDef *child = static_cast(children_[ctdef_idx]); + if (child->ir_scan_type_ == ObTSCIRScanType::OB_IR_INV_IDX_AGG) { + idx_agg_ctdef = child; + } + } + return idx_agg_ctdef; + } + const ObDASScanCtDef *get_doc_id_idx_agg_ctdef() const + { + const ObDASScanCtDef *doc_id_idx_agg_ctdef = nullptr; + const int64_t ctdef_idx = get_doc_agg_idx(); + if (children_cnt_ > ctdef_idx && ctdef_idx > 0 && children_ != nullptr) { + const ObDASScanCtDef *child = static_cast(children_[ctdef_idx]); + if (child->ir_scan_type_ == ObTSCIRScanType::OB_IR_DOC_ID_IDX_AGG) { + doc_id_idx_agg_ctdef = child; + } + } + return doc_id_idx_agg_ctdef; + } + const ObDASScanCtDef *get_fwd_idx_agg_ctdef() const + { + const ObDASScanCtDef *fwd_idx_agg_ctdef = nullptr; + const int64_t ctdef_idx = get_fwd_agg_idx(); + if (children_cnt_ > ctdef_idx && ctdef_idx > 0 && children_ != nullptr) { + const ObDASScanCtDef *child = static_cast(children_[ctdef_idx]); + if (child->ir_scan_type_ == ObTSCIRScanType::OB_IR_FWD_IDX_AGG) { + fwd_idx_agg_ctdef = child; + } + } + return fwd_idx_agg_ctdef; + } + int64_t get_inv_scan_idx() const { return 0; } + int64_t get_inv_agg_idx() const { return has_inv_agg_ ? 1 : -1; } + int64_t get_doc_agg_idx() const { return has_doc_id_agg_ ? (1 + has_inv_agg_) : -1; } + int64_t get_fwd_agg_idx() const { return has_fwd_agg_ ? 
(1 + has_inv_agg_ + has_doc_id_agg_) : -1; } + bool need_do_total_doc_cnt() const { return 0 == estimated_total_doc_cnt_; } + + INHERIT_TO_STRING_KV("ObDASBaseCtDef", ObDASBaseCtDef, + K_(flags), + KPC_(search_text), + KPC_(inv_scan_doc_id_col), + KPC_(inv_scan_doc_length_col), + KPC_(match_filter), + KPC_(relevance_expr), + KPC_(relevance_proj_col), + K_(estimated_total_doc_cnt)); - common::ObIAllocator &allocator_; ObExpr *search_text_; ObExpr *inv_scan_doc_id_col_; - ObExpr *relevance_expr_; + ObExpr *inv_scan_doc_length_col_; ObExpr *match_filter_; + ObExpr *relevance_expr_; ObExpr *relevance_proj_col_; - ObDASScanCtDef inv_idx_scan_ctdef_; - ObDASTableLocMeta inv_idx_loc_meta_; - ObDASScanCtDef *inv_idx_agg_ctdef_; - ObDASScanCtDef *doc_id_idx_whole_agg_ctdef_; // aggregate on inverted index with whole range - ObDASScanCtDef *fwd_idx_scan_ctdef_; - ObDASTableLocMeta *fwd_idx_loc_meta_; - bool need_relevance_; + int64_t estimated_total_doc_cnt_; + union + { + uint8_t flags_; + struct + { + uint8_t has_inv_agg_:1; + uint8_t has_doc_id_agg_:1; + uint8_t has_fwd_agg_:1; + uint8_t reserved_:5; + }; + }; }; -struct ObDASIRRtDef +struct ObDASIRScanRtDef : ObDASAttachRtDef { OB_UNIS_VERSION(1); public: - ObDASIRRtDef(common::ObIAllocator &alloc) - : allocator_(alloc), - inv_idx_scan_rtdef_(), - inv_idx_agg_rtdef_(nullptr), - doc_id_idx_whole_agg_rtdef_(nullptr), - fwd_idx_rtdef_(nullptr), - eval_ctx_(nullptr), - need_relevance_(false) {} - virtual ~ObDASIRRtDef() {} + ObDASIRScanRtDef() + : ObDASAttachRtDef(DAS_OP_IR_SCAN) {} - TO_STRING_KV(K_(inv_idx_scan_rtdef), KPC_(inv_idx_agg_rtdef), KPC_(doc_id_idx_whole_agg_rtdef), - KPC_(fwd_idx_rtdef), KPC_(eval_ctx), K_(need_relevance)); - common::ObIAllocator &allocator_; - ObDASScanRtDef inv_idx_scan_rtdef_; - ObDASScanRtDef *inv_idx_agg_rtdef_; - ObDASScanRtDef *doc_id_idx_whole_agg_rtdef_; - ObDASScanRtDef *fwd_idx_rtdef_; - ObEvalCtx *eval_ctx_; - bool need_relevance_; + virtual ~ObDASIRScanRtDef() {} + + ObDASScanRtDef 
*get_inv_idx_scan_rtdef() + { + const ObDASIRScanCtDef *ctdef = static_cast(ctdef_); + const int64_t rtdef_idx = ctdef->get_inv_scan_idx(); + ObDASScanRtDef *idx_scan_rtdef = nullptr; + if (children_cnt_ > rtdef_idx && children_ != nullptr) { + idx_scan_rtdef = static_cast(children_[rtdef_idx]); + } + return idx_scan_rtdef; + } + ObDASScanRtDef *get_inv_idx_agg_rtdef() + { + const ObDASIRScanCtDef *ctdef = static_cast(ctdef_); + const int64_t rtdef_idx = ctdef->get_inv_agg_idx(); + ObDASScanRtDef *idx_agg_rtdef = nullptr; + if (children_cnt_ > rtdef_idx && rtdef_idx > 0 && children_ != nullptr) { + idx_agg_rtdef = static_cast(children_[rtdef_idx]); + } + return idx_agg_rtdef; + } + ObDASScanRtDef *get_doc_id_idx_agg_rtdef() + { + const ObDASIRScanCtDef *ctdef = static_cast(ctdef_); + const int64_t rtdef_idx = ctdef->get_doc_agg_idx(); + ObDASScanRtDef *doc_id_idx_agg_rtdef = nullptr; + if (children_cnt_ > rtdef_idx && rtdef_idx > 0 && children_ != nullptr) { + doc_id_idx_agg_rtdef = static_cast(children_[rtdef_idx]); + } + return doc_id_idx_agg_rtdef; + } + ObDASScanRtDef *get_fwd_idx_agg_rtdef() const + { + const ObDASIRScanCtDef *ctdef = static_cast(ctdef_); + const int64_t rtdef_idx = ctdef->get_fwd_agg_idx(); + ObDASScanRtDef *fwd_idx_agg_rtdef = nullptr; + if (children_cnt_ > rtdef_idx && rtdef_idx > 0 && children_ != nullptr) { + fwd_idx_agg_rtdef = static_cast(children_[rtdef_idx]); + } + return fwd_idx_agg_rtdef; + } }; +struct ObDASIRAuxLookupCtDef : ObDASAttachCtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASIRAuxLookupCtDef(common::ObIAllocator &alloc) + : ObDASAttachCtDef(alloc, DAS_OP_IR_AUX_LOOKUP), + relevance_proj_col_(nullptr) + { } + + const ObDASBaseCtDef *get_doc_id_scan_ctdef() const + { + OB_ASSERT(2 == children_cnt_ && children_ != nullptr); + return children_[0]; + } + const ObDASScanCtDef *get_lookup_scan_ctdef() const + { + OB_ASSERT(children_cnt_ == 2 && children_ != nullptr); + return static_cast(children_[1]); + } + + ObExpr 
*relevance_proj_col_; +}; + +struct ObDASIRAuxLookupRtDef : ObDASAttachRtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASIRAuxLookupRtDef() + : ObDASAttachRtDef(DAS_OP_IR_AUX_LOOKUP) + {} + + virtual ~ObDASIRAuxLookupRtDef() {} + + ObDASBaseRtDef *get_doc_id_scan_rtdef() + { + OB_ASSERT(2 == children_cnt_ && children_ != nullptr); + return children_[0]; + } + ObDASScanRtDef *get_lookup_scan_rtdef() + { + OB_ASSERT(children_cnt_ == 2 && children_ != nullptr); + return static_cast(children_[1]); + } +}; + +struct ObIRIterLoserTreeItem +{ + ObIRIterLoserTreeItem(); + ~ObIRIterLoserTreeItem() = default; + + TO_STRING_KV(K_(iter_idx), K_(relevance), K_(doc_id), K(doc_id_.get_string())); + + double relevance_; + ObDocId doc_id_; + int64_t iter_idx_; +}; + +struct ObIRIterLoserTreeCmp +{ + ObIRIterLoserTreeCmp(); + virtual ~ObIRIterLoserTreeCmp(); + + int init(); + int cmp(const ObIRIterLoserTreeItem &l, const ObIRIterLoserTreeItem &r, int64_t &cmp_ret); +private: + common::ObDatumCmpFuncType cmp_func_; + bool is_inited_; +}; + +typedef common::ObLoserTree ObIRIterLoserTree; + +class ObTextRetrievalMerge : public common::ObNewRowIterator +{ +public: + enum TokenRelationType + { + DISJUNCTIVE = 0, + // CONJUNCTIVE = 1, + // BOOLEAN = 2, + MAX_RELATION_TYPE + }; + enum RetrievalProcType + { + DAAT = 0, + // TAAT = 1, + // VAAT = 2, + MAX_PROC_TYPE + }; +public: + ObTextRetrievalMerge(); + virtual ~ObTextRetrievalMerge(); + + int init( + const share::ObLSID &ls_id, + const ObTabletID &inv_idx_tablet_id, + const ObTabletID &fwd_idx_tablet_id, + const ObTabletID &doc_id_idx_tablet_id, + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + ObIAllocator &allocator); + int rescan( + const share::ObLSID &ls_id, + const ObTabletID &inv_idx_tablet_id, + const ObTabletID &fwd_idx_tablet_id, + const ObTabletID &doc_id_idx_tablet_id, + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef, + 
transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + ObIAllocator &allocator); + + virtual int get_next_row(ObNewRow *&row) override; + virtual int get_next_row() override { ObNewRow *r = nullptr; return get_next_row(r); } + virtual int get_next_rows(int64_t &count, int64_t capacity) override; + virtual void reset() override; +private: + int init_iter_params( + const share::ObLSID &ls_id, + const ObTabletID &inv_idx_tablet_id, + const ObTabletID &fwd_idx_tablet_id, + const ObTabletID &doc_id_idx_tablet_id, + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef); + int init_iters( + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + const ObIArray &query_tokens); + int init_query_tokens(const ObDASIRScanCtDef *ir_ctdef, ObDASIRScanRtDef *ir_rtdef); + void release_iters(); + int pull_next_batch_rows(); + int fill_loser_tree_item( + storage::ObTextRetrievalIterator &iter, + const int64_t iter_idx, + ObIRIterLoserTreeItem &item); + int next_disjunctive_document(); + int init_total_doc_cnt_param(transaction::ObTxDesc *tx_desc, transaction::ObTxReadSnapshot *snapshot); + int do_total_doc_cnt(); + int project_result(const ObIRIterLoserTreeItem &item, const double relevance); + void clear_evaluated_infos(); +private: + static const int64_t OB_DEFAULT_QUERY_TOKEN_ITER_CNT = 4; + typedef ObSEArray ObTokenRetrievalIterArray; + TokenRelationType relation_type_; + RetrievalProcType processing_type_; + ObIAllocator *allocator_; + ObTokenRetrievalParam retrieval_param_; + ObArray query_tokens_; + ObTokenRetrievalIterArray token_iters_; + ObIRIterLoserTreeCmp loser_tree_cmp_; + ObIRIterLoserTree *iter_row_heap_; + ObFixedArray next_batch_iter_idxes_; + int64_t next_batch_cnt_; + common::ObNewRowIterator *whole_doc_cnt_iter_; + ObTableScanParam whole_doc_agg_param_; + bool doc_cnt_calculated_; + bool is_inited_; +}; + + +class ObTextRetrievalOp : public common::ObNewRowIterator +{ +public: + ObTextRetrievalOp(); + virtual 
~ObTextRetrievalOp(); + + int init( + const share::ObLSID &ls_id, + const ObTabletID &inv_idx_tablet_id, + const ObTabletID &fwd_idx_tablet_id, + const ObTabletID &doc_id_idx_tablet_id, + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef, + const ObDASSortCtDef *sort_ctdef, + ObDASSortRtDef *sort_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot); + int rescan( + const share::ObLSID &ls_id, + const ObTabletID &inv_idx_tablet_id, + const ObTabletID &fwd_idx_tablet_id, + const ObTabletID &doc_id_idx_tablet_id, + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef, + const ObDASSortCtDef *sort_ctdef, + ObDASSortRtDef *sort_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot); + + virtual int get_next_row(ObNewRow *&row) override; + virtual int get_next_row() override { ObNewRow *r = nullptr; return get_next_row(r); } + virtual int get_next_rows(int64_t &count, int64_t capacity) override; + virtual void reset() override; +private: + int inner_get_next_row_for_output(); + int init_sort( + const ObDASIRScanCtDef *ir_ctdef, + const ObDASSortCtDef *sort_ctdef, + ObDASSortRtDef *sort_rtdef); + int init_limit( + const ObDASIRScanCtDef *ir_ctdef, + ObDASIRScanRtDef *ir_rtdef, + const ObDASSortCtDef *sort_ctdef, + ObDASSortRtDef *sort_rtdef); + int do_sort(); +private: + lib::MemoryContext mem_context_; + ObTextRetrievalMerge token_merge_; + common::ObLimitParam limit_param_; + int64_t input_row_cnt_; + int64_t output_row_cnt_; + ObSortOpImpl *sort_impl_; + ObSEArray sort_row_; + bool sort_finished_; + bool is_inited_; +}; + + } // namespace sql } // namespace oceanbase diff --git a/src/sql/engine/basic/ob_group_join_buffer.cpp b/src/sql/engine/basic/ob_group_join_buffer.cpp index 91f2d17149..c99a06dbe3 100644 --- a/src/sql/engine/basic/ob_group_join_buffer.cpp +++ b/src/sql/engine/basic/ob_group_join_buffer.cpp @@ -113,10 +113,10 @@ ObGroupJoinBufffer::ObGroupJoinBufffer() 
above_left_group_params_(), above_right_group_params_(), group_params_(), above_group_params_(), last_row_(), last_batch_(), - right_cnt_(0), cur_group_idx_(0), left_store_read_(0), + right_cnt_(0), cur_group_idx_(-1), left_store_read_(0), above_group_idx_for_expand_(0), above_group_idx_for_read_(0), above_group_size_(0), max_group_size_(0), - group_scan_size_(0), flags_(0) + group_scan_size_(0), group_rescan_cnt_(0), rescan_params_info_(), flags_(0) { need_check_above_ = true; } @@ -298,9 +298,6 @@ int ObGroupJoinBufffer::fill_cur_row_group_param() } } } - if (OB_SUCC(ret)) { - cur_group_idx_++; - } return ret; } @@ -463,7 +460,13 @@ int ObGroupJoinBufffer::rescan_right() if (skip_rescan_right_) { skip_rescan_right_ = false; } else { + cur_group_idx_++; + if (OB_FAIL(fill_cur_row_group_param())) { + LOG_WARN("failed to fill cur row group param"); + } for (int64_t i = 0; OB_SUCC(ret) && i < right_cnt_; i++) { + GroupParamBackupGuard guard(right_[i].get_exec_ctx().get_das_ctx()); + guard.bind_batch_rescan_params(cur_group_idx_, group_rescan_cnt_, &rescan_params_info_); int cur_ret = right_[i].rescan(); if (OB_SUCC(cur_ret) || OB_ITER_END == cur_ret) { if (0 == i) { @@ -507,6 +510,7 @@ int ObGroupJoinBufffer::fill_group_buffer() } if (OB_SUCC(ret)) { reset_buffer_state(); + group_rescan_cnt_++; if (OB_FAIL(last_row_.init( mem_context_->get_malloc_allocator(), left_->get_spec().output_.count()))) { LOG_WARN("failed to init right last row", KR(ret)); @@ -557,8 +561,6 @@ int ObGroupJoinBufffer::fill_group_buffer() LOG_WARN("finish add row to row store failed", KR(ret)); } else if (OB_FAIL(left_store_.begin(left_store_iter_))) { LOG_WARN("begin iterator for chunk row store failed", KR(ret)); - } else if (OB_FAIL(bind_group_params_to_store())) { - LOG_WARN("bind group params to store failed", KR(ret)); } else if (OB_FAIL(rescan_right())) { ret = (OB_ITER_END == ret) ? 
OB_ERR_UNEXPECTED : ret; LOG_WARN("rescan right failed", KR(ret)); @@ -600,11 +602,13 @@ int ObGroupJoinBufffer::batch_fill_group_buffer(const int64_t max_row_cnt, } batch_rows = &left_->get_brs(); reset_buffer_state(); + group_rescan_cnt_++; while (OB_SUCC(ret) && !is_full() && !batch_rows->end_) { op_->clear_evaluated_flag(); if (!rescan_params_->empty()) { op_->set_pushdown_param_null(*rescan_params_); } + DASGroupScanMarkGuard mark_guard(ctx_->get_das_ctx(), true); if (OB_FAIL(left_->get_next_batch(max_row_cnt, batch_rows))) { LOG_WARN("get next batch from left failed", KR(ret)); } @@ -658,8 +662,6 @@ int ObGroupJoinBufffer::batch_fill_group_buffer(const int64_t max_row_cnt, LOG_WARN("finish add row to row store failed", KR(ret)); } else if (OB_FAIL(left_store_.begin(left_store_iter_))) { LOG_WARN("begin iterator for chunk row store failed", KR(ret)); - } else if (OB_FAIL(bind_group_params_to_store())) { - LOG_WARN("bind group params to store failed", KR(ret)); } else if (OB_FAIL(rescan_right())) { ret = (OB_ITER_END == ret) ? 
OB_ERR_UNEXPECTED : ret; LOG_WARN("rescan right failed", KR(ret)); @@ -831,6 +833,24 @@ int ObGroupJoinBufffer::init_group_params() } } } + + // collect batch nlj params needed by rescan right op + if (OB_FAIL(ret) || (group_params_.empty())) { + // do nothing + } else if (rescan_params_info_.empty()) { // only perform once + int64_t rescan_params_info_cnt = group_params_.count(); + if (OB_FAIL(rescan_params_info_.allocate_array(ctx_->get_allocator(),rescan_params_info_cnt))) { + LOG_WARN("failed to allocate group param info", K(ret), K(rescan_params_info_cnt)); + } else { + // collect rescan params of current nlj op + int64_t j = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < group_params_.count() && j < rescan_params_info_.count(); ++i, ++j) { + int64_t param_idx = rescan_params_->at(i).param_idx_; + rescan_params_info_.at(j).param_idx_ = param_idx; + rescan_params_info_.at(j).gr_param_ = &group_params_.at(i); + } + } + } return ret; } @@ -880,39 +900,6 @@ int ObGroupJoinBufffer::deep_copy_dynamic_obj() return ret; } -int ObGroupJoinBufffer::bind_group_params_to_store() -{ - int ret = OB_SUCCESS; - int64_t param_cnt = rescan_params_->count(); - ParamStore &param_store = GET_PHY_PLAN_CTX(*ctx_)->get_param_store_for_update(); - if (OB_UNLIKELY(param_cnt != group_params_.count())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("param count is invalid", KR(ret), K(param_cnt), K(group_params_.count())); - } - for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; i++) { - const ObDynamicParamSetter &rescan_param = rescan_params_->at(i); - int64_t param_idx = rescan_param.param_idx_; - int64_t array_obj_addr = reinterpret_cast<int64_t>(&group_params_.at(i)); - param_store.at(param_idx).set_extend(array_obj_addr, T_EXT_SQL_ARRAY); - } - if (OB_FAIL(ret)) { - // do nothing - } else if (is_multi_level_) { - if (OB_UNLIKELY(above_group_params_.count() != right_rescan_params_->count())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("param counts do not match", KR(ret), - K(above_group_params_.count()), -
K(right_rescan_params_->count())); - } - for (int64_t i = 0; OB_SUCC(ret) && i < right_rescan_params_->count(); i++) { - int64_t param_idx = right_rescan_params_->at(i).param_idx_; - int64_t array_obj_addr = reinterpret_cast<int64_t>(&above_group_params_.at(i)); - param_store.at(param_idx).set_extend(array_obj_addr, T_EXT_SQL_ARRAY); - } - } - return ret; -} - int ObGroupJoinBufffer::prepare_rescan_params() { int ret = OB_SUCCESS; @@ -943,24 +930,39 @@ int ObGroupJoinBufffer::build_above_group_params( int ret = OB_SUCCESS; group_size = 0; ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(*ctx_); - for (int64_t i = 0; OB_SUCC(ret) && i < above_rescan_params.count(); i++) { - int64_t param_idx = above_rescan_params.at(i).param_idx_; - const ObObjParam &obj_param = plan_ctx->get_param_store().at(param_idx); - ObSqlArrayObj *array_obj = NULL; - if (obj_param.is_ext_sql_array()) { - array_obj = reinterpret_cast<ObSqlArrayObj*>(obj_param.get_ext()); - if (0 == group_size) { - group_size = array_obj->count_; - } else if (OB_UNLIKELY(group_size != array_obj->count_)) { + const GroupParamArray* group_params_above = nullptr; + if (OB_ISNULL(ctx_)) { // validate ctx_ before the branches below dereference it + ret = OB_ERR_UNEXPECTED; + LOG_WARN("exec ctx is nullptr", K(ret)); + } else if (OB_ISNULL(group_params_above = ctx_->get_das_ctx().get_group_params())) { + // the above operator of this nlj don't use batch rescan, do nothing + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < above_rescan_params.count(); i++) { + int64_t param_idx = above_rescan_params.at(i).param_idx_; + ObSqlArrayObj *array_obj = NULL; + uint64_t array_idx = OB_INVALID_ID; + bool exist = false; + if (OB_FAIL(ctx_->get_das_ctx().find_group_param_by_param_idx(param_idx, exist, array_idx))) { + LOG_WARN("failed to find group param by param idx", K(ret), K(i), K(param_idx)); + } else if (!exist || array_idx == OB_INVALID_ID || array_idx >= group_params_above->count()) { // array_idx is passed to at(), so it must be strictly below count() + ret = OB_ERR_UNEXPECTED; +
LOG_WARN("failed to find group param", K(ret), K(exist), K(i), K(array_idx)); + } else { + const GroupRescanParam &group_param = group_params_above->at(array_idx); + array_obj = group_param.gr_param_; + if (0 == group_size) { + group_size = array_obj->count_; + } else if (OB_UNLIKELY(group_size != array_obj->count_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("group sizes do not match", KR(ret), + K(group_size), K(array_obj->count_)); + } + } + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(above_group_params.push_back(array_obj))) { + LOG_WARN("push array obj failed", KR(ret), K(i), KP(array_obj)); } - } - if (OB_FAIL(ret)) { - // do nothing - } else if (OB_FAIL(above_group_params.push_back(array_obj))) { - LOG_WARN("push array obj failed", KR(ret), K(i), KP(array_obj)); } } return ret; @@ -988,7 +990,7 @@ int ObGroupJoinBufffer::set_above_group_size() { void ObGroupJoinBufffer::reset_buffer_state() { - cur_group_idx_ = 0; + cur_group_idx_ = -1; left_store_read_ = 0; left_store_iter_.reset(); left_store_.reset(); @@ -1050,5 +1052,38 @@ int ObGroupJoinBufffer::restore_above_params(common::ObIArray &left_ } return ret; } + +int ObGroupJoinBufffer::get_next_batch_from_right(int64_t max_batch_size, const ObBatchRows *brs) +{ + int ret = OB_SUCCESS; + if (right_cnt_ == 1) { + GroupParamBackupGuard guard(right_[0].get_exec_ctx().get_das_ctx()); + guard.bind_batch_rescan_params(cur_group_idx_, group_rescan_cnt_, &rescan_params_info_); + if (OB_FAIL(right_[0].get_next_batch(max_batch_size, brs))) { + LOG_WARN("failed to get next batch from right op in batch NLJ", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("the right child cnt of NLJ is not 1", K(ret)); + } + return ret; +} + +int ObGroupJoinBufffer::get_next_row_from_right() +{ + int ret = OB_SUCCESS; + if (right_cnt_ == 1) { + GroupParamBackupGuard guard(right_[0].get_exec_ctx().get_das_ctx()); + guard.bind_batch_rescan_params(cur_group_idx_, group_rescan_cnt_, &rescan_params_info_); + if 
(OB_FAIL(right_[0].get_next_row())) { + LOG_WARN("failed to get next row from right op in batch NLJ", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("the right child cnt of NLJ is not 1", K(ret)); + } + return ret; +} + } // end namespace sql } // end namespace oceanbase diff --git a/src/sql/engine/basic/ob_group_join_buffer.h b/src/sql/engine/basic/ob_group_join_buffer.h index 2fdd6d8218..af88596b50 100644 --- a/src/sql/engine/basic/ob_group_join_buffer.h +++ b/src/sql/engine/basic/ob_group_join_buffer.h @@ -15,6 +15,7 @@ #include "sql/engine/basic/ob_chunk_datum_store.h" #include "sql/engine/ob_operator.h" +#include "sql/das/ob_das_context.h" namespace oceanbase { @@ -86,10 +87,11 @@ public: int get_next_batch_from_store(int64_t max_rows, int64_t &read_rows); ObBatchRowDatums &get_last_batch() { return last_batch_; } void destroy(); + int get_next_row_from_right(); + int get_next_batch_from_right(int64_t max_batch_size, const ObBatchRows *brs); private: int init_group_params(); int deep_copy_dynamic_obj(); - int bind_group_params_to_store(); int prepare_rescan_params(); int get_next_left_iter(); int add_row_to_store(); @@ -153,6 +155,8 @@ private: int64_t above_group_size_; int64_t max_group_size_; int64_t group_scan_size_; + int64_t group_rescan_cnt_; + GroupParamArray rescan_params_info_; union { uint64_t flags_; struct { diff --git a/src/sql/engine/cmd/ob_table_executor.cpp b/src/sql/engine/cmd/ob_table_executor.cpp index 610aa40b5e..b11e43bd9d 100644 --- a/src/sql/engine/cmd/ob_table_executor.cpp +++ b/src/sql/engine/cmd/ob_table_executor.cpp @@ -853,8 +853,11 @@ int ObAlterTableExecutor::alter_table_rpc_v2( } else if (OB_ISNULL(create_index_arg = static_cast(index_arg))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("create index arg is null", KR(ret), K(i)); - } else if (INDEX_TYPE_PRIMARY == create_index_arg->index_type_) { - // do nothing + } else if (INDEX_TYPE_PRIMARY == create_index_arg->index_type_ || + is_fts_index(create_index_arg->index_type_) 
|| + is_multivalue_index(create_index_arg->index_type_)) { + // TODO hanxuan temporary bypass, since res.res_arg_array_ is empty + // TODO yunyi temporary bypass, since res.res_arg_array_ is empty } else if (!is_sync_ddl_user) { // 只考虑非备份恢复时的索引同步检查 create_index_arg->index_schema_.set_table_id(res.res_arg_array_.at(i).schema_id_); diff --git a/src/sql/engine/dml/ob_dml_service.cpp b/src/sql/engine/dml/ob_dml_service.cpp index 71f2eb86e2..b7144aadbc 100644 --- a/src/sql/engine/dml/ob_dml_service.cpp +++ b/src/sql/engine/dml/ob_dml_service.cpp @@ -1232,6 +1232,7 @@ int ObDMLService::init_das_dml_rtdef(ObDMLRtCtx &dml_rtctx, ObDASCtx &das_ctx = dml_rtctx.get_exec_ctx().get_das_ctx(); uint64_t table_loc_id = das_ctdef.table_id_; uint64_t ref_table_id = das_ctdef.index_tid_; + das_rtdef.ctdef_ = &das_ctdef; das_rtdef.timeout_ts_ = plan_ctx->get_ps_timeout_timestamp(); das_rtdef.prelock_ = my_session->get_prelock(); das_rtdef.tenant_schema_version_ = plan_ctx->get_tenant_schema_version(); @@ -2023,7 +2024,7 @@ int ObDMLService::check_local_index_affected_rows(int64_t table_affected_rows, int ret = OB_SUCCESS; if (GCONF.enable_defensive_check()) { if (table_affected_rows != index_affected_rows - && !related_ctdef.table_param_.get_data_table().is_spatial_index() + && !related_ctdef.table_param_.get_data_table().is_domain_index() && !related_ctdef.table_param_.get_data_table().is_mlog_table()) { ret = OB_ERR_DEFENSIVE_CHECK; ObString func_name = ObString::make_string("check_local_index_affected_rows"); diff --git a/src/sql/engine/expr/ob_expr_autoinc_nextval.cpp b/src/sql/engine/expr/ob_expr_autoinc_nextval.cpp index 6751748d49..09a469cfd4 100644 --- a/src/sql/engine/expr/ob_expr_autoinc_nextval.cpp +++ b/src/sql/engine/expr/ob_expr_autoinc_nextval.cpp @@ -37,6 +37,27 @@ ObExprAutoincNextval::ObExprAutoincNextval(ObIAllocator &alloc) } +ObExprAutoincNextval::ObExprAutoincNextval( + common::ObIAllocator &alloc, + ObExprOperatorType type, + const char *name, + int32_t 
param_num, + ObValidForGeneratedColFlag valid_for_generated_col, + int32_t dimension, + bool is_internal_for_mysql/* = false */, + bool is_internal_for_oracle/* = false */) + : ObFuncExprOperator(alloc, + type, + name, + param_num, + valid_for_generated_col, + dimension, + is_internal_for_mysql, + is_internal_for_oracle) +{ + disable_operand_auto_cast(); +} + ObExprAutoincNextval::~ObExprAutoincNextval() { } @@ -392,7 +413,7 @@ int ObExprAutoincNextval::eval_nextval( // this column with column_index is auto-increment column if (OB_ISNULL(autoinc_param)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("should find auto-increment param", K(ret)); + LOG_WARN("should find auto-increment param", K(ret), K(autoinc_table_id), K(autoinc_col_id), K(autoinc_params)); } // sync last user specified value first(compatible with MySQL) diff --git a/src/sql/engine/expr/ob_expr_autoinc_nextval.h b/src/sql/engine/expr/ob_expr_autoinc_nextval.h index aadc947cc4..9ed7e3c250 100644 --- a/src/sql/engine/expr/ob_expr_autoinc_nextval.h +++ b/src/sql/engine/expr/ob_expr_autoinc_nextval.h @@ -29,6 +29,15 @@ class ObExprAutoincNextval : public ObFuncExprOperator OB_UNIS_VERSION_V(1); public: explicit ObExprAutoincNextval(common::ObIAllocator &alloc); + ObExprAutoincNextval( + common::ObIAllocator &alloc, + ObExprOperatorType type, + const char *name, + int32_t param_num, + ObValidForGeneratedColFlag valid_for_generated_col, + int32_t dimension, + bool is_internal_for_mysql = false, + bool is_internal_for_oracle = false); virtual ~ObExprAutoincNextval(); virtual int calc_result_typeN(ObExprResType &type, diff --git a/src/sql/engine/expr/ob_expr_bm25.cpp b/src/sql/engine/expr/ob_expr_bm25.cpp new file mode 100644 index 0000000000..3d9e827ce9 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_bm25.cpp @@ -0,0 +1,116 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_ENG +#include <cmath> +#include "sql/engine/expr/ob_expr_bm25.h" +#include "sql/resolver/expr/ob_raw_expr.h" + +namespace oceanbase +{ +namespace sql +{ +ObExprBM25::ObExprBM25(ObIAllocator &alloc) + : ObFuncExprOperator(alloc, T_FUN_SYS_BM25, N_BM25, 5, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) +{ +} + +int ObExprBM25::calc_result_typeN( + ObExprResType &result_type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const +{ + int ret = OB_SUCCESS; + UNUSED(type_ctx); + if (OB_UNLIKELY(param_num != 5)) { // one argument per *_PARAM_IDX slot set below + ret = OB_INVALID_ARGUMENT; + LOG_WARN("BM25 expr should have 5 parameters", K(ret), K(param_num)); + } else { + types[TOKEN_DOC_CNT_PARAM_IDX].set_calc_type(ObIntType); + types[TOTAL_DOC_CNT_PARAM_IDX].set_calc_type(ObIntType); + types[DOC_TOKEN_CNT_PARAM_IDX].set_calc_type(ObIntType); + types[AVG_DOC_CNT_PARAM_IDX].set_calc_type(ObDoubleType); + types[RELATED_TOKEN_CNT_PARAM_IDX].set_calc_type(ObUInt64Type); + result_type.set_double(); + } + return ret; +} + +int ObExprBM25::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const +{ + int ret = OB_SUCCESS; + UNUSED(expr_cg_ctx); + CK(5 == raw_expr.get_param_count()); + rt_expr.eval_func_ = eval_bm25_relevance_expr; + return ret; +} + +int ObExprBM25::eval_bm25_relevance_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum) +{ + int ret = OB_SUCCESS; + ObDatum *token_doc_cnt_datum = nullptr; + ObDatum *total_doc_cnt_datum = nullptr; + ObDatum *doc_token_cnt_datum = nullptr; + ObDatum *avg_doc_token_cnt_datum = nullptr; + ObDatum *related_token_cnt_datum = nullptr; + if
(OB_FAIL(expr.eval_param_value( + ctx, + token_doc_cnt_datum, + total_doc_cnt_datum, + doc_token_cnt_datum, + avg_doc_token_cnt_datum, + related_token_cnt_datum))) { + LOG_WARN("evaluate parameter value failed", K(ret)); + } else if (OB_UNLIKELY(token_doc_cnt_datum->is_null() || total_doc_cnt_datum->is_null() + || doc_token_cnt_datum->is_null() || avg_doc_token_cnt_datum->is_null() || related_token_cnt_datum->is_null())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null datum", K(ret), KPC(token_doc_cnt_datum), KPC(total_doc_cnt_datum), + KPC(doc_token_cnt_datum), KPC(avg_doc_token_cnt_datum), KPC(related_token_cnt_datum)); + } else { + const int64_t token_doc_cnt = token_doc_cnt_datum->get_int(); + const int64_t total_doc_cnt = total_doc_cnt_datum->get_int(); + const int64_t related_token_cnt = related_token_cnt_datum->get_uint(); + const int64_t doc_token_cnt = doc_token_cnt_datum->get_int(); + const double avg_doc_token_cnt = avg_doc_token_cnt_datum->get_double(); + const double norm_len = doc_token_cnt / avg_doc_token_cnt; + const double token_weight = query_token_weight(token_doc_cnt, total_doc_cnt); + const double doc_weight = doc_token_weight(related_token_cnt, norm_len); + const double relevance = token_weight * doc_weight; + res_datum.set_double(relevance); + LOG_DEBUG("show bm25 parameters for current document", + K(token_doc_cnt), K(total_doc_cnt), K(related_token_cnt), K(doc_token_cnt), K(avg_doc_token_cnt), + K(norm_len), K(token_weight), K(doc_weight), K(relevance)); + } + return ret; +} + +double ObExprBM25::doc_token_weight(const int64_t token_freq, const double norm_len) +{ + const double tf = static_cast(token_freq); + return tf / (tf + p_k1 * (1.0 - p_b + p_b * norm_len)); +} + +double ObExprBM25::query_token_weight(const int64_t doc_freq, const int64_t doc_cnt) +{ + const double df = static_cast(doc_freq); + const double len = static_cast(doc_cnt); + // Since we might use approximate count statistic for total doc cnt, possibilities there 
are + // document frequencies larger than total doc cnt + const double diff = (len - df) > 0 ? (len - df) : 0; + const double idf = std::log((diff + 0.5) / (df + 0.5)); + return MAX(p_epsilon, idf) * (1.0 + p_k1); +} + + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/engine/expr/ob_expr_bm25.h b/src/sql/engine/expr/ob_expr_bm25.h new file mode 100644 index 0000000000..01e6bb9957 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_bm25.h @@ -0,0 +1,70 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef _OB_EXPR_BM25_H_ +#define _OB_EXPR_BM25_H_ + +#include "sql/engine/expr/ob_expr_operator.h" + +namespace oceanbase +{ +namespace sql +{ + +/** + * An implementation of the Okapi BM25 relevance estimation / ranking algorithm + * + * Params: + * token document count: number of documents that contain the query token + * total document count: number of documents in the retrieval domain + * document token count: number of tokens in the specific document + * average document token count: average number of tokens per document in the retrieval domain + * related token count: number of occurrences of the query token in the specific document + * + * p_k1, p_b, p_epsilon: parameters that tune the BM25 score, hard-coded for now.
+ */ +class ObExprBM25 : public ObFuncExprOperator +{ +public: + explicit ObExprBM25(common::ObIAllocator &alloc); + virtual ~ObExprBM25() {} + + virtual int calc_result_typeN( + ObExprResType &result_type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const override; + virtual int cg_expr( + ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; + + static int eval_bm25_relevance_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum); +public: + static constexpr int TOKEN_DOC_CNT_PARAM_IDX = 0; + static constexpr int TOTAL_DOC_CNT_PARAM_IDX = 1; + static constexpr int DOC_TOKEN_CNT_PARAM_IDX = 2; + static constexpr int AVG_DOC_CNT_PARAM_IDX = 3; + static constexpr int RELATED_TOKEN_CNT_PARAM_IDX = 4; +private: + static double doc_token_weight(const int64_t token_freq, const double norm_len); + static double query_token_weight(const int64_t doc_freq, const int64_t doc_cnt); + static constexpr double p_k1 = 1.2; + static constexpr double p_b = 0.75; + static constexpr double p_epsilon = 0.25; + DISALLOW_COPY_AND_ASSIGN(ObExprBM25); +}; + +} // namespace sql +} // namespace oceanbase + +#endif \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_doc_id.cpp b/src/sql/engine/expr/ob_expr_doc_id.cpp new file mode 100644 index 0000000000..1038ce811e --- /dev/null +++ b/src/sql/engine/expr/ob_expr_doc_id.cpp @@ -0,0 +1,102 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE_FTS + +#include "sql/engine/expr/ob_expr_doc_id.h" +#include "sql/engine/ob_exec_context.h" +#include "share/ob_tablet_autoincrement_service.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ +ObExprDocID::ObExprDocID(ObIAllocator &allocator) + : ObFuncExprOperator(allocator, T_FUN_SYS_DOC_ID, N_DOC_ID, ZERO_OR_ONE, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) +{ + need_charset_convert_ = false; + STORAGE_FTS_LOG(DEBUG, "construct doc id expr", K(common::lbt())); +} + +int ObExprDocID::calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + ObExprTypeCtx &type_ctx) const +{ + int ret = OB_SUCCESS; + UNUSEDx(param_num, types); + type.set_varbinary(); + type.set_length(sizeof(ObDocId)); + return ret; +} + +int ObExprDocID::calc_resultN(ObObj &result, + const ObObj *objs_array, + int64_t param_num, + ObExprCtx &expr_ctx) const +{ + return OB_NOT_SUPPORTED; +} + +int ObExprDocID::cg_expr( + ObExprCGCtx &cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(rt_expr.arg_cnt_ != 1) || OB_ISNULL(rt_expr.args_) || ObVarcharType != rt_expr.datum_meta_.type_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected params", K(rt_expr.arg_cnt_), K(rt_expr.args_), K(rt_expr.type_)); + } else { + rt_expr.eval_func_ = generate_doc_id; + } + return ret; +} + +/*static*/ int ObExprDocID::generate_doc_id( + const ObExpr &raw_ctx, + ObEvalCtx &eval_ctx, + ObDatum &expr_datum) +{ + int ret = OB_SUCCESS; + common::ObDatum *datum = nullptr; + if (OB_UNLIKELY(1 != raw_ctx.arg_cnt_) || OB_ISNULL(raw_ctx.args_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(raw_ctx), KP(raw_ctx.args_)); + } else if (OB_FAIL(raw_ctx.args_[0]->eval(eval_ctx, datum))) { + LOG_WARN("fail to eval tablet id", K(ret), K(raw_ctx), K(eval_ctx)); + } else if (OB_ISNULL(datum)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null datum ptr", K(ret), 
KP(datum)); + } else { + share::ObTabletAutoincrementService &auto_inc = share::ObTabletAutoincrementService::get_instance(); + const ObTabletID tablet_id(datum->get_int()); + uint64_t seq_id = 0; + uint64_t buf_len = sizeof(ObDocId); + uint64_t *buf = reinterpret_cast(raw_ctx.get_str_res_mem(eval_ctx, buf_len)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory", K(ret), KP(buf)); + } else if (OB_FAIL(auto_inc.get_autoinc_seq(MTL_ID(), tablet_id, seq_id))) { + LOG_WARN("fail to get tablet autoinc seq", K(ret), K(tablet_id)); + } else { + ObDocId *doc_id = new (buf) ObDocId(tablet_id.id(), seq_id); + expr_datum.set_string(doc_id->get_string()); + FLOG_INFO("succeed to genearte document id", K(tablet_id), K(seq_id)); + } + } + return ret; +} + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/engine/expr/ob_expr_doc_id.h b/src/sql/engine/expr/ob_expr_doc_id.h new file mode 100644 index 0000000000..a6f0982b6f --- /dev/null +++ b/src/sql/engine/expr/ob_expr_doc_id.h @@ -0,0 +1,52 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_DOC_ID_H +#define OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_DOC_ID_H + +#include "sql/engine/basic/ob_chunk_datum_store.h" +#include "sql/das/ob_das_scan_op.h" +#include "sql/engine/dml/ob_dml_ctx_define.h" + +namespace oceanbase +{ +namespace sql +{ +class ObExprDocID final : public ObFuncExprOperator +{ +public: + explicit ObExprDocID(common::ObIAllocator &alloc); + virtual ~ObExprDocID() {} + virtual int calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const override; + virtual int calc_resultN(common::ObObj &result, + const common::ObObj *objs_array, + int64_t param_num, + common::ObExprCtx &expr_ctx) const override; + virtual common::ObCastMode get_cast_mode() const override { return CM_NULL_ON_WARN;} + virtual int cg_expr( + ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; + static int generate_doc_id( + const ObExpr &raw_ctx, + ObEvalCtx &eval_ctx, + ObDatum &expr_datum); +private: + DISALLOW_COPY_AND_ASSIGN(ObExprDocID); +}; + +} // end namespace sql +} // end namespace oceanbase +#endif /* OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_DOC_ID_H */ diff --git a/src/sql/engine/expr/ob_expr_doc_length.cpp b/src/sql/engine/expr/ob_expr_doc_length.cpp new file mode 100644 index 0000000000..29d054ed9f --- /dev/null +++ b/src/sql/engine/expr/ob_expr_doc_length.cpp @@ -0,0 +1,82 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE_FTS + +#include "sql/engine/expr/ob_expr_doc_length.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ +ObExprDocLength::ObExprDocLength(ObIAllocator &allocator) + : ObFuncExprOperator(allocator, T_FUN_SYS_DOC_LENGTH, N_DOC_LENGTH, MORE_THAN_ZERO, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) +{ + need_charset_convert_ = false; +} + +int ObExprDocLength::calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + ObExprTypeCtx &type_ctx) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(param_num < 1) || OB_ISNULL(types)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument for fulltext expr", K(ret), K(param_num), KP(types)); + } else { + types[0].set_uint64(); + type.set_scale(ObAccuracy::DDL_DEFAULT_ACCURACY[ObUInt64Type].scale_); + type.set_precision(ObAccuracy::DDL_DEFAULT_ACCURACY[ObUInt64Type].precision_); + type.set_result_flag(NOT_NULL_FLAG); + } + return ret; +} + +int ObExprDocLength::calc_resultN(ObObj &result, + const ObObj *objs_array, + int64_t param_num, + ObExprCtx &expr_ctx) const +{ + return OB_NOT_SUPPORTED; +} + +int ObExprDocLength::cg_expr( + ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const +{ + int ret = OB_SUCCESS; + UNUSED(raw_expr); + UNUSED(expr_cg_ctx); + if (OB_UNLIKELY(rt_expr.arg_cnt_ < 1) || OB_ISNULL(rt_expr.args_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(rt_expr.arg_cnt_), KP(rt_expr.args_), K(rt_expr.type_)); + } else { + rt_expr.eval_func_ = generate_doc_length; + } + return ret; +} + +int ObExprDocLength::generate_doc_length( + const ObExpr &raw_ctx, + ObEvalCtx &eval_ctx, + ObDatum &expr_datum) +{ + int ret = OB_SUCCESS; + UNUSEDx(raw_ctx, eval_ctx); + expr_datum.set_null(); + return ret; +} +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/engine/expr/ob_expr_doc_length.h b/src/sql/engine/expr/ob_expr_doc_length.h new file mode 100644 index 
0000000000..3884d78b56 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_doc_length.h @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef SQL_ENGINE_EXPR_OB_EXPR_DOC_LENGTH_H_ +#define SQL_ENGINE_EXPR_OB_EXPR_DOC_LENGTH_H_ +#include "sql/engine/expr/ob_expr_operator.h" + +namespace oceanbase +{ +namespace sql +{ +class ObExprDocLength : public ObFuncExprOperator +{ +public: + explicit ObExprDocLength(common::ObIAllocator &alloc); + virtual ~ObExprDocLength() = default; + virtual int calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const override; + virtual int calc_resultN(common::ObObj &result, + const common::ObObj *objs_array, + int64_t param_num, + common::ObExprCtx &expr_ctx) const override; + virtual common::ObCastMode get_cast_mode() const override { return CM_NULL_ON_WARN;} + virtual int cg_expr( + ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; + static int generate_doc_length( + const ObExpr &raw_ctx, + ObEvalCtx &eval_ctx, + ObDatum &expr_datum); +private : + //disallow copy + DISALLOW_COPY_AND_ASSIGN(ObExprDocLength); +}; +} +} +#endif //SQL_ENGINE_EXPR_OB_EXPR_DOC_LENGTH_H_ diff --git a/src/sql/engine/expr/ob_expr_eval_functions.cpp b/src/sql/engine/expr/ob_expr_eval_functions.cpp index b04638c244..2d88d98e61 100644 --- a/src/sql/engine/expr/ob_expr_eval_functions.cpp +++ b/src/sql/engine/expr/ob_expr_eval_functions.cpp @@ -343,6 +343,11 @@ 
#include "ob_expr_temp_table_ssid.h" #include "ob_expr_between.h" #include "ob_expr_align_date4cmp.h" +#include "ob_expr_word_count.h" +#include "ob_expr_word_segment.h" +#include "ob_expr_doc_id.h" +#include "ob_expr_doc_length.h" +#include "ob_expr_bm25.h" #include "ob_expr_lock_func.h" #include "ob_expr_extract_cert_expired_time.h" #include "ob_expr_transaction_id.h" @@ -1137,10 +1142,10 @@ static ObExpr::EvalFunc g_expr_eval_functions[] = { NULL, //ObExprXmlForest::eval_xml_forest, /* 663 */ NULL, //ObExprExistsNodeXml::eval_existsnode_xml, /* 664 */ NULL, //ObExprPassword::eval_password, /* 665 */ - NULL, // ObExprDocID::generate_doc_id, /* 666 */ - NULL, // ObExprWordSegment::generate_fulltext_column, /* 667 */ - NULL, // ObExprWordCount::generate_word_count, /* 668 */ - NULL, // ObExprBM25::eval_bm25_relevance_expr, /* 669 */ + ObExprDocID::generate_doc_id, /* 666 */ + ObExprWordSegment::generate_fulltext_column, /* 667 */ + ObExprWordCount::generate_word_count, /* 668 */ + ObExprBM25::eval_bm25_relevance_expr, /* 669 */ ObExprTransactionId::eval_transaction_id, /* 670 */ NULL, //ObExprInnerTableOptionPrinter::eval_inner_table_option_printer, /* 671 */ NULL, //ObExprInnerTableSequenceGetter::eval_inner_table_sequence_getter, /* 672 */ @@ -1162,7 +1167,7 @@ static ObExpr::EvalFunc g_expr_eval_functions[] = { NULL, // ObExprWaitForExecutedGTIDSet::eval_wait_for_executed_gtid_set, /* 688 */ NULL, // ObExprWaitUntilSQLThreadAfterGTIDs::eval_wait_until_sql_thread_after_gtids /* 689 */ ObExprLastRefreshScn::eval_last_refresh_scn, /* 690 */ - NULL, // ObExprDocLength::generate_doc_length, /*691*/ + ObExprDocLength::generate_doc_length, /* 691 */ NULL, // ObExprTopNFilter::eval_topn_filter, /* 692 */ NULL, // ObExprIsEnabledRole::eval_is_enabled_role, /* 693 */ NULL, // ObExprCanAccessTrigger::can_access_trigger, /* 694 */ diff --git a/src/sql/engine/expr/ob_expr_json_contains.cpp b/src/sql/engine/expr/ob_expr_json_contains.cpp index e1e11a1366..401e7d175c 100644 
--- a/src/sql/engine/expr/ob_expr_json_contains.cpp +++ b/src/sql/engine/expr/ob_expr_json_contains.cpp @@ -48,13 +48,6 @@ int ObExprJsonContains::calc_result_typeN(ObExprResType& type, type.set_int32(); type.set_precision(DEFAULT_PRECISION_FOR_BOOL); type.set_scale(ObAccuracy::DDL_DEFAULT_ACCURACY[ObIntType].scale_); - - // set type for json_doc and json_candidate - for (int64_t i = 0; OB_SUCC(ret) && i < 2; i++) { - if (OB_FAIL(ObJsonExprHelper::is_valid_for_json(types_stack, i, N_JSON_CONTAINS))) { - LOG_WARN("wrong type for json doc.", K(ret), K(types_stack[i].get_type())); - } - } // set type for json_path if (OB_SUCC(ret) && param_num == 3) { diff --git a/src/sql/engine/expr/ob_expr_json_func_helper.cpp b/src/sql/engine/expr/ob_expr_json_func_helper.cpp index 1ae90cc143..e90e071e50 100644 --- a/src/sql/engine/expr/ob_expr_json_func_helper.cpp +++ b/src/sql/engine/expr/ob_expr_json_func_helper.cpp @@ -381,7 +381,8 @@ int ObJsonExprHelper::get_json_for_partial_update( return ret; } -int ObJsonExprHelper::get_json_val(const common::ObObj &data, ObExprCtx &ctx, +// ToDo: refine +int ObJsonExprHelper::get_json_val(const common::ObObj &data, ObExecContext *ctx, bool is_bool, common::ObIAllocator *allocator, ObIJsonBase*& j_base, bool to_bin) { @@ -425,7 +426,7 @@ int ObJsonExprHelper::get_json_val(const common::ObObj &data, ObExprCtx &ctx, LOG_WARN("failed: parse value to jsonBase", K(ret), K(val_type)); } } else { - ObBasicSessionInfo *session = ctx.exec_ctx_->get_my_session(); + ObBasicSessionInfo *session = ctx->get_my_session(); if (OB_ISNULL(session)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("session is NULL", K(ret)); @@ -674,6 +675,7 @@ int ObJsonExprHelper::get_json_val(const ObExpr &expr, ObEvalCtx &ctx, } return ret; } + int ObJsonExprHelper::eval_oracle_json_val(ObExpr *expr, ObEvalCtx &ctx, common::ObIAllocator *allocator, diff --git a/src/sql/engine/expr/ob_expr_json_func_helper.h b/src/sql/engine/expr/ob_expr_json_func_helper.h index 3ffe3d2870..75067d09b1 
100644 --- a/src/sql/engine/expr/ob_expr_json_func_helper.h +++ b/src/sql/engine/expr/ob_expr_json_func_helper.h @@ -52,6 +52,8 @@ public: error_type_(0), is_empty_default_const_(false), is_error_default_const_(false), + is_alias_(false), + is_multivalue_(false), empty_val_(), error_val_(), on_mismatch_(), @@ -74,6 +76,8 @@ public: int8_t error_type_; bool is_empty_default_const_; bool is_error_default_const_; + bool is_alias_; + bool is_multivalue_; ObDatum *empty_val_; ObDatum *error_val_; common::ObSEArray on_mismatch_; @@ -260,9 +264,9 @@ public: @param[out] j_base the pointer to JsonBase @return Returns OB_SUCCESS on success, error code otherwise. */ - static int get_json_val(const common::ObObj &data, ObExprCtx &ctx, + static int get_json_val(const common::ObObj &data, ObExecContext *ctx, bool is_bool, common::ObIAllocator *allocator, - ObIJsonBase*& j_base, bool to_bin = false); + ObIJsonBase*& j_base, bool to_bin= false); static int get_json_val(const common::ObDatum &data, ObExecContext &ctx, ObExpr* expr, diff --git a/src/sql/engine/expr/ob_expr_json_overlaps.cpp b/src/sql/engine/expr/ob_expr_json_overlaps.cpp index 8777b8379a..41cc20f638 100644 --- a/src/sql/engine/expr/ob_expr_json_overlaps.cpp +++ b/src/sql/engine/expr/ob_expr_json_overlaps.cpp @@ -44,9 +44,7 @@ int ObExprJsonOverlaps::calc_result_type2(ObExprResType &type, type.set_precision(DEFAULT_PRECISION_FOR_BOOL); type.set_scale(ObAccuracy::DDL_DEFAULT_ACCURACY[ObIntType].scale_); - if (OB_FAIL(ObJsonExprHelper::is_valid_for_json(type1, 1, N_JSON_OVERLAPS))) { - LOG_WARN("wrong type for json doc.", K(ret), K(type1.get_type())); - } else if (OB_FAIL(ObJsonExprHelper::is_valid_for_json(type2, 2, N_JSON_OVERLAPS))) { + if (OB_FAIL(ObJsonExprHelper::is_valid_for_json(type2, 2, N_JSON_OVERLAPS))) { LOG_WARN("wrong type for json doc.", K(ret), K(type2.get_type())); } return ret; diff --git a/src/sql/engine/expr/ob_expr_json_query.cpp b/src/sql/engine/expr/ob_expr_json_query.cpp index 
411efaa058..a8faf92274 100644 --- a/src/sql/engine/expr/ob_expr_json_query.cpp +++ b/src/sql/engine/expr/ob_expr_json_query.cpp @@ -41,7 +41,7 @@ namespace sql { ObExprJsonQuery::ObExprJsonQuery(ObIAllocator &alloc) - : ObFuncExprOperator(alloc, T_FUN_SYS_JSON_QUERY, N_JSON_QUERY, MORE_THAN_TWO, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) + : ObFuncExprOperator(alloc, T_FUN_SYS_JSON_QUERY, N_JSON_QUERY, PARAM_NUM_UNKNOWN, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) { } @@ -54,13 +54,15 @@ int ObExprJsonQuery::calc_result_typeN(ObExprResType& type, int64_t param_num, ObExprTypeCtx& type_ctx) const { - UNUSED(type_ctx); INIT_SUCC(ret); common::ObArenaAllocator allocator; - if (OB_UNLIKELY(param_num != 11)) { + if (OB_UNLIKELY(param_num != JSN_QUE_MAX)) { ret = OB_ERR_PARAM_SIZE; LOG_WARN("invalid param number", K(ret), K(param_num)); } else { + // [0:json_text][1:json_path][2:returning_type][3:truncate][4:scalars][5:pretty][6:ascii] + // [7:wrapper][8:asis][9:error_type][10:empty_type][11:mismatch][12:multivalue] + bool is_json_input = false; if (OB_FAIL(ObExprJsonValue::calc_input_type(types_stack[JSN_QUE_DOC], is_json_input))) { LOG_WARN("fail to calc input type", K(ret)); @@ -75,13 +77,17 @@ int ObExprJsonQuery::calc_result_typeN(ObExprResType& type, types_stack[JSN_QUE_PATH].set_calc_type(ObLongTextType); types_stack[JSN_QUE_PATH].set_calc_collation_type(CS_TYPE_UTF8MB4_BIN); } + // returning type : 2 ObExprResType dst_type; if (OB_SUCC(ret) && OB_FAIL(calc_returning_type(type, types_stack, type_ctx, dst_type, &allocator, is_json_input))) { LOG_WARN("fail to calc returning type", K(ret)); } - // truncate 3 , scalars 4, pretty 5, ascii 6, wrapper 7, error 8, empty 9, mismatch 10 + // old: truncate 3 , scalars 4, pretty 5, ascii 6, wrapper 7, error 8, empty 9, mismatch 10 + // new: + // [0:json_text] [1:json_path] [2:returning_type] [3:truncate] [4:scalars] [5:pretty] [6:ascii] + // [7:wrapper] [8:asis] [9:error_type] [10:empty_type] [11:mismatch] [12:multivalue] 
for (int64_t i = JSN_QUE_TRUNC; i < param_num && OB_SUCC(ret); ++i) { if (types_stack[i].get_type() == ObNullType) { ret = OB_ERR_UNEXPECTED; @@ -101,14 +107,21 @@ int ObExprJsonQuery::calc_result_typeN(ObExprResType& type, } int ObExprJsonQuery::calc_returning_type(ObExprResType& type, - ObExprResType* types_stack, - ObExprTypeCtx& type_ctx, - ObExprResType& dst_type, - common::ObIAllocator *allocator, - bool is_json_input) + ObExprResType* types_stack, + ObExprTypeCtx& type_ctx, + ObExprResType& dst_type, + common::ObIAllocator *allocator, + bool is_json_input) { INIT_SUCC(ret); - if (types_stack[JSN_QUE_RET].get_type() == ObNullType) { + bool is_asis = types_stack[JSN_QUE_ASIS].get_param().get_int() > 0; + bool is_multivalue = types_stack[JSN_QUE_MULTIVALUE].get_param().get_int() > 0; + + if (is_asis && is_multivalue) { + dst_type.set_type(ObJsonType); + dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN); + dst_type.set_length((ObAccuracy::DDL_DEFAULT_ACCURACY[ObJsonType]).get_length()); + } else if (types_stack[JSN_QUE_RET].get_type() == ObNullType) { ObString j_path_text(types_stack[JSN_QUE_PATH].get_param().get_string().length(), types_stack[JSN_QUE_PATH].get_param().get_string().ptr()); ObJsonPath j_path(j_path_text, allocator); @@ -136,7 +149,8 @@ int ObExprJsonQuery::calc_returning_type(ObExprResType& type, } } else if (OB_FAIL(ObJsonExprHelper::get_cast_type(types_stack[JSN_QUE_RET], dst_type, type_ctx))) { LOG_WARN("get cast dest type failed", K(ret)); - } else if (dst_type.get_type() != ObVarcharType + } else if (!is_asis && + dst_type.get_type() != ObVarcharType && dst_type.get_type() != ObLongTextType && dst_type.get_type() != ObJsonType) { ret = OB_ERR_INVALID_DATA_TYPE_RETURNING; @@ -212,6 +226,8 @@ int ObExprJsonQuery::eval_json_query(const ObExpr &expr, ObEvalCtx &ctx, ObDatum is_cover_by_error))) { // get clause param value, set into param_ctx LOG_WARN("fail to parse clause value", K(ret)); + } else if (!param_ctx->json_param_.is_multivalue_ && 
param_ctx->json_param_.is_alias_) { + is_null_result = true; } else if (OB_FAIL(ObJsonUtil::get_json_doc(expr.args_[JSN_QUE_DOC], ctx, temp_allocator, j_base, is_null_result, is_cover_by_error, true))) { // parse json doc @@ -227,6 +243,13 @@ int ObExprJsonQuery::eval_json_query(const ObExpr &expr, ObEvalCtx &ctx, ObDatum LOG_WARN("fail to seek result", K(ret)); } + // Todo: refine + ObCollationType in_coll_type = expr.args_[JSN_QUE_DOC]->datum_meta_.cs_type_; + ObCollationType dst_coll_type = expr.datum_meta_.cs_type_; + ObJsonCastParam cast_param(param_ctx->json_param_.dst_type_, in_coll_type, dst_coll_type, 0); + bool is_multivalue = param_ctx->json_param_.is_multivalue_; + bool is_alias = param_ctx->json_param_.is_alias_; + // fill output if (OB_FAIL(ret)) { if (is_cover_by_error) { @@ -236,14 +259,14 @@ int ObExprJsonQuery::eval_json_query(const ObExpr &expr, ObEvalCtx &ctx, ObDatum } LOG_WARN("json_query failed", K(ret)); } else if (is_null_result) { + // ToDo: need check is_multivalue? 
res.set_null(); } else if (param_ctx->json_param_.on_mismatch_[0] == JSN_QUERY_MISMATCH_DOT && hits.size() == 1 && param_ctx->json_param_.dst_type_ != ObJsonType) { // dot notation - ObCollationType in_coll_type = expr.args_[JSN_QUE_DOC]->datum_meta_.cs_type_; - ObCollationType dst_coll_type = expr.datum_meta_.cs_type_; + // ObCollationType in_coll_type = expr.args_[JSN_QUE_DOC]->datum_meta_.cs_type_; + // ObCollationType dst_coll_type = expr.datum_meta_.cs_type_; param_ctx->json_param_.error_type_ = JSN_QUERY_NULL; - ObJsonCastParam cast_param(param_ctx->json_param_.dst_type_, in_coll_type, dst_coll_type, 0); ret = ObJsonUtil::cast_to_res(&temp_allocator, ctx, hits[0], param_ctx->json_param_.accuracy_, cast_param, res, is_type_mismatch); if (OB_FAIL(ret)) { @@ -251,6 +274,7 @@ int ObExprJsonQuery::eval_json_query(const ObExpr &expr, ObEvalCtx &ctx, ObDatum } else if (OB_FAIL(ObJsonUtil::set_lob_datum(&temp_allocator, expr, ctx, param_ctx->json_param_.dst_type_, 0, res))) { LOG_WARN("fail to set lob datum from string val", K(ret)); } + // ToDo: need set_multivalue_result? 
} else if (use_wrapper == 1) { size_t hit_size = hits.size(); ObJsonArray j_arr_res(&temp_allocator); @@ -268,6 +292,14 @@ int ObExprJsonQuery::eval_json_query(const ObExpr &expr, ObEvalCtx &ctx, ObDatum } if (try_set_error_val(&temp_allocator, ctx, ¶m_ctx->json_param_, expr, res, ret)) { + } else if (is_multivalue) { + if (OB_FAIL(set_multivalue_result(ctx, temp_allocator, jb_res, expr, + param_ctx->json_param_.error_type_, + in_coll_type, dst_coll_type, + param_ctx->json_param_.error_val_, + param_ctx->json_param_.accuracy_, cast_param, res))) { + LOG_WARN("multi value result set fail", K(ret)); + } } else if (OB_FAIL(set_result(¶m_ctx->json_param_, jb_res, &temp_allocator, ctx, expr, res))) { LOG_WARN("result set fail", K(ret)); @@ -275,13 +307,49 @@ int ObExprJsonQuery::eval_json_query(const ObExpr &expr, ObEvalCtx &ctx, ObDatum } else if (is_json_arr) { ObJsonArray j_arr_var(&temp_allocator); jb_empty = &j_arr_var; - ret = set_result(¶m_ctx->json_param_, jb_empty, &temp_allocator, ctx, expr, res); + if (is_multivalue || is_alias) { + if (!is_multivalue) { + res.set_null(); + } else { + ret = set_multivalue_result(ctx, temp_allocator, jb_empty, expr, + param_ctx->json_param_.error_type_, + in_coll_type, dst_coll_type, + param_ctx->json_param_.error_val_, + param_ctx->json_param_.accuracy_, cast_param, res); + } + } else { + ret = set_result(¶m_ctx->json_param_, jb_empty, &temp_allocator, ctx, expr, res); + } } else if (is_json_obj) { ObJsonObject j_obj_var(&temp_allocator); jb_empty = &j_obj_var; - ret = set_result(¶m_ctx->json_param_, jb_empty, &temp_allocator, ctx, expr, res); + if (is_multivalue || is_alias) { + if (!is_multivalue) { + res.set_null(); + } else { + ret = set_multivalue_result(ctx, temp_allocator, jb_empty, expr, + param_ctx->json_param_.error_type_, + in_coll_type, dst_coll_type, + param_ctx->json_param_.error_val_, + param_ctx->json_param_.accuracy_, cast_param, res); + } + } else { + ret = set_result(¶m_ctx->json_param_, jb_empty, 
&temp_allocator, ctx, expr, res); + } } else { - ret = set_result(¶m_ctx->json_param_, hits[0], &temp_allocator, ctx, expr, res); + if (is_multivalue || is_alias) { + if (!is_multivalue) { + res.set_null(); + } else { + ret = set_multivalue_result(ctx, temp_allocator, hits[0], expr, + param_ctx->json_param_.error_type_, + in_coll_type, dst_coll_type, + param_ctx->json_param_.error_val_, + param_ctx->json_param_.accuracy_, cast_param, res); + } + } else { + ret = set_result(¶m_ctx->json_param_, hits[0], &temp_allocator, ctx, expr, res); + } } if (OB_SUCC(ret)) { param_ctx->is_first_exec_ = false; @@ -289,6 +357,208 @@ int ObExprJsonQuery::eval_json_query(const ObExpr &expr, ObEvalCtx &ctx, ObDatum return ret; } + +int ObExprJsonQuery::check_enable_cast_index_array(ObIJsonBase* json_base, bool disable_container) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(json_base)) { + } else if (json_base->json_type() == ObJsonNodeType::J_NULL) { + ret = OB_ERR_JSON_VALUE_CAST_FUNCTION_INDEX; + LOG_USER_ERROR(OB_ERR_JSON_VALUE_CAST_FUNCTION_INDEX); + } else if (json_base->json_type() == ObJsonNodeType::J_BOOLEAN) { + ret = OB_ERR_CAST_ARRAY_SUPPORT; + LOG_USER_ERROR(OB_ERR_CAST_ARRAY_SUPPORT, "CAST-ing JSON BOOLEAN type to array"); + } else if (!disable_container && json_base->json_type() == ObJsonNodeType::J_OBJECT) { + ret = OB_ERR_CAST_ARRAY_SUPPORT; + LOG_USER_ERROR(OB_ERR_CAST_ARRAY_SUPPORT, "CAST-ing JSON OBJECT type to array"); + } + + if (OB_FAIL(ret)) { + } else if (disable_container && + (json_base->json_type() == ObJsonNodeType::J_ARRAY || json_base->json_type() == ObJsonNodeType::J_ARRAY)) { + ret = OB_ERR_JSON_CONTAINER_CAST_SCALAR; + LOG_USER_ERROR(OB_ERR_JSON_CONTAINER_CAST_SCALAR); + } + + return ret; +} + +int ObExprJsonQuery::set_multivalue_result(ObEvalCtx& ctx, + ObIAllocator& allocator, + ObIJsonBase* origin_result, + const ObExpr &expr, + uint8_t opt_error, + ObCollationType in_collation, + ObCollationType dst_collation, + ObDatum *on_error, + ObAccuracy 
&origin_accuracy, + ObJsonCastParam &cast_param, + ObDatum &res) +{ + INIT_SUCC(ret); + + ParseNode node; + ObDatum *opt_type = nullptr; + int32_t dst_len = 0; + ObObjType dest_type; + + ObVector opt_mismatch_value; + ObVector opt_mismatch_type; + ObAccuracy accuracy = origin_accuracy; + + ObIJsonBase* json_base = nullptr; + + if (OB_FAIL(expr.args_[2]->eval(ctx, opt_type))) { + LOG_WARN("eval dst type datum failed", K(ret)); + } else if (OB_NOT_NULL(origin_result) + && OB_FAIL(ObJsonBaseFactory::transform(&allocator, origin_result, ObJsonInType::JSON_BIN, json_base))) { // to tree + LOG_WARN("fail to transform to tree", K(ret)); + } else { + node.value_ = opt_type->get_int(); + dest_type = static_cast(node.int16_values_[0]); + dst_len = node.int32_values_[OB_NODE_CAST_C_LEN_IDX]; + ObLengthSemantics length_semantics = 0; + if (ob_is_string_tc(dest_type) && + OB_FAIL(ObJsonUtil::get_accuracy_internal(accuracy, ctx, dest_type, node.value_, length_semantics))) { + LOG_WARN("failed to get accuracy", K(ret)); + } + } + + ObString str_bin; + ObJsonOpaque apaque(str_bin, ObUserDefinedSQLType); + ObStringBuffer str_buff(&allocator); + + uint32_t element_count = json_base == nullptr ? 0 : json_base->element_count(); + uint8_t is_cast = false; + uint64_t reserve_len = 0; + + + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(check_enable_cast_index_array(json_base, false))) { + LOG_WARN("failed to check index array size", K(ret)); + } else if (OB_FAIL(str_buff.reserve(sizeof(uint32_t)))) { + LOG_WARN("failed to reserve size", K(ret)); + } else if (FALSE_IT((*reinterpret_cast(str_buff.ptr()) = element_count))) { + } else if (str_buff.set_length(sizeof(uint32_t))) { + } else if (OB_NOT_NULL(json_base) && json_base->json_type() == ObJsonNodeType::J_ARRAY) { + /* + * need remove duplicate element + */ + + // 1. 
allocator element container + ObJsonBin* bin_array = nullptr; + bin_array = static_cast(allocator.alloc(sizeof(ObJsonBin) * element_count)); + if (element_count > 0 && OB_ISNULL(bin_array)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate json bin array.", K(ret)); + } + + for (int i = 0; OB_SUCC(ret) && i < element_count; ++i) { + ObJsonBin* tmp = new (bin_array + i) ObJsonBin(); + ObIJsonBase* iter = tmp; + + if (OB_FAIL(json_base->get_array_element(i, iter))) { + LOG_WARN("get json array element failed", K(ret)); + } + } + + // 2. do sort + if (OB_SUCC(ret) && OB_NOT_NULL(bin_array)) { + ObJsonBinCompare cmp; + std::sort(bin_array, bin_array + element_count, cmp); + } + + uint32_t real_store_count = 0; + ObIJsonBase* last = nullptr; + for (int i = 0; OB_SUCC(ret) && i < element_count; ++i) { + ObIJsonBase* iter = bin_array + i; + ObObj tmp_obj; + int64_t pos = str_buff.length(); + tmp_obj.set_collation_type(dst_collation); + if (ob_is_numeric_type(dest_type) || ob_is_temporal_type(dest_type)) { + tmp_obj.set_collation_level(CS_LEVEL_NUMERIC); + } else { + tmp_obj.set_collation_level(CS_LEVEL_IMPLICIT); + } + + int cmp_ret = 0; + if (OB_ISNULL(last)) { + last = iter; + ++real_store_count; + } else if (OB_FAIL(last->compare(*iter, cmp_ret))) { + LOG_WARN("failed compare bin data", K(ret)); + } else if (cmp_ret == 0) { + continue; + } else { + last = iter; + ++real_store_count; + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(check_enable_cast_index_array(iter, true))) { + LOG_WARN("failed to check index array size", K(ret)); + } else if (OB_FAIL(ObJsonUtil::cast_json_scalar_to_sql_obj(&allocator, ctx, iter, + dst_collation, accuracy, dest_type, + tmp_obj))) { + LOG_WARN("failed to cast to res", K(ret), K(dest_type)); + ret = OB_ERR_JSON_VALUE_CAST_FUNCTION_INDEX; + LOG_USER_ERROR(OB_ERR_JSON_VALUE_CAST_FUNCTION_INDEX); + } else if (FALSE_IT(reserve_len = tmp_obj.get_serialize_size())) { + } else if (OB_FAIL(str_buff.reserve(reserve_len + 128))) 
{ + LOG_WARN("failed to reserve size", K(ret), K(reserve_len)); + } else if (OB_FAIL(tmp_obj.serialize(str_buff.ptr(), str_buff.capacity(), pos))) { + LOG_WARN("failed to serialize datum", K(ret), K(reserve_len)); + } else { + str_buff.set_length(pos); + } + } + + if (OB_SUCC(ret)) { + // update real element count + *reinterpret_cast(str_buff.ptr()) = real_store_count; + } + } else if (element_count > 0) { + ObObj tmp_obj; + int64_t pos = str_buff.length(); + tmp_obj.set_collation_type(dst_collation); + if (ob_is_numeric_type(dest_type) || ob_is_temporal_type(dest_type)) { + tmp_obj.set_collation_level(CS_LEVEL_NUMERIC); + } else { + tmp_obj.set_collation_level(CS_LEVEL_IMPLICIT); + } + + if (OB_FAIL(check_enable_cast_index_array(json_base, true))) { + LOG_WARN("failed to check index array size", K(ret)); + } else if (OB_FAIL(ObJsonUtil::cast_json_scalar_to_sql_obj(&allocator, ctx, json_base, + dst_collation, accuracy, dest_type, + tmp_obj))) { + LOG_WARN("failed to cast to res", K(ret), K(dest_type)); + ret = OB_ERR_JSON_VALUE_CAST_FUNCTION_INDEX; + LOG_USER_ERROR(OB_ERR_JSON_VALUE_CAST_FUNCTION_INDEX); + } else if (FALSE_IT(reserve_len = tmp_obj.get_serialize_size())) { + } else if (OB_FAIL(str_buff.reserve(reserve_len + 128))) { + LOG_WARN("failed to reserve size", K(ret), K(reserve_len)); + } else if (OB_FAIL(tmp_obj.serialize(str_buff.ptr(), str_buff.capacity(), pos))) { + LOG_WARN("failed to serialize datum", K(ret), K(reserve_len)); + } else { + str_buff.set_length(pos); + } + } + + if (OB_SUCC(ret)) { + apaque.set_value(str_buff.string()); + if (OB_FAIL(apaque.get_raw_binary(str_bin, &allocator))) { + LOG_WARN("json extarct get result binary failed", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::pack_json_str_res(expr, ctx, res, str_bin))) { + LOG_WARN("fail to pack json result", K(ret)); + } + } + + return ret; +} + int ObExprJsonQuery::init_ctx_var(ObJsonParamCacheCtx*& param_ctx, const ObExpr &expr) { INIT_SUCC(ret); @@ -404,7 +674,8 @@ int 
ObExprJsonQuery::check_params_valid(const ObExpr &expr, json_param->dst_type_, expr.datum_meta_.cs_type_, JSON_QUERY_EXPR))) { is_cover_by_error = false; LOG_WARN("check item func with return type fail", K(ret)); - } else if (json_param->dst_type_ != ObVarcharType + } else if ( !json_param->is_alias_ + &&json_param->dst_type_ != ObVarcharType && json_param->dst_type_ != ObLongTextType && json_param->dst_type_ != ObJsonType) { is_cover_by_error = false; @@ -446,19 +717,23 @@ int ObExprJsonQuery::get_clause_param_value(const ObExpr &expr, } else { ret = ObJsonUtil::get_accuracy(expr, ctx, json_param->accuracy_, json_param->dst_type_, is_cover_by_error); } - // truncate 3, scalars 4, pretty 5, ascii 6, wrapper 7, error 8, empty 9, mismatch 10 - for (size_t i = JSN_QUE_TRUNC; OB_SUCC(ret) && i <= JSN_QUE_MISMATCH; i ++) { + // old: truncate 3, scalars 4, pretty 5, ascii 6, wrapper 7, error 8, empty 9, mismatch 10 + // new: + // [0:json_text] [1:json_path] [2:returning_type] [3:truncate] [4:scalars] [5:pretty] [6:ascii] + // [7:wrapper] [8:asis] [9:error_type] [10:empty_type] [11:mismatch] [12:multivalue] + for (size_t i = JSN_QUE_TRUNC; OB_SUCC(ret) && i < JSN_QUE_MAX; i ++) { if (OB_FAIL(ObJsonExprHelper::get_clause_opt(expr.args_[i], ctx, val))) { LOG_WARN("fail to get clause option", K(ret)); } else if (OB_FAIL(param_vec.push_back(val))) { LOG_WARN("fail to push val into array", K(ret)); } } + if (OB_FAIL(ret) && is_cover_by_error) { is_cover_by_error = false; ret = ObJsonExprHelper::get_clause_opt(expr.args_[JSN_QUE_ERROR], ctx, json_param->error_type_); } else if (OB_FAIL(ret)) { - } else if (param_vec.size() == 8) { + } else if (param_vec.size() == 10) { json_param->truncate_ = param_vec[JSN_QUE_TRUNC_OPT]; json_param->scalars_type_ = param_vec[JSN_QUE_SCALAR_OPT]; json_param->pretty_type_ = param_vec[JSN_QUE_PRETTY_OPT]; @@ -467,6 +742,13 @@ int ObExprJsonQuery::get_clause_param_value(const ObExpr &expr, json_param->error_type_ = param_vec[JSN_QUE_ERROR_OPT]; 
json_param->empty_type_ = param_vec[JSN_QUE_EMPTY_OPT]; json_param->on_mismatch_.push_back(param_vec[JSN_QUE_MISMATCH_OPT]); + // ToDo: double check + json_param->is_alias_ = param_vec[JSN_QUE_ASIS_OPT] > 0; + json_param->is_multivalue_ = param_vec[JSN_QUE_MULTIVALUE_OPT] > 0; + if (json_param->is_multivalue_) { + json_param->dst_type_ = ObJsonType; + json_param->accuracy_.set_length(0); + } } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("fail to get param value", K(ret)); @@ -523,7 +805,9 @@ int ObExprJsonQuery::doc_do_seek(ObIJsonBase* j_base, } } else if (hits.size() > 1) { // return val decide by wrapper option - if (OB_FAIL(get_multi_scalars_wrapper_type(json_param->wrapper_, use_wrapper))) { + if (json_param->is_multivalue_) { + use_wrapper = 1; + } else if (OB_FAIL(get_multi_scalars_wrapper_type(json_param->wrapper_, use_wrapper))) { is_cover_by_error = true; LOG_WARN("error occur in wrapper type", K(ret), K(hits.size())); } @@ -621,7 +905,7 @@ int ObExprJsonQueryParamInfo::init_jsn_query_expr_param(ObIAllocator &alloc, ObE uint32_t pos = -1; // parse clause node // truncate 3, scalars 4, pretty 5, ascii 6, wrapper 7, error 8, empty 9, mismatch 10 - for (int64_t i = JSN_QUE_TRUNC; OB_SUCC(ret) && i <= JSN_QUE_MISMATCH; i ++) { + for (int64_t i = JSN_QUE_TRUNC; OB_SUCC(ret) && i < JSN_QUE_MAX; i ++) { if (OB_FAIL(get_int_val_from_raw(alloc, exec_ctx, raw_expr->get_param_expr(i), const_data))) { LOG_WARN("failed to calc offset expr", K(ret)); } else if (OB_FAIL(param_vec.push_back(const_data.get_tinyint()))) { diff --git a/src/sql/engine/expr/ob_expr_json_query.h b/src/sql/engine/expr/ob_expr_json_query.h index 8a60a0eb2f..a57f6437e9 100644 --- a/src/sql/engine/expr/ob_expr_json_query.h +++ b/src/sql/engine/expr/ob_expr_json_query.h @@ -61,6 +61,67 @@ private: ObEvalCtx &ctx, const ObExpr &expr, ObDatum &res); + static int get_dest_type(const ObExpr &expr, + int32_t &dst_len, + ObEvalCtx& ctx, + ObObjType &dest_type, + bool &is_cover_by_error); + static int 
set_multivalue_result(ObEvalCtx& ctx, + ObIAllocator& allocator, + ObIJsonBase* json_base, + const ObExpr &expr, + uint8_t opt_error, + ObCollationType in_coll_type, + ObCollationType dst_coll_type, + ObDatum *on_error, + ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res); + static int get_clause_opt(const ObExpr &expr, + ObEvalCtx &ctx, + uint8_t index, + bool &is_cover_by_error, + uint8_t &type, + uint8_t size_para); + /* + oracle mode get json path to JsonBase in static_typing_engine + @param[in] expr the input arguments + @param[in] ctx the eval context + @param[in] allocator the Allocator in context + @param[in] index the input arguments index + @param[out] j_path the pointer to JsonPath + @param[out] is_null the flag for null situation + @param[out] is_cover_by_error the flag for whether need cover by error clause + @return Returns OB_SUCCESS on success, error code otherwise. + */ + static int get_ora_json_path(const ObExpr &expr, ObEvalCtx &ctx, + common::ObArenaAllocator &allocator, ObJsonPath*& j_path, + uint16_t index, bool &is_null, bool &is_cover_by_error, + ObDatum*& json_datum); + + /* + oracle mode get json doc to JsonBase in static_typing_engine + @param[in] expr the input arguments + @param[in] ctx the eval context + @param[in] allocator the Allocator in context + @param[in] index the input arguments index + @param[out] j_base the pointer to JsonBase + @param[out] j_in_type the pointer to input type + @param[out] is_null the flag for null situation + @param[out] is_cover_by_error the flag for whether need cover by error clause + @return Returns OB_SUCCESS on success, error code otherwise. 
+ */ + static int get_ora_json_doc(const ObExpr &expr, ObEvalCtx &ctx, + common::ObArenaAllocator &allocator, + uint16_t index, ObIJsonBase*& j_base, + ObObjType dst_type, + bool &is_null, bool &is_cover_by_error); + + static int get_clause_pre_asc_sca_opt(const ObExpr &expr, ObEvalCtx &ctx, + bool &is_cover_by_error, uint8_t &pretty_type, + uint8_t &ascii_type, uint8_t &scalars_type); + static int check_enable_cast_index_array(ObIJsonBase* json_base, bool disable_container); + public: static int get_empty_option(bool &is_cover_by_error, int8_t empty_type, bool &is_null_result, @@ -92,10 +153,8 @@ public: bool use_wrapper); static int get_error_option(int8_t &error_type, ObIJsonBase *&error_val, ObIJsonBase *jb_arr, ObIJsonBase *jb_obj, bool &is_null); static int get_mismatch_option(int8_t &mismatch_type, int &ret); - static int init_ctx_var(ObJsonParamCacheCtx*& param_ctx, const ObExpr &expr); - - static int extract_plan_cache_param(const ObExprJsonQueryParamInfo *info, ObJsonExprParam& json_param); -/* code from ob_expr_cast for cal_result_type */ + static int init_ctx_var(ObJsonParamCacheCtx*& param_ctx, const ObExpr &expr); static int extract_plan_cache_param(const ObExprJsonQueryParamInfo *info, ObJsonExprParam& json_param); + /* code from ob_expr_cast for cal_result_type */ const static int32_t OB_LITERAL_MAX_INT_LEN = 21; DISALLOW_COPY_AND_ASSIGN(ObExprJsonQuery); diff --git a/src/sql/engine/expr/ob_expr_json_utils.cpp b/src/sql/engine/expr/ob_expr_json_utils.cpp index 18ae6caf7f..58e7aaacc5 100644 --- a/src/sql/engine/expr/ob_expr_json_utils.cpp +++ b/src/sql/engine/expr/ob_expr_json_utils.cpp @@ -16,6 +16,7 @@ #include "sql/engine/expr/ob_expr_cast.h" #include "sql/engine/ob_exec_context.h" #include "sql/engine/expr/ob_datum_cast.h" +#include "sql/engine/expr/ob_json_param_type.h" #include "ob_expr_json_func_helper.h" #include "lib/encode/ob_base64_encode.h" // for ObBase64Encoder #include "lib/utility/ob_fast_convert.h" // ObFastFormatInt::format_unsigned 
@@ -712,6 +713,67 @@ int cast_to_int(common::ObIAllocator *allocator,
   return ret;
 }
 
+// The wrapper_set_* specialisations below give templated JSON cast code one
+// spelling for storing a value into either an ObObj (which also takes the type
+// tag or scale) or an ObDatum (which is handed the raw value only).
+template<>
+void ObJsonUtil::wrapper_set_uint(ObObjType type, uint64_t val, ObObj& obj)
+{
+  obj.set_uint(type, val);
+}
+
+template<>
+void ObJsonUtil::wrapper_set_uint(ObObjType type, uint64_t val, ObDatum& obj)
+{
+  obj.set_uint(val);
+}
+
+// Only ObTimestampTZType takes the TZ setter; every other type goes through
+// the LTZ setter (ObObj) or the tiny setter (ObDatum).
+template<>
+void ObJsonUtil::wrapper_set_timestamp_tz(ObObjType type, ObOTimestampData val, ObObj& res)
+{
+  if (type == ObTimestampTZType) {
+    res.set_timestamp_tz(val);
+  } else {
+    res.set_timestamp_ltz(val);
+  }
+}
+
+template<>
+void ObJsonUtil::wrapper_set_timestamp_tz(ObObjType type, ObOTimestampData val, ObDatum& res)
+{
+  if (type == ObTimestampTZType) {
+    res.set_otimestamp_tz(val);
+  } else {
+    res.set_otimestamp_tiny(val);
+  }
+}
+
+template<>
+void ObJsonUtil::wrapper_set_decimal_int(const common::ObDecimalInt *decint, ObScale scale, int32_t int_bytes, ObDatum& res)
+{
+  res.set_decimal_int(decint, int_bytes);
+}
+
+template<>
+void ObJsonUtil::wrapper_set_decimal_int(const common::ObDecimalInt *decint, ObScale scale, int32_t int_bytes, ObObj& res)
+{
+  res.set_decimal_int(int_bytes, scale, const_cast<common::ObDecimalInt *>(decint));
+}
+
+template<>
+void ObJsonUtil::wrapper_set_string(ObObjType type, ObString& val, ObObj& obj)
+{
+  obj.set_string(type, val);
+}
+
+template<>
+void ObJsonUtil::wrapper_set_string(ObObjType type, ObString& val, ObDatum& obj)
+{
+  obj.set_string(val);
+}
+
 int cast_to_uint(common::ObIAllocator *allocator,
                  ObEvalCtx &ctx,
                  ObIJsonBase *j_base,
@@ -739,6 +801,7 @@ int cast_to_uint(common::ObIAllocator *allocator,
     LOG_WARN("uint_upper_check failed", K(ret));
   } else if (!cast_param.is_only_check_) {
     if (cast_param.dst_type_ == ObUInt64Type) {
+      ObJsonUtil::wrapper_set_uint(cast_param.dst_type_, val, res);
       res.set_uint(val);
     } else {
       res.set_uint32(static_cast<uint32_t>(val));
@@ -903,6 +966,7 @@ int cast_to_string(common::ObIAllocator *allocator,
       }
     }
     if (OB_SUCC(ret) && !cast_param.is_only_check_) {
+      ObJsonUtil::wrapper_set_string(cast_param.dst_type_, val, res);
       res.set_string(val);
     }
   }
@@ -1342,6 +1406,7 @@ int cast_to_bit(common::ObIAllocator *allocator,
     if (OB_FAIL(ObJsonUtil::bit_length_check(accuracy, val))) {
       LOG_WARN("fail to check bit range", K(ret));
     } else if (!cast_param.is_only_check_) {
+      ObJsonUtil::wrapper_set_uint(cast_param.dst_type_, val, res);
       res.set_uint(val);
     }
   }
@@ -1376,6 +1441,7 @@ int cast_to_json(common::ObIAllocator *allocator,
       MEMCPY(buf, val.ptr(), val.length());
       val.assign_ptr(buf, val.length());
       if (!cast_param.is_only_check_) {
+        ObJsonUtil::wrapper_set_string(cast_param.dst_type_, val, res);
         res.set_string(val);
       }
     }
@@ -1413,7 +1479,8 @@ int ObJsonUtil::cast_to_res(common::ObIAllocator *allocator,
 {
   INIT_SUCC(ret);
   ObJsonUtil::ObJsonCastSqlScalar cast_func_ = get_json_cast_func(cast_param.dst_type_);
-  if (OB_ISNULL(j_base)) {
+  if (OB_ISNULL(j_base)
+      || (lib::is_mysql_mode() && j_base->json_type() == common::ObJsonNodeType::J_NULL)) {
     res.set_null();
   } else if (OB_ISNULL(cast_func_)) {
     ret = OB_ERR_UNEXPECTED;
@@ -1429,6 +1496,175 @@ int ObJsonUtil::cast_to_res(common::ObIAllocator *allocator,
   return ret;
 }
 
+// Casts the JSON scalar `j_base` to the SQL type `obj_type` and stores it in `res_obj`.
+// The cast goes through cast_to_res() into a stack-backed temporary datum; `res_obj`
+// is written only when that cast succeeds, so a failed cast never copies a
+// half-initialised datum into the caller's object.
+// Returns OB_INVALID_ARGUMENT when `allocator` or `j_base` is null.
+int ObJsonUtil::cast_json_scalar_to_sql_obj(common::ObIAllocator *allocator,
+                                            ObEvalCtx& ctx,
+                                            ObIJsonBase *j_base,
+                                            ObCollationType collation,
+                                            ObAccuracy &accuracy,
+                                            ObObjType obj_type,
+                                            ObObj &res_obj)
+{
+  INIT_SUCC(ret);
+  if (OB_ISNULL(allocator) || OB_ISNULL(j_base)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("expr is null.", K(ret), KP(allocator), KP(j_base));
+  } else {
+    ObDatum res_datum;
+    char datum_buffer[OBJ_DATUM_STRING_RES_SIZE] = {0};
+    res_datum.ptr_ = datum_buffer;
+    ObJsonCastParam cast_param(obj_type, ObCollationType::CS_TYPE_UTF8MB4_BIN, collation, false);
+    uint8_t is_type_mismatch = false;
+    if (OB_FAIL(cast_to_res(allocator, ctx, j_base, accuracy, cast_param, res_datum, is_type_mismatch))) {
+      LOG_WARN("fail to cast.", K(ret));
+    } else {
+      // NOTE(review): the conversion reads res_obj's current meta - presumably callers pre-set it to obj_type; confirm.
+      res_datum.to_obj(res_obj, res_obj.meta_);
+      res_obj.set_collation_type(collation);
+    }
+  }
+  return ret;
+}
+
+// Convenience overload: builds an ObEvalCtx from `exec_ctx` and takes the target
+// type, collation and accuracy from `col_res_type`.
+// Returns OB_INVALID_ARGUMENT when `allocator`, `exec_ctx` or `j_base` is null.
+int ObJsonUtil::cast_json_scalar_to_sql_obj(common::ObIAllocator *allocator,
+                                            ObExecContext* exec_ctx,
+                                            ObIJsonBase *j_base,
+                                            ObExprResType col_res_type,
+                                            ObObj &res_obj) {
+  // ToDo: refine
+  INIT_SUCC(ret);
+  if (OB_ISNULL(allocator) || OB_ISNULL(exec_ctx) || OB_ISNULL(j_base)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("expr is null.", K(ret), KP(allocator), KP(exec_ctx), KP(j_base));
+  } else {
+    ObEvalCtx ctx(*exec_ctx);
+    ObAccuracy temp_accuracy = col_res_type.get_accuracy();
+    ret = cast_json_scalar_to_sql_obj(allocator, ctx, j_base,
+                                      col_res_type.get_collation_type(),
+                                      temp_accuracy,
+                                      col_res_type.get_type(),
+                                      res_obj);
+  }
+  return ret;
+}
+
+/*
+ObJsonUtil::ObJsonCastSqlDatum OB_JSON_CAST_DATUM_EXPLICIT[ObMaxTC] =
+{
+  // ObNullTC = 0, // null
+  cast_to_null,
+  // ObIntTC = 1, // int8, int16, int24, int32, int64.
+  cast_to_int,
+  // ObUIntTC = 2, // uint8, uint16, uint24, uint32, uint64.
+  cast_to_uint,
+  // ObFloatTC = 3, // float, ufloat.
+  cast_to_float,
+  // ObDoubleTC = 4, // double, udouble.
+  cast_to_double,
+  // ObNumberTC = 5, // number, unumber.
+  cast_to_number,
+  // ObDateTimeTC = 6, // datetime, timestamp.
+  cast_to_date_time,
+  // ObDateTC = 7, // date
+  cast_to_date,
+  // ObTimeTC = 8, // time
+  cast_to_time,
+  // ObYearTC = 9, // year
+  cast_to_year,
+  // ObStringTC = 10, // varchar, char, varbinary, binary.
+  cast_to_string,
+  // ObExtendTC = 11, // extend
+  cast_not_expected,
+  // ObUnknownTC = 12, // unknown
+  cast_not_expected,
+  // ObTextTC = 13, // TinyText,MediumText, Text ,LongText, TinyBLOB,MediumBLOB, // BLOB ,LongBLOB
+  cast_to_string,
+  // ObBitTC = 14, // bit
+  cast_to_bit,
+  // ObEnumSetTC = 15, // enum, set
+  cast_not_expected,
+  // ObEnumSetInnerTC = 16,
+  cast_not_expected,
+  // ObOTimestampTC = 17, //timestamp with time zone
+  cast_to_timstamp,
+  // ObRawTC = 18, // raw
+  cast_to_string,
+  // ObIntervalTC = 19, //oracle interval type class include interval year to month and interval day to second
+  cast_not_expected,
+  // ObRowIDTC = 20, // oracle rowid typeclass, includes urowid and rowid
+  cast_not_expected,
+  // ObLobTC = 21, //oracle lob typeclass ObLobType not use
+  cast_not_expected,
+  // ObJsonTC = 22, // json type class
+  cast_to_json,
+  // ObGeometryTC = 23, // geometry type class
+  cast_not_expected,
+  // ObUserDefinedSQLTC = 24, // user defined type class in SQL
+  cast_not_expected,
+};
+
+ObJsonUtil::ObJsonCastSqlObj OB_JSON_CAST_OBJ_EXPLICIT[ObMaxTC] =
+{
+  // ObNullTC = 0, // null
+  cast_to_null,
+  // ObIntTC = 1, // int8, int16, int24, int32, int64.
+  cast_to_int,
+  // ObUIntTC = 2, // uint8, uint16, uint24, uint32, uint64.
+  cast_to_uint,
+  // ObFloatTC = 3, // float, ufloat.
+  cast_to_float,
+  // ObDoubleTC = 4, // double, udouble.
+  cast_to_double,
+  // ObNumberTC = 5, // number, unumber.
+  cast_to_number,
+  // ObDateTimeTC = 6, // datetime, timestamp.
+  cast_to_date_time,
+  // ObDateTC = 7, // date
+  cast_to_date,
+  // ObTimeTC = 8, // time
+  cast_to_time,
+  // ObYearTC = 9, // year
+  cast_to_year,
+  // ObStringTC = 10, // varchar, char, varbinary, binary.
+  cast_to_string,
+  // ObExtendTC = 11, // extend
+  cast_not_expected,
+  // ObUnknownTC = 12, // unknown
+  cast_not_expected,
+  // ObTextTC = 13, // TinyText,MediumText, Text ,LongText, TinyBLOB,MediumBLOB, // BLOB ,LongBLOB
+  cast_to_string,
+  // ObBitTC = 14, // bit
+  cast_to_bit,
+  // ObEnumSetTC = 15, // enum, set
+  cast_not_expected,
+  // ObEnumSetInnerTC = 16,
+  cast_not_expected,
+  // ObOTimestampTC = 17, //timestamp with time zone
+  cast_to_timstamp,
+  // ObRawTC = 18, // raw
+  cast_to_string,
+  // ObIntervalTC = 19, //oracle interval type class include interval year to month and interval day to second
+  cast_not_expected,
+  // ObRowIDTC = 20, // oracle rowid typeclass, includes urowid and rowid
+  cast_not_expected,
+  // ObLobTC = 21, //oracle lob typeclass ObLobType not use
+  cast_not_expected,
+  // ObJsonTC = 22, // json type class
+  cast_to_json,
+  // ObGeometryTC = 23, // geometry type class
+  cast_not_expected,
+  // ObUserDefinedSQLTC = 24, // user defined type class in SQL
+  cast_not_expected,
+};
+*/
+
 ObJsonUtil::ObJsonCastSqlScalar OB_JSON_CAST_SQL_EXPLICIT[ObMaxTC] =
 {
   // ObNullTC = 0, // null
@@ -2897,6 +3133,20 @@ ObJsonUtil::ObItemMethodValid ObJsonUtil::get_item_method_cast_res_func(ObJsonPa
   return OB_JSON_VALUE_ITEM_METHOD_CAST_FUNC[item_method - ObJsonPathNodeType::JPN_ABS][json_type];
 }
 
+// NOTE(review): these two getters stay commented out, yet this patch declares both
+// in ob_expr_json_utils.h - any caller would fail to link. Restore or drop them together.
+/*
+ObJsonUtil::ObJsonCastSqlObj ObJsonUtil::get_json_obj_cast_func(ObObjType dst_type)
+{
+  return OB_JSON_CAST_OBJ_EXPLICIT[OBJ_TYPE_TO_CLASS[dst_type]];
+}
+
+ObJsonUtil::ObJsonCastSqlDatum ObJsonUtil::get_json_datum_cast_func(ObObjType dst_type)
+{
+  return OB_JSON_CAST_DATUM_EXPLICIT[OBJ_TYPE_TO_CLASS[dst_type]];
+}
+*/
+
 ObJsonUtil::ObJsonCastSqlScalar ObJsonUtil::get_json_cast_func(ObObjType dst_type)
 {
   return OB_JSON_CAST_SQL_EXPLICIT[OBJ_TYPE_TO_CLASS[dst_type]];
diff --git a/src/sql/engine/expr/ob_expr_json_utils.h b/src/sql/engine/expr/ob_expr_json_utils.h
index 20e478b140..77bcbdc77f 100644
--- a/src/sql/engine/expr/ob_expr_json_utils.h
+++
b/src/sql/engine/expr/ob_expr_json_utils.h @@ -149,10 +149,41 @@ public: ObJsonCastParam &cast_param, uint8_t &is_type_mismatch, number::ObNumber &val); + + static int cast_json_scalar_to_sql_obj(common::ObIAllocator *allocator, + ObExecContext* exec_ctx, + ObIJsonBase *j_base, + ObExprResType col_res_type, + ObObj &res_obj); + + static int cast_json_scalar_to_sql_obj(common::ObIAllocator *allocator, + ObEvalCtx& eval_ctx, + ObIJsonBase *j_base, + ObCollationType collation, + ObAccuracy &accuracy, + ObObjType obj_type, + ObObj &res_obj); + typedef int (*ObItemMethodValid)(ObIJsonBase*& in, bool &is_null_result, common::ObIAllocator *allocator, uint8_t &is_type_mismatch); + typedef int (*ObJsonCastSqlDatum)(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObDatum &res, + uint8_t &is_type_mismatch, + const ObExpr *rt_expr); + typedef int (*ObJsonCastSqlObj)(common::ObIAllocator *allocator, + ObEvalCtx &ctx, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObJsonCastParam &cast_param, + ObObj &res, + uint8_t &is_type_mismatch, + const ObExpr *rt_expr); typedef int (*ObJsonCastSqlScalar)(common::ObIAllocator *allocator, ObEvalCtx &ctx, ObIJsonBase *j_base, @@ -179,6 +210,8 @@ public: bool strict = false); static ObJsonUtil::ObItemMethodValid get_item_method_cast_res_func(ObJsonPath* j_path, ObIJsonBase* j_base); + static ObJsonUtil::ObJsonCastSqlObj get_json_obj_cast_func(ObObjType dst_type); + static ObJsonUtil::ObJsonCastSqlDatum get_json_datum_cast_func(ObObjType dst_type); static ObJsonUtil::ObJsonCastSqlScalar get_json_cast_func(ObObjType dst_type); static int get_json_path(ObExpr* expr, ObEvalCtx &ctx, @@ -200,6 +233,15 @@ public: ObIAllocator &alloc, ObString &padding_res); static int set_mismatch_val(ObIArray& val, ObIArray& type, int64_t& opt_val, uint32_t& pos); + template + static void wrapper_set_uint(ObObjType type, uint64_t val, T& obj); + template + static 
void wrapper_set_string(ObObjType type, ObString& val, T& obj); + template + static void wrapper_set_timestamp_tz(ObObjType type, ObOTimestampData val, T& obj); + template + static void wrapper_set_decimal_int(const ObDecimalInt *decint, ObScale scale, int32_t int_bytes, T& obj); + static int init_json_path(ObIAllocator &alloc, ObExprCGCtx &op_cg_ctx, const ObRawExpr* path, ObExprJsonQueryParamInfo& res); @@ -209,4 +251,4 @@ private: } // sql } // oceanbase -#endif // OCEANBASE_SQL_OB_EXPR_JSON_UTILS_H_ \ No newline at end of file +#endif // OCEANBASE_SQL_OB_EXPR_JSON_UTILS_H_ diff --git a/src/sql/engine/expr/ob_expr_json_value.cpp b/src/sql/engine/expr/ob_expr_json_value.cpp index 62ae68fa19..ac98cfc3f0 100644 --- a/src/sql/engine/expr/ob_expr_json_value.cpp +++ b/src/sql/engine/expr/ob_expr_json_value.cpp @@ -25,7 +25,10 @@ #include "lib/oblog/ob_log_module.h" #include "ob_expr_json_func_helper.h" #include "lib/charset/ob_charset.h" -#include "ob_expr_json_utils.h" +#include "sql/engine/expr/ob_expr_json_utils.h" + +// from sql_parser_base.h +#define DEFAULT_STR_LENGTH -1 using namespace oceanbase::common; using namespace oceanbase::sql; @@ -835,6 +838,175 @@ void ObExprJsonValue::get_error_option(int8_t error_type, } } +/* +template<> +void ObExprJsonValue::wrapper_set_error_result( + const ObExpr &expr, + ObEvalCtx &ctx, + ObObj &res, int &ret, uint8_t &error_type, + ObDatum *&error_val, ObVector &mismatch_val, + ObVector &mismatch_type, + uint8_t &is_type_cast, + const ObAccuracy &accuracy, + ObObjType dst_type, + ObObjMeta& meta) +{ + bool has_lob_header = is_lob_storage(dst_type); + ObTextStringObObjResult text_result(dst_type, nullptr, &res, has_lob_header); + if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type, meta)) { + text_result.set_result(); + } +} + +template<> +void ObExprJsonValue::wrapper_set_error_result( + const ObExpr &expr, + ObEvalCtx &ctx, + ObDatum &res, int &ret, 
uint8_t &error_type, + ObDatum *&error_val, ObVector &mismatch_val, + ObVector &mismatch_type, + uint8_t &is_type_cast, + const ObAccuracy &accuracy, + ObObjType dst_type, + ObObjMeta& meta) +{ + ObTextStringDatumResult text_result(expr.datum_meta_.type_, &expr, &ctx, &res); + if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type, meta)) { + text_result.set_result(); + } +} + +template<> +int ObExprJsonValue::wrapper_text_string_result(common::ObIAllocator *allocator, + const ObExpr &expr, + ObEvalCtx &ctx, + ObString& result_value, + uint8_t error_type, + ObDatum *error_val, + ObAccuracy &accuracy, + ObObjType dst_type, + ObDatum &res, + ObVector &mismatch_val, + ObVector &mismatch_type, + uint8_t &is_type_cast, + uint8_t ascii_type, + ObObjMeta& res_meta) +{ + int ret = OB_SUCCESS; + ObTextStringDatumResult text_result(expr.datum_meta_.type_, &expr, &ctx, &res); + + if (dst_type == ObJsonType) { + if (OB_FAIL(text_result.init(result_value.length()))) { + LOG_WARN("init lob result failed"); + } else if (OB_FAIL(text_result.append(result_value))) { + LOG_WARN("failed to append realdata", K(ret), K(result_value), K(text_result)); + } + } else { + if (ascii_type == 0) { + if (OB_FAIL(text_result.init(result_value.length()))) { + LOG_WARN("init lob result failed"); + } else if (OB_FAIL(text_result.append(result_value))) { + LOG_WARN("failed to append realdata", K(ret), K(result_value), K(text_result)); + } + } else { + char *buf = NULL; + int64_t buf_len = result_value.length() * ObCharset::MAX_MB_LEN * 2; + int64_t reserve_len = 0; + int32_t length = 0; + + if (OB_FAIL(text_result.init(buf_len))) { + LOG_WARN("init lob result failed"); + } else if (OB_FAIL(text_result.get_reserved_buffer(buf, reserve_len))) { + LOG_WARN("fail to get reserved buffer", K(ret)); + } else if (reserve_len != buf_len) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get reserve len is invalid", K(ret), K(reserve_len), 
K(buf_len)); + } else if (OB_FAIL(ObJsonExprHelper::calc_asciistr_in_expr(result_value, expr.args_[0]->datum_meta_.cs_type_, + expr.datum_meta_.cs_type_, + buf, reserve_len, length))) { + LOG_WARN("fail to calc unistr", K(ret)); + } else if (OB_FAIL(text_result.lseek(length, 0))) { + LOG_WARN("text_result lseek failed", K(ret), K(text_result), K(length)); + } + } + } + + if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type, res_meta)) { + // old engine set same alloctor for wrapper, so we can use val without copy + text_result.set_result(); + } + + return ret; +} + +template<> +int ObExprJsonValue::wrapper_text_string_result(common::ObIAllocator *allocator, + const ObExpr &expr, + ObEvalCtx &ctx, + ObString& result_value, + uint8_t error_type, + ObDatum *error_val, + ObAccuracy &accuracy, + ObObjType dst_type, + ObObj &res, + ObVector &mismatch_val, + ObVector &mismatch_type, + uint8_t &is_type_cast, + uint8_t ascii_type, + ObObjMeta& res_meta) +{ + int ret = OB_SUCCESS; + bool has_lob_header = is_lob_storage(dst_type); + ObTextStringObObjResult text_result(dst_type, nullptr, &res, has_lob_header); + if (dst_type == ObJsonType) { + if (OB_FAIL(text_result.init(result_value.length(), allocator))) { + LOG_WARN("init lob result failed"); + } else if (OB_FAIL(text_result.append(result_value))) { + LOG_WARN("failed to append realdata", K(ret), K(result_value), K(text_result)); + } else { + text_result.set_result(); + } + } else { + if (ascii_type == 0) { + if (OB_FAIL(text_result.init(result_value.length(), allocator))) { + LOG_WARN("init lob result failed"); + } else if (OB_FAIL(text_result.append(result_value))) { + LOG_WARN("failed to append realdata", K(ret), K(result_value), K(text_result)); + } else { + text_result.set_result(); + } + } else { + char *buf = NULL; + int64_t buf_len = result_value.length() * ObCharset::MAX_MB_LEN * 2; + int64_t reserve_len = 0; + int32_t length = 0; + + if 
(OB_FAIL(text_result.init(buf_len, allocator))) { + LOG_WARN("init lob result failed"); + } else if (OB_FAIL(text_result.get_reserved_buffer(buf, reserve_len))) { + LOG_WARN("fail to get reserved buffer", K(ret)); + } else if (reserve_len != buf_len) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get reserve len is invalid", K(ret), K(reserve_len), K(buf_len)); + } else if (OB_FAIL(ObJsonExprHelper::calc_asciistr_in_expr(result_value, expr.args_[0]->datum_meta_.cs_type_, + expr.datum_meta_.cs_type_, + buf, reserve_len, length))) { + LOG_WARN("fail to calc unistr", K(ret)); + } else if (OB_FAIL(text_result.lseek(length, 0))) { + LOG_WARN("text_result lseek failed", K(ret), K(text_result), K(length)); + } + } + } + + if (!try_set_error_val(expr, ctx, res, ret, error_type, error_val, mismatch_val, mismatch_type, is_type_cast, accuracy, dst_type, res_meta)) { + // old engine set same alloctor for wrapper, so we can use val without copy + text_result.set_result(); + } + + return ret; +} +*/ + bool ObExprJsonValue::try_set_error_val(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res, int &ret, @@ -874,7 +1046,7 @@ bool ObExprJsonValue::try_set_error_val(const ObExpr &expr, ret = OB_ERR_UNEXPECTED; LOG_WARN("fail to get error val", K(ret)); } else { - set_val(res, json_param->error_val_); + res.set_datum(*json_param->error_val_); ret = OB_SUCCESS; } } diff --git a/src/sql/engine/expr/ob_expr_json_value.h b/src/sql/engine/expr/ob_expr_json_value.h index 898b7081fb..1ee2e75b27 100644 --- a/src/sql/engine/expr/ob_expr_json_value.h +++ b/src/sql/engine/expr/ob_expr_json_value.h @@ -46,6 +46,73 @@ public: static int eval_json_value(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); static int eval_ora_json_value(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); + static int calc_input_type(ObExprResType& types_stack, bool &is_json_input); + + static int deal_item_method_in_seek(ObIJsonBase*& in, + bool &is_null_result, + ObJsonPath *j_path, + ObIAllocator *allocator, + uint8_t 
&is_type_mismatch); + + /* code for cast accuracy check */ + template + static int check_default_val_accuracy(const ObAccuracy &accuracy, + const ObObjType &type, + const Obj *obj); + + DECLARE_SET_LOCAL_SESSION_VARS; + +private: + /* cast wrapper to dst type with accuracy check*/ + static int get_cast_ret(int ret); + static int cast_to_int(ObIJsonBase *j_base, ObObjType dst_type, int64_t &val); + static int cast_to_uint(ObIJsonBase *j_base, ObObjType dst_type, uint64_t &val); + static int cast_to_datetime(ObIJsonBase *j_base, + common::ObIAllocator *allocator, + const ObBasicSessionInfo *session, + ObEvalCtx &ctx, + const ObExpr *expr, + common::ObAccuracy &accuracy, + int64_t &val, + uint8_t &is_type_cast); + static bool type_cast_to_string(ObString &json_string, + common::ObIAllocator *allocator, + ObIJsonBase *j_base, + ObAccuracy &accuracy); + static int cast_to_otimstamp(ObIJsonBase *j_base, + const ObBasicSessionInfo *session, + ObEvalCtx &ctx, + const ObExpr *expr, + common::ObAccuracy &accuracy, + ObObjType dst_type, + ObOTimestampData &out_val, + uint8_t &is_type_cast); + static int cast_to_date(ObIJsonBase *j_base, int32_t &val, uint8_t &is_type_cast); + static int cast_to_time(ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + int64_t &val); + static int cast_to_year(ObIJsonBase *j_base, uint8_t &val); + static int cast_to_float(ObIJsonBase *j_base, ObObjType dst_type, float &val); + static int cast_to_double(ObIJsonBase *j_base, ObObjType dst_type, double &val); + static int cast_to_number(common::ObIAllocator *allocator, + ObIJsonBase *j_base, + common::ObAccuracy &accuracy, + ObObjType dst_type, + number::ObNumber &val, + uint8_t &is_type_cast); + static int cast_to_string(common::ObIAllocator *allocator, + ObIJsonBase *j_base, + ObCollationType in_cs_type, + ObCollationType dst_cs_type, + common::ObAccuracy &accuracy, + ObObjType dst_type, + ObString &val, + uint8_t &is_type_cast, + uint8_t is_truncate); + static int cast_to_bit(ObIJsonBase 
*j_base, uint64_t &val); + static int cast_to_json(common::ObIAllocator *allocator, ObIJsonBase *j_base, + ObString &val, uint8_t &is_type_cast); + static int get_empty_or_error_type(const ObExpr &expr, ObEvalCtx &ctx, uint8_t index, @@ -53,23 +120,12 @@ public: const ObAccuracy &accuracy, uint8_t &type, ObObjType dst_type); - static int deal_item_method_in_seek(ObIJsonBase*& in, - bool &is_null_result, - ObJsonPath *j_path, - ObIAllocator *allocator, - uint8_t &is_type_mismatch); - /* code for cast accuracy check */ - template - static int check_default_val_accuracy(const ObAccuracy &accuracy, - const ObObjType &type, - const Obj *obj); virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override; virtual bool need_rt_ctx() const override { return true; } virtual common::ObCastMode get_cast_mode() const { return CM_ERROR_ON_SCALE_OVER;} static int calc_empty_error_type(ObExprResType* types_stack, uint8_t pos, ObExprResType &dst_type, ObExprTypeCtx& type_ctx); - static int calc_input_type(ObExprResType& types_stack, bool &is_json_input); - DECLARE_SET_LOCAL_SESSION_VARS; + private: static bool try_set_error_val(const ObExpr &expr, ObEvalCtx &ctx, @@ -77,9 +133,10 @@ private: ObJsonExprParam* json_param, uint8_t &is_type_mismatch); static int doc_do_seek(ObJsonSeekResult &hits, bool &is_null_result, ObJsonExprParam* json_param, - ObIJsonBase *j_base, const ObExpr &expr, ObEvalCtx &ctx, bool &is_cover_by_error, - ObDatum *&return_val, - uint8_t &is_type_mismatch); + ObIJsonBase *j_base, const ObExpr &expr, ObEvalCtx &ctx, bool &is_cover_by_error, + ObDatum *&return_val, + uint8_t &is_type_mismatch); + // new sql engine static inline void set_val(ObDatum &res, ObDatum *val) { res.set_datum(*val); } @@ -87,7 +144,6 @@ private: // old sql engine static inline void set_val(ObObj &res, ObObj *val) { res = *val; } - static int get_default_value(ObExpr *expr, ObEvalCtx &ctx, const ObAccuracy &accuracy, diff --git 
a/src/sql/engine/expr/ob_expr_operator_factory.cpp b/src/sql/engine/expr/ob_expr_operator_factory.cpp index 2c2e2d6f7e..0ebeef149d 100644 --- a/src/sql/engine/expr/ob_expr_operator_factory.cpp +++ b/src/sql/engine/expr/ob_expr_operator_factory.cpp @@ -36,6 +36,7 @@ #include "sql/engine/expr/ob_expr_bit_neg.h" #include "sql/engine/expr/ob_expr_bit_left_shift.h" #include "sql/engine/expr/ob_expr_bit_right_shift.h" +#include "sql/engine/expr/ob_expr_bm25.h" #include "sql/engine/expr/ob_expr_case.h" #include "sql/engine/expr/ob_expr_oracle_decode.h" #include "sql/engine/expr/ob_expr_oracle_trunc.h" @@ -194,6 +195,10 @@ #include "sql/engine/expr/ob_expr_part_id.h" #include "sql/engine/expr/ob_expr_timestamp_add.h" #include "sql/engine/expr/ob_expr_des_hex_str.h" +#include "sql/engine/expr/ob_expr_doc_id.h" +#include "sql/engine/expr/ob_expr_doc_length.h" +#include "sql/engine/expr/ob_expr_word_segment.h" +#include "sql/engine/expr/ob_expr_word_count.h" #include "sql/engine/expr/ob_expr_ascii.h" #include "sql/engine/expr/ob_expr_truncate.h" #include "sql/engine/expr/ob_expr_bit_count.h" @@ -866,6 +871,10 @@ void ObExprOperatorFactory::register_expr_operators() REG_OP(ObExprPartId); REG_OP(ObExprLastTraceId); REG_OP(ObExprLastExecId); + REG_OP(ObExprDocID); + REG_OP(ObExprDocLength); + REG_OP(ObExprWordSegment); + REG_OP(ObExprWordCount); REG_OP(ObExprObjAccess); REG_OP(ObExprEnumToStr); REG_OP(ObExprSetToStr); @@ -1040,6 +1049,8 @@ void ObExprOperatorFactory::register_expr_operators() REG_OP(ObExprPrivSTEquals); REG_OP(ObExprPrivSTTouches); REG_OP(ObExprAlignDate4Cmp); + REG_OP(ObExprJsonQuery); + REG_OP(ObExprBM25); REG_OP(ObExprGetLock); REG_OP(ObExprIsFreeLock); @@ -1501,9 +1512,15 @@ void ObExprOperatorFactory::get_function_alias_name(const ObString &origin_name, // don't alias "power" to "pow" in oracle mode, because oracle has no // "pow" function. 
alias_name = ObString::make_string(N_POW);
+    } else if (0 == origin_name.case_compare("DOC_ID")) {
+      alias_name = ObString::make_string(N_DOC_ID);
     } else if (0 == origin_name.case_compare("ws")) {
       // ws is synonym for word_segment
       alias_name = ObString::make_string(N_WORD_SEGMENT);
+    } else if (0 == origin_name.case_compare("WORD_COUNT")) {
+      alias_name = ObString::make_string(N_WORD_COUNT);
+    } else if (0 == origin_name.case_compare("DOC_LENGTH")) {
+      alias_name = ObString::make_string(N_DOC_LENGTH);
     } else if (0 == origin_name.case_compare("inet_ntoa")) {
       // inet_ntoa is synonym for int2ip
       alias_name = ObString::make_string(N_INT2IP);
diff --git a/src/sql/engine/expr/ob_expr_word_count.cpp b/src/sql/engine/expr/ob_expr_word_count.cpp
new file mode 100644
index 0000000000..e13582f4aa
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_word_count.cpp
@@ -0,0 +1,91 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX STORAGE_FTS
+
+#include "sql/engine/expr/ob_expr_word_count.h"
+
+namespace oceanbase
+{
+using namespace common;
+namespace sql
+{
+ObExprWordCount::ObExprWordCount(ObIAllocator &allocator)
+    : ObFuncExprOperator(allocator, T_FUN_SYS_WORD_COUNT, N_WORD_COUNT, MORE_THAN_ZERO, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION)
+{
+  need_charset_convert_ = false;
+}
+
+// Type deduction: requires at least one argument and declares the result as a
+// NOT NULL uint64 carrying the default DDL accuracy of that type.
+int ObExprWordCount::calc_result_typeN(ObExprResType &type,
+                                       ObExprResType *types,
+                                       int64_t param_num,
+                                       ObExprTypeCtx &type_ctx) const
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(param_num < 1) || OB_ISNULL(types)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument for fulltext expr", K(ret), K(param_num), KP(types));
+  } else {
+    // The uint64 type belongs on the result: the scale and precision set below are
+    // the uint64 defaults. The argument types are left untouched.
+    type.set_uint64();
+    type.set_scale(ObAccuracy::DDL_DEFAULT_ACCURACY[ObUInt64Type].scale_);
+    type.set_precision(ObAccuracy::DDL_DEFAULT_ACCURACY[ObUInt64Type].precision_);
+    type.set_result_flag(NOT_NULL_FLAG);
+  }
+  return ret;
+}
+
+// Old-engine evaluation entry point; this expression does not support it.
+int ObExprWordCount::calc_resultN(ObObj &result,
+                                  const ObObj *objs_array,
+                                  int64_t param_num,
+                                  ObExprCtx &expr_ctx) const
+{
+  return OB_NOT_SUPPORTED;
+}
+
+// Code generation: validates the argument list and binds the evaluation function.
+int ObExprWordCount::cg_expr(
+    ObExprCGCtx &expr_cg_ctx,
+    const ObRawExpr &raw_expr,
+    ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  UNUSED(raw_expr);
+  UNUSED(expr_cg_ctx);
+  if (OB_UNLIKELY(rt_expr.arg_cnt_ < 1) || OB_ISNULL(rt_expr.args_)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), K(rt_expr.arg_cnt_), KP(rt_expr.args_), K(rt_expr.type_));
+  } else {
+    rt_expr.eval_func_ = generate_word_count;
+  }
+  return ret;
+}
+
+// Evaluation stub: ignores its arguments and always produces NULL.
+// NOTE(review): calc_result_typeN() flags the result NOT NULL, which this stub
+// contradicts - presumably the real value is filled in elsewhere; confirm.
+/*static*/ int ObExprWordCount::generate_word_count(
+    const ObExpr &raw_ctx,
+    ObEvalCtx &eval_ctx,
+    ObDatum &expr_datum)
+{
+  int ret = OB_SUCCESS;
+  UNUSEDx(raw_ctx, eval_ctx);
+  expr_datum.set_null();
+  return ret;
+}
+} // namespace sql
+} // namespace oceanbase
diff --git a/src/sql/engine/expr/ob_expr_word_count.h b/src/sql/engine/expr/ob_expr_word_count.h
new file mode 100644
index
0000000000..9882d67304 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_word_count.h @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_WORD_COUNT_H_ +#define OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_WORD_COUNT_H_ +#include "sql/engine/expr/ob_expr_operator.h" + +namespace oceanbase +{ +namespace sql +{ +class ObExprWordCount : public ObFuncExprOperator +{ +public: + explicit ObExprWordCount(common::ObIAllocator &alloc); + virtual ~ObExprWordCount() {} + virtual int calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const override; + virtual int calc_resultN(common::ObObj &result, + const common::ObObj *objs_array, + int64_t param_num, + common::ObExprCtx &expr_ctx) const override; + virtual common::ObCastMode get_cast_mode() const override { return CM_NULL_ON_WARN;} + virtual int cg_expr( + ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; + static int generate_word_count( + const ObExpr &raw_ctx, + ObEvalCtx &eval_ctx, + ObDatum &expr_datum); +private : + //disallow copy + DISALLOW_COPY_AND_ASSIGN(ObExprWordCount); +}; +} +} +#endif /* OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_WORD_COUNT_H_ */ diff --git a/src/sql/engine/expr/ob_expr_word_segment.cpp b/src/sql/engine/expr/ob_expr_word_segment.cpp new file mode 100644 index 0000000000..aa9dda18de --- /dev/null +++ b/src/sql/engine/expr/ob_expr_word_segment.cpp @@ -0,0 +1,153 @@ 
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX STORAGE_FTS
+
+#include "sql/engine/expr/ob_expr_word_segment.h"
+#include "sql/engine/expr/ob_expr_lob_utils.h"
+
+namespace oceanbase
+{
+using namespace common;
+namespace sql
+{
+ObExprWordSegment::ObExprWordSegment(ObIAllocator &allocator)  // registered as T_FUN_SYS_WORD_SEGMENT / N_WORD_SEGMENT with MORE_THAN_ZERO arguments
+  : ObFuncExprOperator(allocator, T_FUN_SYS_WORD_SEGMENT, N_WORD_SEGMENT, MORE_THAN_ZERO, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION)
+{
+  need_charset_convert_ = false;
+}
+
+int ObExprWordSegment::calc_result_typeN(ObExprResType &type,  // result type: VARCHAR, length = sum of the argument lengths, collation of types[0]
+                                         ObExprResType *types,
+                                         int64_t param_num,
+                                         ObExprTypeCtx &type_ctx) const
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(param_num < 1) || OB_ISNULL(types)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument for fulltext expr", K(ret), K(param_num), KP(types));
+  } else {
+    ObLength max_len = 0;
+    for (int64_t i = 0; i < param_num; ++i) {
+      max_len += types[i].get_length();
+    }
+    type.set_varchar();
+    type.set_length(max_len);  // NOTE(review): the separators inserted by generate_fulltext_column are not counted here - confirm this is intended
+    type.set_collation_type(types[0].get_collation_type());
+  }
+  return ret;
+}
+
+int ObExprWordSegment::calc_resultN(ObObj &result,  // ObObj-based evaluation is not supported; cg_expr() installs the eval function instead
+                                    const ObObj *objs_array,
+                                    int64_t param_num,
+                                    ObExprCtx &expr_ctx) const
+{
+  return OB_NOT_SUPPORTED;
+}
+
+int ObExprWordSegment::cg_expr(  // validates the runtime argument list and installs generate_fulltext_column as eval_func_
+    ObExprCGCtx &cg_ctx,
+    const ObRawExpr &raw_expr,
+    ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  UNUSED(raw_expr);
+  UNUSED(cg_ctx);
+  if (OB_UNLIKELY(rt_expr.arg_cnt_ < 1) || OB_ISNULL(rt_expr.args_)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(rt_expr.arg_cnt_), KP(rt_expr.args_), K(rt_expr.type_));
+  } else {
+    rt_expr.eval_func_ = generate_fulltext_column;
+  }
+  return ret;
+}
+
+/*static*/ int ObExprWordSegment::generate_fulltext_column(  // evaluates every argument and joins the strings with one space encoded for the collation of args_[0]; returns the first error hit
+    const ObExpr &raw_ctx,
+    ObEvalCtx &eval_ctx,
+    ObDatum &expr_datum)
+{
+  int ret = OB_SUCCESS;
+  const ObCharsetInfo *cs = nullptr;
+  if (OB_UNLIKELY(raw_ctx.arg_cnt_ <= 0) || OB_ISNULL(raw_ctx.args_)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), K(raw_ctx), KP(raw_ctx.args_));
+  } else if (OB_ISNULL(cs = ObCharset::get_charset(raw_ctx.args_[0]->obj_meta_.get_collation_type()))) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected error, charset info is nullptr", K(ret), KP(cs), K(raw_ctx.args_[0]->obj_meta_));
+  } else {
+    ObEvalCtx::TempAllocGuard alloc_guard(eval_ctx);  // its allocator is handed to read_real_string_data below
+    int64_t res_str_len = 0;
+    ObSEArray<ObString, 4> ft_parts;  // NOTE(review): template arguments were lost in this extract; ObString follows from usage, the inline capacity 4 is a guess - confirm upstream
+    const int64_t mb_max_len = cs->mbmaxlen;
+    char mb_separator[mb_max_len];  // sized to hold one character of this charset (mbmaxlen bytes)
+    int32_t length_of_separator = 0;
+    for (int64_t i = 0; OB_SUCC(ret) && i < raw_ctx.arg_cnt_; ++i) {
+      ObString res;
+      common::ObDatum *datum = nullptr;
+      if (OB_ISNULL(raw_ctx.args_[i])) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected error, nullptr", K(ret), KP(raw_ctx.args_[i]), K(i), K(raw_ctx.arg_cnt_));
+      } else if (OB_FAIL(raw_ctx.args_[i]->eval(eval_ctx, datum))) {
+        LOG_WARN("fail to eval expr", K(ret), K(raw_ctx), K(eval_ctx));
+      } else if (OB_ISNULL(datum)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected error, datum is nullptr", K(ret), KP(datum));
+      } else if (FALSE_IT(res = datum->get_string())) {
+      } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(alloc_guard.get_allocator(), *datum, raw_ctx.args_[i]->datum_meta_,
+                                                                   raw_ctx.args_[i]->obj_meta_.has_lob_header(), res))) {
+        LOG_WARN("fail to get real data.", K(ret), K(res));
+      } else if (OB_FAIL(ft_parts.push_back(res))) {
+        LOG_WARN("fail to push back ft part array", K(ret), K(res));
+      } else {
+        res_str_len += ft_parts.at(ft_parts.count() - 1).length();
+      }
+    }
+    if (OB_SUCC(ret)) {
+      wchar_t wide_char = L' ';  // the separator is a single space, converted to the collation of args_[0] by wc_mb
+      if (OB_FAIL(ObCharset::wc_mb(raw_ctx.args_[0]->obj_meta_.get_collation_type(), wide_char, mb_separator,
+                                   mb_max_len, length_of_separator))) {
+        LOG_WARN("fail to wc_mb", K(ret), K(mb_max_len), KPHEX(mb_separator, mb_max_len));
+      } else {
+        res_str_len = res_str_len + length_of_separator * (ft_parts.count() - 1);  // one separator between each pair of neighbouring parts
+      }
+    }
+    if (OB_SUCC(ret)) {
+      ObExprStrResAlloc res_alloc(raw_ctx, eval_ctx);
+      char *ptr = static_cast<char *>(res_alloc.alloc(res_str_len));
+      if (OB_UNLIKELY(NULL == ptr)) {
+        ret = OB_ALLOCATE_MEMORY_FAILED;
+        LOG_WARN("failed to allocate memory", K(ret), K(res_str_len));
+      } else {
+        char* cur_ptr = ptr;
+        for (int64_t i = 0; OB_SUCC(ret) && i < ft_parts.count(); ++i) {
+          if (0 != i) {
+            MEMCPY(cur_ptr, mb_separator, length_of_separator);
+            cur_ptr += length_of_separator;
+          }
+          MEMCPY(cur_ptr, ft_parts.at(i).ptr(), ft_parts.at(i).length());
+          cur_ptr += ft_parts.at(i).length();
+        }
+        if (OB_SUCC(ret)) {
+          ObString str(res_str_len, ptr);
+          expr_datum.set_string(str);
+          LOG_INFO("generate fulltext column", K(str), K(raw_ctx), K(eval_ctx), K(expr_datum));
+        }
+      }
+    }
+  }
+  return ret;  // must be ret: returning OB_SUCCESS here would hide every failure above while expr_datum is left unset
+}
+
+} // namespace sql
+} // namespace oceanbase
diff --git a/src/sql/engine/expr/ob_expr_word_segment.h b/src/sql/engine/expr/ob_expr_word_segment.h
new file mode 100644
index 0000000000..5ecfa91f0b
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_word_segment.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_WORD_SEGMENT_H_ +#define OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_WORD_SEGMENT_H_ +#include "sql/engine/expr/ob_expr_operator.h" + +namespace oceanbase +{ +namespace sql +{ +class ObExprWordSegment : public ObFuncExprOperator +{ +public: + explicit ObExprWordSegment(common::ObIAllocator &alloc); + virtual ~ObExprWordSegment() {} + virtual int calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const override; + virtual int calc_resultN(common::ObObj &result, + const common::ObObj *objs_array, + int64_t param_num, + common::ObExprCtx &expr_ctx) const override; + virtual common::ObCastMode get_cast_mode() const override { return CM_NULL_ON_WARN;} + virtual int cg_expr( + ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; + static int generate_fulltext_column( + const ObExpr &raw_ctx, + ObEvalCtx &eval_ctx, + ObDatum &expr_datum); +private : + //disallow copy + DISALLOW_COPY_AND_ASSIGN(ObExprWordSegment); +}; +} +} +#endif /* OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_WORD_SEGMENT_H_ */ diff --git a/src/sql/engine/expr/ob_json_param_type.h b/src/sql/engine/expr/ob_json_param_type.h index 69e7e17276..435d998d22 100644 --- a/src/sql/engine/expr/ob_json_param_type.h +++ b/src/sql/engine/expr/ob_json_param_type.h @@ -68,29 +68,37 @@ typedef enum JsnQueryAsc { } JsnQueryAsc; // json query clause position +// modify JsnQueryOpt if modify JsnQueryClause typedef enum JsnQueryClause { - JSN_QUE_DOC, // 0 - JSN_QUE_PATH, // 1 - JSN_QUE_RET, // 2 - JSN_QUE_TRUNC, // 3 - JSN_QUE_SCALAR, // 4 - JSN_QUE_PRETTY, // 5 - JSN_QUE_ASCII, // 6 - JSN_QUE_WRAPPER, // 7 - JSN_QUE_ERROR, // 8 - JSN_QUE_EMPTY, // 9 - JSN_QUE_MISMATCH, //10 + JSN_QUE_DOC, // [0:json_text] + JSN_QUE_PATH, // [1:json_path] + JSN_QUE_RET, // [2:returning_type] + JSN_QUE_TRUNC, // [3:truncate] + JSN_QUE_SCALAR, // 
[4:scalars] + JSN_QUE_PRETTY, // [5:pretty] + JSN_QUE_ASCII, // [6:ascii] + JSN_QUE_WRAPPER, // [7:wrapper] + JSN_QUE_ASIS, // [8:asis] + JSN_QUE_ERROR, // [9:error_type] + JSN_QUE_EMPTY, // [10:empty_type] + JSN_QUE_MISMATCH, // [11:mismatch] + JSN_QUE_MULTIVALUE, // [12:multivalue] + + JSN_QUE_MAX, // end } JsnQueryClause; typedef enum JsnQueryOpt { - JSN_QUE_TRUNC_OPT, // 0 - JSN_QUE_SCALAR_OPT, // 1 + JSN_QUE_TRUNC_OPT, // 0 + JSN_QUE_SCALAR_OPT, // 1 JSN_QUE_PRETTY_OPT, // 2 JSN_QUE_ASCII_OPT, // 3 - JSN_QUE_WRAPPER_OPT, // 4 - JSN_QUE_ERROR_OPT, // 5 - JSN_QUE_EMPTY_OPT, // 6 - JSN_QUE_MISMATCH_OPT, // 7 + JSN_QUE_WRAPPER_OPT, // 4 + JSN_QUE_ASIS_OPT, // 5 + JSN_QUE_ERROR_OPT, // 6 + JSN_QUE_EMPTY_OPT, // 7 + JSN_QUE_MISMATCH_OPT, // 8 + JSN_QUE_MULTIVALUE_OPT,// 9 + JSN_QUE_MAX_OPT, // end } JsnQueryOpt; // json_value @@ -125,4 +133,4 @@ typedef enum JsnValueOpt { } // namespace sql } // namespace oceanbase -#endif /* OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_JSON_PARAM_TYPE_H */ \ No newline at end of file +#endif /* OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_JSON_PARAM_TYPE_H */ diff --git a/src/sql/engine/join/ob_nested_loop_join_op.cpp b/src/sql/engine/join/ob_nested_loop_join_op.cpp index b08a7ba9ac..6fbab010d1 100644 --- a/src/sql/engine/join/ob_nested_loop_join_op.cpp +++ b/src/sql/engine/join/ob_nested_loop_join_op.cpp @@ -589,8 +589,6 @@ int ObNestedLoopJoinOp::group_read_left_operate() ret = OB_ERR_UNEXPECTED; } LOG_WARN("rescan right failed", KR(ret)); - } else if (OB_FAIL(group_join_buffer_.fill_cur_row_group_param())) { - LOG_WARN("fill group param failed", KR(ret)); } } else { ret = OB_ITER_END; @@ -635,7 +633,7 @@ int ObNestedLoopJoinOp::read_right_operate() { int ret = OB_SUCCESS; clear_evaluated_flag(); - if (OB_FAIL(get_next_right_row()) && OB_ITER_END != ret) { + if (OB_FAIL(get_next_row_from_right()) && OB_ITER_END != ret) { LOG_WARN("failed to get next right row", K(ret)); } @@ -916,7 +914,8 @@ int ObNestedLoopJoinOp::process_right_batch() const 
ObBatchRows *right_brs = &right_->get_brs(); const ObIArray &conds = get_spec().other_join_conds_; clear_evaluated_flag(); - if (OB_FAIL(right_->get_next_batch(op_max_batch_size_, right_brs))) { + DASGroupScanMarkGuard mark_guard(ctx_.get_das_ctx(), MY_SPEC.group_rescan_); + if (OB_FAIL(get_next_batch_from_right(right_brs))) { LOG_WARN("fail to get next right batch", K(ret), K(MY_SPEC)); } else if (0 == right_brs->size_ && right_brs->end_) { match_right_batch_end_ = true; @@ -1170,5 +1169,28 @@ int ObNestedLoopJoinOp::calc_other_conds(bool &is_match) return ret; } + +int ObNestedLoopJoinOp::get_next_batch_from_right(const ObBatchRows *right_brs) +{ + int ret = OB_SUCCESS; + if (!MY_SPEC.group_rescan_) { + ret = right_->get_next_batch(op_max_batch_size_, right_brs); + } else { + ret = group_join_buffer_.get_next_batch_from_right(op_max_batch_size_, right_brs); + } + return ret; +} + +int ObNestedLoopJoinOp::get_next_row_from_right() +{ + int ret = OB_SUCCESS; + if (!MY_SPEC.group_rescan_) { + ret = right_->get_next_row(); + } else { + ret = group_join_buffer_.get_next_row_from_right(); + } + return ret; +} + } // end namespace sql } // end namespace oceanbase diff --git a/src/sql/engine/join/ob_nested_loop_join_op.h b/src/sql/engine/join/ob_nested_loop_join_op.h index 6710d17075..2623614196 100644 --- a/src/sql/engine/join/ob_nested_loop_join_op.h +++ b/src/sql/engine/join/ob_nested_loop_join_op.h @@ -123,6 +123,10 @@ public: ObBatchRescanCtl &get_batch_rescan_ctl() { return batch_rescan_ctl_; } int fill_cur_row_rescan_param(); int calc_other_conds(bool &is_match); + + int get_next_batch_from_right(const ObBatchRows *right_brs); + int get_next_row_from_right(); + int do_drain_exch_multi_lvel_bnlj(); private: // state operation and transfer function type. 
diff --git a/src/sql/engine/sort/ob_sort_op_impl.cpp b/src/sql/engine/sort/ob_sort_op_impl.cpp index a9d075e4c1..220d48b1df 100644 --- a/src/sql/engine/sort/ob_sort_op_impl.cpp +++ b/src/sql/engine/sort/ob_sort_op_impl.cpp @@ -563,6 +563,26 @@ bool ObSortOpImpl::Compare::operator()( return less; } +ObSortOpImpl::ObSortOpImpl() + : inited_(false), local_merge_sort_(false), need_rewind_(false), + got_first_row_(false), sorted_(false), enable_encode_sortkey_(false), mem_context_(NULL), + mem_entify_guard_(mem_context_), tenant_id_(OB_INVALID_ID), sort_collations_(nullptr), + sort_cmp_funs_(nullptr), eval_ctx_(nullptr), datum_store_(ObModIds::OB_SQL_SORT_ROW), inmem_row_size_(0), mem_check_interval_mask_(1), + row_idx_(0), heap_iter_begin_(false), imms_heap_(NULL), ems_heap_(NULL), + next_stored_row_func_(&ObSortOpImpl::array_next_stored_row), + input_rows_(OB_INVALID_ID), input_width_(OB_INVALID_ID), + profile_(ObSqlWorkAreaType::SORT_WORK_AREA), self_monitor_info_(), + op_monitor_info_(&self_monitor_info_), sql_mem_processor_(profile_, *op_monitor_info_), + op_type_(PHY_INVALID), op_id_(UINT64_MAX), exec_ctx_(nullptr), stored_rows_(nullptr), + io_event_observer_(nullptr), buckets_(NULL), max_bucket_cnt_(0), part_hash_nodes_(NULL), + max_node_cnt_(0), part_cnt_(0), topn_cnt_(INT64_MAX), outputted_rows_cnt_(0), + is_fetch_with_ties_(false), topn_heap_(NULL), ties_array_pos_(0), + last_ties_row_(NULL), pt_buckets_(NULL), use_partition_topn_sort_(false), heap_nodes_(), cur_heap_idx_(0), + rows_(NULL), sort_exprs_(nullptr), + compress_type_(NONE_COMPRESSOR) +{ +} + ObSortOpImpl::ObSortOpImpl(ObMonitorNode &op_monitor_info) : inited_(false), local_merge_sort_(false), need_rewind_(false), got_first_row_(false), sorted_(false), enable_encode_sortkey_(false), mem_context_(NULL), @@ -571,7 +591,8 @@ ObSortOpImpl::ObSortOpImpl(ObMonitorNode &op_monitor_info) row_idx_(0), heap_iter_begin_(false), imms_heap_(NULL), ems_heap_(NULL), 
next_stored_row_func_(&ObSortOpImpl::array_next_stored_row), input_rows_(OB_INVALID_ID), input_width_(OB_INVALID_ID), - profile_(ObSqlWorkAreaType::SORT_WORK_AREA), op_monitor_info_(op_monitor_info), sql_mem_processor_(profile_, op_monitor_info_), + profile_(ObSqlWorkAreaType::SORT_WORK_AREA), self_monitor_info_(), + op_monitor_info_(&op_monitor_info), sql_mem_processor_(profile_, *op_monitor_info_), op_type_(PHY_INVALID), op_id_(UINT64_MAX), exec_ctx_(nullptr), stored_rows_(nullptr), io_event_observer_(nullptr), buckets_(NULL), max_bucket_cnt_(0), part_hash_nodes_(NULL), max_node_cnt_(0), part_cnt_(0), topn_cnt_(INT64_MAX), outputted_rows_cnt_(0), @@ -761,8 +782,8 @@ int ObSortOpImpl::init( datum_store_.set_allocator(mem_context_->get_malloc_allocator()); datum_store_.set_io_event_observer(io_event_observer_); profile_.set_exec_ctx(exec_ctx); - op_monitor_info_.otherstat_2_id_ = ObSqlMonitorStatIds::SORT_MERGE_SORT_ROUND; - op_monitor_info_.otherstat_2_value_ = 1; + op_monitor_info_->otherstat_2_id_ = ObSqlMonitorStatIds::SORT_MERGE_SORT_ROUND; + op_monitor_info_->otherstat_2_value_ = 1; ObPhysicalPlanCtx *plan_ctx = NULL; const ObPhysicalPlan *phy_plan = nullptr; if (OB_ISNULL(plan_ctx = GET_PHY_PLAN_CTX(*exec_ctx))) { @@ -772,8 +793,8 @@ int ObSortOpImpl::init( ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, phy plan must not be nullptr", K(ret)); } else if (phy_plan->get_ddl_task_id() > 0) { - op_monitor_info_.otherstat_5_id_ = ObSqlMonitorStatIds::DDL_TASK_ID; - op_monitor_info_.otherstat_5_value_ = phy_plan->get_ddl_task_id(); + op_monitor_info_->otherstat_5_id_ = ObSqlMonitorStatIds::DDL_TASK_ID; + op_monitor_info_->otherstat_5_value_ = phy_plan->get_ddl_task_id(); } } if (OB_SUCC(ret)) { @@ -952,8 +973,8 @@ int ObSortOpImpl::build_chunk(const int64_t level, Input &input, int64_t extra_s LOG_WARN("copy row to row store failed"); } else { stored_row_cnt++; - op_monitor_info_.otherstat_1_id_ = ObSqlMonitorStatIds::SORT_SORTED_ROW_COUNT; - 
op_monitor_info_.otherstat_1_value_ += 1; + op_monitor_info_->otherstat_1_id_ = ObSqlMonitorStatIds::SORT_SORTED_ROW_COUNT; + op_monitor_info_->otherstat_1_value_ += 1; total_size += src_store_row->row_size_; } } @@ -965,8 +986,8 @@ int ObSortOpImpl::build_chunk(const int64_t level, Input &input, int64_t extra_s LOG_WARN("finish add row failed", K(ret)); } else { const int64_t sort_io_time = ObTimeUtility::fast_current_time() - curr_time; - op_monitor_info_.otherstat_4_id_ = ObSqlMonitorStatIds::SORT_DUMP_DATA_TIME; - op_monitor_info_.otherstat_4_value_ += sort_io_time; + op_monitor_info_->otherstat_4_id_ = ObSqlMonitorStatIds::SORT_DUMP_DATA_TIME; + op_monitor_info_->otherstat_4_value_ += sort_io_time; LOG_TRACE("dump sort file", "level", level, "rows", chunk->datum_store_.get_row_cnt(), @@ -1085,7 +1106,7 @@ int ObSortOpImpl::before_add_row() if (OB_FAIL(sql_mem_processor_.init( &mem_context_->get_malloc_allocator(), tenant_id_, - size, op_monitor_info_.op_type_, op_monitor_info_.op_id_, exec_ctx_))) { + size, op_monitor_info_->op_type_, op_monitor_info_->op_id_, exec_ctx_))) { LOG_WARN("failed to init sql mem processor", K(ret)); } else { datum_store_.set_dir_id(sql_mem_processor_.get_dir_id()); @@ -1570,8 +1591,8 @@ int ObSortOpImpl::do_partition_topn_sort() { } else { std::sort(&heap_rows.at(0), &heap_rows.at(0) + heap_rows.count(), CopyableComparer(comp_)); } - op_monitor_info_.otherstat_1_id_ = ObSqlMonitorStatIds::SORT_SORTED_ROW_COUNT; - op_monitor_info_.otherstat_1_value_ += cur_heap->heap_.count(); + op_monitor_info_->otherstat_1_id_ = ObSqlMonitorStatIds::SORT_SORTED_ROW_COUNT; + op_monitor_info_->otherstat_1_value_ += cur_heap->heap_.count(); } } else { //partition limit, do nothing @@ -1950,8 +1971,8 @@ int ObSortOpImpl::sort_inmem_data() ret = comp_.ret_; LOG_WARN("compare failed", K(ret)); } - op_monitor_info_.otherstat_1_id_ = ObSqlMonitorStatIds::SORT_SORTED_ROW_COUNT; - op_monitor_info_.otherstat_1_value_ += rows_->count(); + 
op_monitor_info_->otherstat_1_id_ = ObSqlMonitorStatIds::SORT_SORTED_ROW_COUNT; + op_monitor_info_->otherstat_1_value_ += rows_->count(); } if (OB_SUCC(ret) && need_imms()) { if (NULL == imms_heap_) { @@ -1984,16 +2005,16 @@ int ObSortOpImpl::sort_inmem_data() LOG_WARN("heap push back failed", K(ret)); } } - op_monitor_info_.otherstat_1_id_ = ObSqlMonitorStatIds::SORT_SORTED_ROW_COUNT; - op_monitor_info_.otherstat_1_value_ += 1; + op_monitor_info_->otherstat_1_id_ = ObSqlMonitorStatIds::SORT_SORTED_ROW_COUNT; + op_monitor_info_->otherstat_1_value_ += 1; prev = &rows_->at(i); } heap_iter_begin_ = false; } } const int64_t sort_cpu_time = ObTimeUtility::fast_current_time() - curr_time; - op_monitor_info_.otherstat_3_id_ = ObSqlMonitorStatIds::SORT_INMEM_SORT_TIME; - op_monitor_info_.otherstat_3_value_ += sort_cpu_time; + op_monitor_info_->otherstat_3_id_ = ObSqlMonitorStatIds::SORT_INMEM_SORT_TIME; + op_monitor_info_->otherstat_3_value_ += sort_cpu_time; } return ret; } @@ -2067,8 +2088,8 @@ int ObSortOpImpl::sort() return ret; }; const int64_t level = sort_chunks_.get_first()->level_ + 1; - op_monitor_info_.otherstat_2_id_ = ObSqlMonitorStatIds::SORT_MERGE_SORT_ROUND; - op_monitor_info_.otherstat_2_value_ = level; + op_monitor_info_->otherstat_2_id_ = ObSqlMonitorStatIds::SORT_MERGE_SORT_ROUND; + op_monitor_info_->otherstat_2_value_ = level; if (OB_FAIL(build_chunk(level, input))) { LOG_WARN("build chunk failed", K(ret)); } else { @@ -2343,7 +2364,7 @@ int ObSortOpImpl::add_heap_sort_row(const common::ObIArray &exprs, int64_t size = OB_INVALID_ID == input_rows_ ? 
0 : input_rows_ * input_width_ * 2; if (OB_FAIL(sql_mem_processor_.init( &mem_context_->get_malloc_allocator(), - tenant_id_, size, op_monitor_info_.op_type_, op_monitor_info_.op_id_, &eval_ctx_->exec_ctx_))) { + tenant_id_, size, op_monitor_info_->op_type_, op_monitor_info_->op_id_, &eval_ctx_->exec_ctx_))) { LOG_WARN("failed to init sql mem processor", K(ret)); } } else { diff --git a/src/sql/engine/sort/ob_sort_op_impl.h b/src/sql/engine/sort/ob_sort_op_impl.h index 0a709bb250..301647f8e7 100644 --- a/src/sql/engine/sort/ob_sort_op_impl.h +++ b/src/sql/engine/sort/ob_sort_op_impl.h @@ -246,6 +246,7 @@ public: static const int64_t INMEMORY_MERGE_SORT_WARN_WAYS = 10000; explicit ObSortOpImpl(ObMonitorNode &op_monitor_info); + ObSortOpImpl(); virtual ~ObSortOpImpl(); // if rewind id not needed, we will release the resource after iterate end. @@ -402,16 +403,16 @@ public: void set_operator_id(uint64_t op_id) { op_id_ = op_id; } void collect_memory_dump_info(ObMonitorNode &info) { - info.otherstat_1_id_ = op_monitor_info_.otherstat_1_id_; - info.otherstat_1_value_ = op_monitor_info_.otherstat_1_value_; - info.otherstat_2_id_ = op_monitor_info_.otherstat_2_id_; - info.otherstat_2_value_ = op_monitor_info_.otherstat_2_value_; - info.otherstat_3_id_ = op_monitor_info_.otherstat_3_id_; - info.otherstat_3_value_ = op_monitor_info_.otherstat_3_value_; - info.otherstat_4_id_ = op_monitor_info_.otherstat_4_id_; - info.otherstat_4_value_ = op_monitor_info_.otherstat_4_value_; - info.otherstat_6_id_ = op_monitor_info_.otherstat_6_id_; - info.otherstat_6_value_ = op_monitor_info_.otherstat_6_value_; + info.otherstat_1_id_ = op_monitor_info_->otherstat_1_id_; + info.otherstat_1_value_ = op_monitor_info_->otherstat_1_value_; + info.otherstat_2_id_ = op_monitor_info_->otherstat_2_id_; + info.otherstat_2_value_ = op_monitor_info_->otherstat_2_value_; + info.otherstat_3_id_ = op_monitor_info_->otherstat_3_id_; + info.otherstat_3_value_ = op_monitor_info_->otherstat_3_value_; + 
info.otherstat_4_id_ = op_monitor_info_->otherstat_4_id_; + info.otherstat_4_value_ = op_monitor_info_->otherstat_4_value_; + info.otherstat_6_id_ = op_monitor_info_->otherstat_6_id_; + info.otherstat_6_value_ = op_monitor_info_->otherstat_6_value_; } inline void set_io_event_observer(ObIOEventObserver *observer) { @@ -833,7 +834,8 @@ protected: int64_t input_rows_; int64_t input_width_; ObSqlWorkAreaProfile profile_; - ObMonitorNode &op_monitor_info_; + ObMonitorNode self_monitor_info_; + ObMonitorNode *op_monitor_info_; ObSqlMemMgrProcessor sql_mem_processor_; ObPhyOperatorType op_type_; uint64_t op_id_; diff --git a/src/sql/engine/subquery/ob_subplan_filter_op.cpp b/src/sql/engine/subquery/ob_subplan_filter_op.cpp index 08b2d8defc..629b632caf 100644 --- a/src/sql/engine/subquery/ob_subplan_filter_op.cpp +++ b/src/sql/engine/subquery/ob_subplan_filter_op.cpp @@ -59,8 +59,6 @@ ObSubQueryIterator::ObSubQueryIterator(ObOperator &op) batch_size_(0), batch_row_pos_(0), iter_end_(false), - is_new_batch_(false), - current_group_(0), das_batch_params_recovery_() { } @@ -71,10 +69,19 @@ int ObSubQueryIterator::get_next_row() bool is_from_store = init_plan_ && inited_; if (is_from_store) { ret = store_it_.get_next_row(get_output(), op_.get_eval_ctx()); + } else if (parent_->enable_left_das_batch()) { + uint64_t parent_spf_group = 0; + int64_t parent_group_rescan_cnt = 0; + const GroupParamArray *group_params = nullptr; + parent_->get_current_group(parent_spf_group); + parent_->get_current_batch_cnt(parent_group_rescan_cnt); + group_params = parent_->get_rescan_params_info(); + GroupParamBackupGuard guard(op_.get_exec_ctx().get_das_ctx()); + guard.bind_batch_rescan_params(parent_spf_group, parent_group_rescan_cnt, group_params); + ret = op_.get_next_row(); } else { ret = op_.get_next_row(); } - return ret; } @@ -99,77 +106,17 @@ int ObSubQueryIterator::rewind(const bool reset_onetime_plan /* = false */) if (OB_FAIL(alloc_das_batch_store())) { LOG_WARN("Alloc DAS batch 
parameter store fail.", K(ret)); } else { - //We use GroupParamBackupGuard to save and resume data in param store. - //1.For SPF operator, we have multiple right child, every time rescan we - //need switch the param in param store, But all of param store index in - //the same array, So we switch all of the param store, but resume them - //after we rescan. - //2.For SPF operator, we nedd to support jump read. Difference child params - //may in the difference group id, current child rescan should not influence - //other child's param store stage. - //3.For nesting SPF with other SPF, parent and child SPF may in difference - //stage rescan or get_next_row, the expr may reuse, So child SPF rescan - //should not change the param store stage. - //So we implement the GroupParamBackupGuard to make Paramstore like a stack, - //protect every time change the Paramstore will be resume. - bool new_group = is_new_batch_; - if (OB_SUCC(ret) && is_new_batch_) { - GroupParamBackupGuard guard(eval_ctx_, - das_batch_params_recovery_, - parent_->get_spec().rescan_params_, - parent_->get_spec().rescan_params_.count()); - - ret = parent_->bind_das_batch_params_to_store(); - if (OB_SUCC(ret) && OB_FAIL(op_.rescan())) { - if(OB_ITER_END == ret) { - ret = OB_ERR_UNEXPECTED; - } - LOG_WARN("failed to do rescan", K(ret)); - } - current_group_ = 0; - is_new_batch_ = 0; - } - uint64_t parent_spf_group = 0; + int64_t parent_group_rescan_cnt = 0; + const GroupParamArray *group_params = nullptr; if(OB_SUCC(ret)) { parent_->get_current_group(parent_spf_group); - } - if (OB_SUCC(ret) && current_group_ < parent_spf_group) { - if (new_group) { - /** - * Since the initialization of the lookup Iterator is done within index_lookup.get_next_row(), - * when SPF enters skip scan, - * we need to mark the jump_read_group_id for each scan Iterator - * and determine the number of rows to skip based on the current group_id. 
- * The issue with the lookup process here is that when SPF performs multiple skip scans, - * the lookup process may not be executed at all, - * resulting in uninitialized lookup Iterators. - * As a result, when subsequent skip reads occur, - * the lookup Iterator will not be set with the correct jum_read_group_id. - * Therefore, it is necessary to ensure that get_next_row() is called at least once to - * initialize the lookup Iterator on the right branch. - **/ - if (OB_FAIL(op_.get_next_row())) { - if (OB_ITER_END == ret) { - //ignore OB_ITER_END - ret = OB_SUCCESS; - } else { - LOG_WARN("get next row from child failed", K(ret)); - } - } - } - if (OB_SUCC(ret)) { - int64_t old_jump_read_group_id; - old_jump_read_group_id = op_.get_exec_ctx().get_das_ctx().jump_read_group_id_; - op_.get_exec_ctx().get_das_ctx().jump_read_group_id_ = parent_spf_group; - if (OB_FAIL(op_.rescan())) { - if(OB_ITER_END == ret) { - ret = OB_ERR_UNEXPECTED; - } - LOG_WARN("Das jump read rescan fail.", K(ret)); - } - op_.get_exec_ctx().get_das_ctx().jump_read_group_id_ = old_jump_read_group_id; - current_group_ = parent_spf_group; + parent_->get_current_batch_cnt(parent_group_rescan_cnt); + group_params = parent_->get_rescan_params_info(); + GroupParamBackupGuard guard(op_.get_exec_ctx().get_das_ctx()); + guard.bind_batch_rescan_params(parent_spf_group, parent_group_rescan_cnt, group_params); + if (OB_FAIL(op_.rescan())) { + LOG_WARN("failed to do rescan", K(ret)); } } } @@ -201,8 +148,6 @@ void ObSubQueryIterator::reuse() batch_size_ = 0; batch_row_pos_ = 0; iter_end_ = false; - is_new_batch_ = false; - current_group_ = 0; das_batch_params_recovery_.reset(); } @@ -365,38 +310,6 @@ int ObSubQueryIterator::alloc_das_batch_store() return ret; } - -void GroupParamBackupGuard::save_das_batch_store() -{ - //int64_t params_count = 0; - //params_count = parent_->get_spec().rescan_params_.count(); - OB_ASSERT(!das_batch_params_recovery_.empty()); - OB_ASSERT(das_batch_params_recovery_.count() == 
params_count_); - ObPhysicalPlanCtx *phy_ctx = eval_ctx_.exec_ctx_.get_physical_plan_ctx(); - ParamStore ¶m_store = phy_ctx->get_param_store_for_update(); - for (int64_t i = 0; i < params_count_; ++i) { - const ObDynamicParamSetter &rescan_param = rescan_params_.at(i); - //Always shallow copy for state save. - das_batch_params_recovery_.at(i) = param_store.at(rescan_param.param_idx_); - } -} - -void GroupParamBackupGuard::resume_das_batch_store() -{ - OB_ASSERT(!das_batch_params_recovery_.empty()); - OB_ASSERT(das_batch_params_recovery_.count() == params_count_); - ObPhysicalPlanCtx *phy_ctx = eval_ctx_.exec_ctx_.get_physical_plan_ctx(); - ParamStore ¶m_store = phy_ctx->get_param_store_for_update(); - for (int64_t i = 0; i < params_count_; ++i) { - const ObDynamicParamSetter &rescan_param = rescan_params_.at(i); - //Always shallow copy for state resume. - param_store.at(rescan_param.param_idx_) = das_batch_params_recovery_.at(i); - ObExpr *dst = rescan_param.dst_; - ObDatum ¶m_datum = dst->locate_datum_for_write(eval_ctx_); - param_datum.from_obj(das_batch_params_recovery_.at(i), dst->obj_datum_map_); - } -} - ObSubPlanFilterSpec::ObSubPlanFilterSpec(ObIAllocator &alloc, const ObPhyOperatorType type) : ObOpSpec(alloc, type), rescan_params_(alloc), @@ -467,7 +380,9 @@ ObSubPlanFilterOp::ObSubPlanFilterOp( cur_params_(), cur_param_idxs_(), cur_param_expr_idxs_(), - last_store_row_mem_(NULL) + last_store_row_mem_(NULL), + group_rescan_cnt_(0), + rescan_params_info_() { } @@ -665,8 +580,7 @@ int ObSubPlanFilterOp::inner_open() MY_SPEC.enable_px_batch_rescans_.at(i)) { enable_left_px_batch_ = true; } - if (!MY_SPEC.exec_param_idxs_inited_ || MY_SPEC.enable_das_group_rescan_) { - // spf group rescan can't use hash-map optimize, it will induces skip-scan, which will induces correctness problem + if (!MY_SPEC.exec_param_idxs_inited_) { //unittest or old version, do not init hashmap } else if (OB_FAIL(iter->init_mem_entity())) { LOG_WARN("failed to init mem_entity", 
K(ret)); @@ -733,6 +647,7 @@ int ObSubPlanFilterOp::inner_close() destroy_update_set_mem(); if (MY_SPEC.enable_das_group_rescan_) { das_batch_params_.reset(); + rescan_params_info_.reset(); } return OB_SUCCESS; } @@ -852,12 +767,7 @@ int ObSubPlanFilterOp::handle_next_row() OZ(left_rows_.finish_add_row(false)); OZ(left_rows_.begin(left_rows_iter_)); if (MY_SPEC.enable_das_group_rescan_) { - //Lazy batch rescan right iterator. - //Just set the flag, do the rescan when call the iter->rewind(). - for(int32_t i = 1; OB_SUCC(ret) && i < child_cnt_; ++i) { - Iterator *&iter = subplan_iters_.at(i - 1); - iter->set_new_batch(true); - } + group_rescan_cnt_++; } } } @@ -1037,6 +947,7 @@ int ObSubPlanFilterOp::handle_next_batch_with_group_rescan(const int64_t op_max_ bool stop_fetch = false; ObEvalCtx::BatchInfoScopeGuard guard(eval_ctx_); uint64_t left_rows_total_cnt = 0; + DASGroupScanMarkGuard mark_guard(ctx_.get_das_ctx(), true); if (left_rows_iter_.is_valid() && left_rows_iter_.has_next()) { // fetch data from left store } else { @@ -1091,12 +1002,7 @@ int ObSubPlanFilterOp::handle_next_batch_with_group_rescan(const int64_t op_max_ } if (OB_SUCC(ret)) { - //Lazy batch rescan right iterator. - //Just set the flag, do the rescan when call the iter->rewind(). 
- for(int32_t i = 1; OB_SUCC(ret) && i < child_cnt_; ++i) { - Iterator *&iter = subplan_iters_.at(i - 1); - iter->set_new_batch(true); - } + group_rescan_cnt_++; } } } @@ -1461,6 +1367,16 @@ int ObSubPlanFilterOp::alloc_das_batch_params(uint64_t group_size) LOG_WARN("allocate das params failed", KR(ret), K(MY_SPEC.rescan_params_.count())); } } + if (OB_SUCC(ret) && !das_batch_params_.empty()) { + ret = rescan_params_info_.allocate_array(ctx_.get_allocator(), + MY_SPEC.rescan_params_.count()); + if (OB_SUCC(ret)) { + for (int64_t i = 0; OB_SUCC(ret) && i < rescan_params_info_.count(); ++i) { + rescan_params_info_.at(i).param_idx_ = MY_SPEC.rescan_params_.at(i).param_idx_; + rescan_params_info_.at(i).gr_param_ = &das_batch_params_.at(i); + } + } + } return ret; } @@ -1534,28 +1450,6 @@ int ObSubPlanFilterOp::fill_cur_row_das_batch_param(ObEvalCtx& eval_ctx, uint64_ return ret; } -int ObSubPlanFilterOp::bind_das_batch_params_to_store() const -{ - int ret = OB_SUCCESS; - int64_t param_cnt = MY_SPEC.rescan_params_.count(); - ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_); - ParamStore ¶m_store = plan_ctx->get_param_store_for_update(); - if (OB_UNLIKELY(param_cnt != das_batch_params_.count())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("das params count is invalid", KR(ret), K(param_cnt), K(das_batch_params_.count())); - } - for (int64_t i = 0; OB_SUCC(ret) && i < param_cnt; ++i) { - const ObDynamicParamSetter &rescan_param = MY_SPEC.rescan_params_.at(i); - int64_t param_idx = rescan_param.param_idx_; - int64_t array_obj_addr = reinterpret_cast(&das_batch_params_.at(i)); - param_store.at(param_idx).set_extend(array_obj_addr, T_EXT_SQL_ARRAY); - } - if (OB_SUCC(ret)) { - LOG_DEBUG("bind das param to store", K(das_batch_params_), K(plan_ctx->get_param_store())); - } - return ret; -} - void ObSubPlanFilterOp::get_current_group(uint64_t& current_group) const { current_group = current_group_; diff --git a/src/sql/engine/subquery/ob_subplan_filter_op.h 
b/src/sql/engine/subquery/ob_subplan_filter_op.h index 1d5dae24ce..c4dfd395b1 100644 --- a/src/sql/engine/subquery/ob_subplan_filter_op.h +++ b/src/sql/engine/subquery/ob_subplan_filter_op.h @@ -103,7 +103,6 @@ public: //for vectorized end bool is_onetime_plan() const { return onetime_plan_; } - void set_new_batch(bool new_batch) { is_new_batch_ = new_batch;}; TO_STRING_KV(K(onetime_plan_), K(init_plan_), K(inited_)); //a row cache for hash optimizer to use @@ -116,8 +115,6 @@ private: // for das batch spf int alloc_das_batch_store(); - int save_das_batch_store(); - int resume_das_batch_store(); // for das batch spf end ObOperator &op_; bool onetime_plan_; @@ -142,9 +139,6 @@ private: bool iter_end_; // for vectorized end - // for das batch spf - bool is_new_batch_; - uint64_t current_group_; common::ObArrayWrap das_batch_params_recovery_; // for das batch spf end }; @@ -217,6 +211,7 @@ public: int fill_cur_row_das_batch_param(ObEvalCtx& eval_ctx, uint64_t current_group) const; int bind_das_batch_params_to_store() const; void get_current_group(uint64_t& current_group) const; + void get_current_batch_cnt(int64_t& current_batch_cnt) const { current_batch_cnt = group_rescan_cnt_; } bool enable_left_das_batch() const {return MY_SPEC.enable_das_group_rescan_;} //for DAS batch SPF end @@ -227,6 +222,7 @@ public: ObBatchRescanCtl &get_batch_rescan_ctl() { return batch_rescan_ctl_; } int handle_next_batch_with_px_rescan(const int64_t op_max_batch_size); int handle_next_batch_with_group_rescan(const int64_t op_max_batch_size); + const GroupParamArray *get_rescan_params_info() const { return &rescan_params_info_; } private: void set_param_null() { set_pushdown_param_null(MY_SPEC.rescan_params_); }; void destroy_subplan_iters(); @@ -266,6 +262,7 @@ private: // for das batch rescan uint64_t max_group_size_; //Das batch rescan size; uint64_t current_group_; //The group id in this time right iter rescan; + common::ObArrayWrap das_batch_params_; // for das batch rescan end 
ObChunkDatumStore left_rows_; @@ -285,37 +282,12 @@ private: public: static const int64_t MAX_PX_RESCAN_PARAMS_SIZE = 4 << 20; // 4M static const int64_t MAX_DUMP_SIZE = 16 << 20; // 16M -}; - -class GroupParamBackupGuard -{ public: - GroupParamBackupGuard(ObEvalCtx& eval_ctx, - common::ObArrayWrap& das_batch_params_recovery, - const common::ObFixedArray& rescan_params, - int64_t params_count) - : eval_ctx_(eval_ctx), - das_batch_params_recovery_(das_batch_params_recovery), - rescan_params_(rescan_params), - params_count_(params_count) - { - save_das_batch_store(); - } - ~GroupParamBackupGuard() - { - resume_das_batch_store(); - } -private: - void save_das_batch_store(); - void resume_das_batch_store(); -private: - ObEvalCtx& eval_ctx_; - common::ObArrayWrap& das_batch_params_recovery_; - const common::ObFixedArray& rescan_params_; - int64_t params_count_; + // Count of reals rescan initiated by the spf operator; for batch rescan, it was plus one for each batch; + // For normal rescan, it was plus one for each rescan + int64_t group_rescan_cnt_; + GroupParamArray rescan_params_info_; }; - - } // end namespace sql } // end namespace oceanbase diff --git a/src/sql/engine/table/ob_index_lookup_op_impl.cpp b/src/sql/engine/table/ob_index_lookup_op_impl.cpp index 001ce282d4..76363c542b 100644 --- a/src/sql/engine/table/ob_index_lookup_op_impl.cpp +++ b/src/sql/engine/table/ob_index_lookup_op_impl.cpp @@ -26,8 +26,6 @@ ObIndexLookupOpImpl::ObIndexLookupOpImpl(LookupType lookup_type, const int64_t d default_batch_row_count_(default_batch_row_count), state_(INDEX_SCAN), index_end_(false), - index_group_cnt_(1), - lookup_group_cnt_(1), lookup_rowkey_cnt_(0), lookup_row_cnt_(0) {} @@ -43,17 +41,16 @@ int ObIndexLookupOpImpl::get_next_row() switch (state_) { case INDEX_SCAN: { lookup_rowkey_cnt_ = 0; - if (OB_FAIL(switch_index_table_and_rowkey_group_id())) { - LOG_WARN("failed to switch index table and rowkey group id", K(ret)); - } - int64_t start_group_idx = 
get_index_group_cnt() - 1; - while (OB_SUCC(ret) && lookup_rowkey_cnt_ < default_row_batch_cnt) { + lookup_row_cnt_ = 0; + reset_lookup_state(); + while (OB_SUCC(ret) && !index_end_ && lookup_rowkey_cnt_ < default_row_batch_cnt) { do_clear_evaluated_flag(); if (OB_FAIL(get_next_row_from_index_table())) { if (OB_ITER_END != ret) { LOG_WARN("get next row from index table failed", K(ret)); } else { - LOG_DEBUG("get next row from index table",K(ret), K(index_group_cnt_), K(lookup_rowkey_cnt_)); + index_end_ = true; + ret = OB_SUCCESS; } } else if (OB_FAIL(process_data_table_rowkey())) { LOG_WARN("process data table rowkey with das failed", K(ret)); @@ -61,20 +58,16 @@ int ObIndexLookupOpImpl::get_next_row() ++lookup_rowkey_cnt_; } } - if (OB_SUCC(ret) || OB_ITER_END == ret) { - state_ = DO_LOOKUP; - index_end_ = (OB_ITER_END == ret); - ret = OB_SUCCESS; - if (is_group_scan()) { - if (OB_FAIL((init_group_range(start_group_idx, get_index_group_cnt())))) { - LOG_WARN("failed to init group range",K(ret), K(start_group_idx), K(get_index_group_cnt())); - } + if (OB_SUCC(ret)) { + if (lookup_rowkey_cnt_ > 0) { + state_ = DO_LOOKUP; + } else { + state_ = FINISHED; } } break; } case DO_LOOKUP: { - lookup_row_cnt_ = 0; if (OB_FAIL(do_index_lookup())) { LOG_WARN("do index lookup failed", K(ret)); } else { @@ -85,8 +78,11 @@ int ObIndexLookupOpImpl::get_next_row() case OUTPUT_ROWS: { if (OB_FAIL(get_next_row_from_data_table())) { if (OB_ITER_END == ret) { - if (OB_FAIL(process_next_index_batch_for_row())) { - LOG_WARN("failed to process next index batch for row", K(ret)); + ret = OB_SUCCESS; + if (OB_FAIL(check_lookup_row_cnt())) { + LOG_WARN("failed to check table lookup", K(ret)); + } else { + state_ = INDEX_SCAN; } } else { LOG_WARN("look up get next row failed", K(ret)); @@ -94,14 +90,13 @@ int ObIndexLookupOpImpl::get_next_row() } else { got_next_row = true; ++lookup_row_cnt_; - LOG_DEBUG("got next row from table lookup", K(ret), K(lookup_row_cnt_), K(lookup_rowkey_cnt_), 
K(lookup_group_cnt_), K(index_group_cnt_), "main table output", ROWEXPR2STR(get_eval_ctx(), get_output_expr()) ); + LOG_DEBUG("local index lookup get next row", K(ret), K(lookup_row_cnt_), K(lookup_rowkey_cnt_), + "main table output", ROWEXPR2STR(get_eval_ctx(), get_output_expr())); } break; } case FINISHED: { - if (OB_SUCC(ret) || OB_ITER_END == ret) { - ret = OB_ITER_END; - } + ret = OB_ITER_END; break; } default: { @@ -117,58 +112,84 @@ int ObIndexLookupOpImpl::get_next_row() int ObIndexLookupOpImpl::get_next_rows(int64_t &count, int64_t capacity) { int ret = OB_SUCCESS; - bool got_next_row = false; + bool got_next_rows = false; int64_t simulate_batch_row_cnt = - EVENT_CALL(EventTable::EN_TABLE_LOOKUP_BATCH_ROW_COUNT); int64_t default_row_batch_cnt = simulate_batch_row_cnt > 0 ? simulate_batch_row_cnt : default_batch_row_count_; LOG_DEBUG("simulate lookup row batch count", K(simulate_batch_row_cnt), K(default_row_batch_cnt)); do { switch (state_) { case INDEX_SCAN: { + int64_t rowkey_count = 0; lookup_rowkey_cnt_ = 0; - if (OB_FAIL(switch_index_table_and_rowkey_group_id())) { - LOG_WARN("failed to switch index table and rowkey group id", K(ret)); + lookup_row_cnt_ = 0; + reset_lookup_state(); + while (OB_SUCC(ret) && !index_end_ && lookup_rowkey_cnt_ < default_row_batch_cnt) { + do_clear_evaluated_flag(); + if (OB_FAIL(get_next_rows_from_index_table(rowkey_count, default_row_batch_cnt - lookup_rowkey_cnt_))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next rows from index table failed", K(ret)); + } else { + if (rowkey_count == 0) { + index_end_ = true; + } + ret = OB_SUCCESS; + } + } + if (OB_SUCC(ret) && rowkey_count > 0) { + if (OB_FAIL(process_data_table_rowkeys(rowkey_count, nullptr))) { + LOG_WARN("process data table rowkeys with das failed", K(ret)); + } else { + lookup_rowkey_cnt_ += rowkey_count; + } + } } - int64_t start_group_idx = get_index_group_cnt() - 1; - if (OB_FAIL(ret)) { - } else if (OB_FAIL(do_index_table_scan_for_rows(capacity 
,start_group_idx, default_row_batch_cnt))) { - LOG_WARN("failed to do index table scan",K(ret)); + + if (OB_SUCC(ret)) { + if (lookup_rowkey_cnt_ > 0) { + state_ = DO_LOOKUP; + } else { + state_ = FINISHED; + } } break; } case DO_LOOKUP: { - lookup_row_cnt_ = 0; if (OB_FAIL(do_index_lookup())) { LOG_WARN("do index lookup failed", K(ret)); } else { - LOG_DEBUG("do index lookup end", K(get_index_group_cnt()), K(ret)); state_ = OUTPUT_ROWS; } break; } case OUTPUT_ROWS: { + count = 0; if (OB_FAIL(get_next_rows_from_data_table(count, capacity))) { if (OB_ITER_END == ret) { - if (OB_FAIL(process_next_index_batch_for_rows(count))) { - LOG_WARN("failed to process next index batch for rows", K(ret)); + ret = OB_SUCCESS; + if (count > 0) { + lookup_row_cnt_ += count; + got_next_rows = true; + } else { + if (OB_FAIL(check_lookup_row_cnt())) { + LOG_WARN("failed to check table lookup", K(ret)); + } else { + state_ = INDEX_SCAN; + } } } else { - LOG_WARN("look up get next row failed", K(ret)); + LOG_WARN("look up get next rows failed", K(ret)); } } else { - got_next_row = true; - update_state_in_output_rows_state(count); + got_next_rows = true; + lookup_row_cnt_ += count; const ObBitVector *skip = NULL; PRINT_VECTORIZED_ROWS(SQL, DEBUG, get_eval_ctx(), get_output_expr(), count, skip, - K(ret), K(lookup_row_cnt_), K(lookup_rowkey_cnt_), - K(lookup_group_cnt_), K(index_group_cnt_)); + K(ret), K(lookup_row_cnt_), K(lookup_rowkey_cnt_)); } break; } case FINISHED: { - update_states_in_finish_state(); - if (OB_SUCC(ret) || OB_ITER_END == ret) { - ret = OB_ITER_END; - } + ret = OB_ITER_END; break; } default: { @@ -176,11 +197,8 @@ int ObIndexLookupOpImpl::get_next_rows(int64_t &count, int64_t capacity) LOG_WARN("unexpected state", K(state_)); } } - } while (!got_next_row && OB_SUCC(ret)); - if (lookup_type_ == GLOBAL_INDEX && OB_ITER_END == ret) { - ret = OB_SUCCESS; - update_states_after_finish_state(); - } + } while (!got_next_rows && OB_SUCC(ret)); + return ret; } diff --git 
a/src/sql/engine/table/ob_index_lookup_op_impl.h b/src/sql/engine/table/ob_index_lookup_op_impl.h index 992c43b7f3..e1474b511a 100644 --- a/src/sql/engine/table/ob_index_lookup_op_impl.h +++ b/src/sql/engine/table/ob_index_lookup_op_impl.h @@ -38,7 +38,8 @@ protected: INDEX_SCAN, DO_LOOKUP, OUTPUT_ROWS, - FINISHED + FINISHED, + AUX_LOOKUP }; public: ObIndexLookupOpImpl(LookupType lookup_type, const int64_t default_batch_row_count); @@ -46,35 +47,18 @@ public: virtual int get_next_row(); virtual int get_next_rows(int64_t &count, int64_t capacity); virtual void do_clear_evaluated_flag() = 0; + virtual int reset_lookup_state() = 0; virtual int get_next_row_from_index_table() = 0; + virtual int get_next_rows_from_index_table(int64_t &count, int64_t capacity) = 0; virtual int process_data_table_rowkey() = 0; virtual int process_data_table_rowkeys(const int64_t size, const ObBitVector *skip) = 0; - virtual bool is_group_scan() const = 0; - virtual int init_group_range(int64_t cur_group_idx, int64_t group_size) = 0; virtual int do_index_lookup() = 0; virtual int get_next_row_from_data_table() = 0; virtual int get_next_rows_from_data_table(int64_t &count, int64_t capacity) = 0; - virtual int process_next_index_batch_for_row() = 0; - virtual int process_next_index_batch_for_rows(int64_t &count) = 0; - virtual bool need_next_index_batch() const = 0; virtual int check_lookup_row_cnt() = 0; - virtual int do_index_table_scan_for_rows(const int64_t max_row_cnt, - const int64_t start_group_idx, - const int64_t default_row_batch_cnt) = 0; - virtual void update_state_in_output_rows_state(int64_t &count) = 0; - virtual void update_states_in_finish_state() = 0; - virtual void update_states_after_finish_state() = 0; - // The following function distinguishes between the global index back and the local index back. 
- // For Local index, it will return 0 - // For Global index, it will return the property - virtual int64_t get_index_group_cnt() const = 0; - virtual int64_t get_lookup_group_cnt() const = 0; - virtual void inc_index_group_cnt() = 0; - virtual void inc_lookup_group_cnt() = 0; virtual ObEvalCtx & get_eval_ctx() = 0; virtual const ExprFixedArray & get_output_expr() = 0; - virtual int switch_index_table_and_rowkey_group_id() { return OB_SUCCESS; } int build_trans_datum(ObExpr *expr, ObEvalCtx *eval_ctx, ObIAllocator &alloc, ObDatum *&datum_ptr); protected: @@ -82,8 +66,6 @@ protected: int64_t default_batch_row_count_; LookupState state_; // index lookup state bool index_end_; // if index reach iterator end - int64_t index_group_cnt_; // number of groups fetched from index table - int64_t lookup_group_cnt_; // number of groups fetched from lookup table int64_t lookup_rowkey_cnt_; // number of rows fetched from index table int64_t lookup_row_cnt_; }; diff --git a/src/sql/engine/table/ob_table_scan_op.cpp b/src/sql/engine/table/ob_table_scan_op.cpp index 0a626b6d99..f8e6b194c4 100644 --- a/src/sql/engine/table/ob_table_scan_op.cpp +++ b/src/sql/engine/table/ob_table_scan_op.cpp @@ -18,7 +18,6 @@ #include "ob_table_scan_op.h" #include "sql/engine/ob_exec_context.h" #include "sql/executor/ob_task_spliter.h" -#include "sql/das/ob_das_group_scan_op.h" #include "sql/das/ob_das_define.h" #include "sql/das/ob_das_utils.h" #include "lib/profile/ob_perf_event.h" @@ -35,6 +34,7 @@ #include "share/external_table/ob_external_table_file_mgr.h" #include "share/external_table/ob_external_table_utils.h" #include "lib/container/ob_array_wrap.h" +#include "sql/das/iter/ob_das_iter_utils.h" #include "share/index_usage/ob_index_usage_info_mgr.h" namespace oceanbase @@ -130,12 +130,11 @@ OB_DEF_SERIALIZE(ObTableScanCtDef) OB_UNIS_ENCODE(calc_part_id_expr_); OB_UNIS_ENCODE(global_index_rowkey_exprs_); OB_UNIS_ENCODE(flashback_item_.fq_read_tx_uncommitted_); - // abandoned fields, please 
remove me at next barrier version + // abandoned fields, please remove me at next barrier version bool abandoned_always_false_aux_lookup = false; bool abandoned_always_false_text_ir = false; OB_UNIS_ENCODE(abandoned_always_false_aux_lookup); OB_UNIS_ENCODE(abandoned_always_false_text_ir); - OB_UNIS_ENCODE(attach_spec_); return ret; } @@ -164,12 +163,11 @@ OB_DEF_SERIALIZE_SIZE(ObTableScanCtDef) OB_UNIS_ADD_LEN(calc_part_id_expr_); OB_UNIS_ADD_LEN(global_index_rowkey_exprs_); OB_UNIS_ADD_LEN(flashback_item_.fq_read_tx_uncommitted_); - // abandoned fields, please remove me at next barrier version + // abandoned fields, please remove me at next barrier version bool abandoned_always_false_aux_lookup = false; bool abandoned_always_false_text_ir = false; OB_UNIS_ADD_LEN(abandoned_always_false_aux_lookup); OB_UNIS_ADD_LEN(abandoned_always_false_text_ir); - OB_UNIS_ADD_LEN(attach_spec_); return len; } @@ -220,11 +218,28 @@ OB_DEF_DESERIALIZE(ObTableScanCtDef) bool abandoned_always_false_text_ir = false; OB_UNIS_DECODE(abandoned_always_false_aux_lookup); OB_UNIS_DECODE(abandoned_always_false_text_ir); - OB_UNIS_DECODE(attach_spec_); return ret; } +ObDASScanCtDef *ObTableScanCtDef::get_lookup_ctdef() +{ + ObDASScanCtDef *lookup_ctdef = nullptr; + if (nullptr == attach_spec_.attach_ctdef_) { + lookup_ctdef = lookup_ctdef_; + } else { + ObDASTableLookupCtDef *table_lookup_ctdef = nullptr; + if (DAS_OP_TABLE_LOOKUP == attach_spec_.attach_ctdef_->op_type_) { + OB_ASSERT(2 == attach_spec_.attach_ctdef_->children_cnt_ && + attach_spec_.attach_ctdef_->children_ != nullptr); + if (DAS_OP_TABLE_SCAN == attach_spec_.attach_ctdef_->children_[1]->op_type_) { + lookup_ctdef = static_cast(attach_spec_.attach_ctdef_->children_[1]); + } + } + } + return lookup_ctdef; +} + int ObTableScanCtDef::allocate_dppr_table_loc() { int ret = OB_SUCCESS; @@ -615,7 +630,6 @@ int ObTableScanSpec::explain_index_selection_info( ObTableScanOp::ObTableScanOp(ObExecContext &exec_ctx, const ObOpSpec &spec, 
ObOpInput *input) : ObOperator(exec_ctx, spec, input), - das_ref_(eval_ctx_, exec_ctx), tsc_rtdef_(exec_ctx.get_allocator()), need_final_limit_(false), table_rescan_allocator_(NULL), @@ -631,8 +645,13 @@ ObTableScanOp::ObTableScanOp(ObExecContext &exec_ctx, const ObOpSpec &spec, ObOp scan_task_id_(0), report_checksum_(false), in_rescan_(false), - global_index_lookup_op_(NULL), - spat_index_() + spat_index_(), + output_ (nullptr), + fold_iter_(nullptr), + iter_tree_(nullptr), + scan_iter_(nullptr), + group_rescan_cnt_(0), + group_id_(0) { } @@ -640,44 +659,17 @@ ObTableScanOp::~ObTableScanOp() { } -bool ObTableScanOp::has_das_scan_op(const ObDASTabletLoc *tablet_loc, ObDASScanOp *&das_op) -{ - if (MY_SPEC.batch_scan_flag_) { - das_op = static_cast( - das_ref_.find_das_task(tablet_loc, DAS_OP_TABLE_BATCH_SCAN)); - } else { - das_op = static_cast( - das_ref_.find_das_task(tablet_loc, DAS_OP_TABLE_SCAN)); - } - return das_op != nullptr; -} - -int ObTableScanOp::init_das_group_range(const int64_t cur_group_idx, const int64_t group_size) -{ - int ret = OB_SUCCESS; - if (MY_SPEC.batch_scan_flag_) { - for (DASTaskIter task_iter = das_ref_.begin_task_iter(); !task_iter.is_end(); ++task_iter) { - ObDASGroupScanOp *batch_op = static_cast(*task_iter); - batch_op->init_group_range(cur_group_idx, group_size); - LOG_DEBUG("init das group range", K(batch_op), K(cur_group_idx), K(group_size)); - } - } - return ret; -} - OB_INLINE int ObTableScanOp::create_one_das_task(ObDASTabletLoc *tablet_loc) { int ret = OB_SUCCESS; - ObIDASTaskOp *task_op = nullptr; ObDASScanOp *scan_op = nullptr; - uint64_t table_loc_id = MY_SPEC.get_table_loc_id(); - ObDASOpType op_type = MY_SPEC.batch_scan_flag_ ? 
DAS_OP_TABLE_BATCH_SCAN : DAS_OP_TABLE_SCAN; - if (OB_LIKELY(has_das_scan_op(tablet_loc, scan_op))) { - // reuse das scan op - } else if (OB_FAIL(das_ref_.create_das_task(tablet_loc, op_type, task_op))) { + bool reuse_das_op = false; + if (OB_ISNULL(scan_iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr scan iter", K(ret)); + } else if (OB_FAIL(scan_iter_->create_das_task(tablet_loc, scan_op, reuse_das_op))) { LOG_WARN("prepare das task failed", K(ret)); - } else { - scan_op = static_cast(task_op); + } else if (!reuse_das_op) { scan_op->set_scan_ctdef(&MY_CTDEF.scan_ctdef_); scan_op->set_scan_rtdef(&tsc_rtdef_.scan_rtdef_); scan_op->set_can_part_retry(nullptr == tsc_rtdef_.scan_rtdef_.sample_info_ @@ -685,21 +677,13 @@ OB_INLINE int ObTableScanOp::create_one_das_task(ObDASTabletLoc *tablet_loc) scan_op->set_inner_rescan(in_rescan_); tsc_rtdef_.scan_rtdef_.table_loc_->is_reading_ = true; if (!MY_SPEC.is_index_global_ && MY_CTDEF.lookup_ctdef_ != nullptr) { - //is local index lookup, need to set the lookup ctdef to the das scan op - ObDASTableLoc *lookup_table_loc = tsc_rtdef_.lookup_rtdef_->table_loc_; - ObDASTabletLoc *lookup_tablet_loc = ObDASUtils::get_related_tablet_loc( - *tablet_loc, lookup_table_loc->loc_meta_->ref_table_id_); - if (OB_ISNULL(lookup_tablet_loc)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("lookup tablet loc is nullptr", K(ret), KPC(tablet_loc), KPC(lookup_table_loc->loc_meta_)); - } else if (OB_FAIL(scan_op->set_lookup_ctdef(MY_CTDEF.lookup_ctdef_))) { - LOG_WARN("set lookup ctdef failed", K(ret)); - } else if (OB_FAIL(scan_op->set_lookup_rtdef(tsc_rtdef_.lookup_rtdef_))) { - LOG_WARN("set lookup rtdef failed", K(ret)); - } else if (OB_FAIL(scan_op->set_lookup_tablet_id(lookup_tablet_loc->tablet_id_))) { - LOG_WARN("set lookup tablet id failed", K(ret), KPC(lookup_tablet_loc)); - } else { - lookup_table_loc->is_reading_ = true; + if (OB_FAIL(pushdown_normal_lookup_to_das(*scan_op))) { + LOG_WARN("pushdown normal lookup to das 
failed", K(ret)); + } + } + if (OB_SUCC(ret) && MY_CTDEF.attach_spec_.attach_ctdef_ != nullptr) { + if (OB_FAIL(pushdown_attach_task_to_das(*scan_op))) { + LOG_WARN("pushdown attach task to das failed", K(ret)); } } } @@ -711,12 +695,83 @@ OB_INLINE int ObTableScanOp::create_one_das_task(ObDASTabletLoc *tablet_loc) return ret; } +int ObTableScanOp::pushdown_normal_lookup_to_das(ObDASScanOp &target_op) +{ + int ret = OB_SUCCESS; + //is local index lookup, need to set the lookup ctdef to the das scan op + ObDASTableLoc *lookup_table_loc = tsc_rtdef_.lookup_rtdef_->table_loc_; + ObDASTabletLoc *lookup_tablet_loc = ObDASUtils::get_related_tablet_loc( + *target_op.get_tablet_loc(), lookup_table_loc->loc_meta_->ref_table_id_); + if (OB_ISNULL(lookup_tablet_loc)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lookup tablet loc is nullptr", K(ret), KPC(target_op.get_tablet_loc()), KPC(lookup_table_loc->loc_meta_)); + } else if (OB_FAIL(target_op.reserve_related_buffer(1))) { + LOG_WARN("reserve related buffer failed", K(ret)); + } else if (OB_FAIL(target_op.set_related_task_info(MY_CTDEF.lookup_ctdef_, + tsc_rtdef_.lookup_rtdef_, + lookup_tablet_loc->tablet_id_))) { + LOG_WARN("set lookup info failed", K(ret)); + } else { + lookup_table_loc->is_reading_ = true; + } + return ret; +} + +int ObTableScanOp::pushdown_attach_task_to_das(ObDASScanOp &target_op) +{ + int ret = OB_SUCCESS; + ObDASAttachRtInfo *attach_rtinfo = tsc_rtdef_.attach_rtinfo_; + if (OB_FAIL(target_op.reserve_related_buffer(attach_rtinfo->related_scan_cnt_))) { + LOG_WARN("reserve related buffer failed", K(ret), K(attach_rtinfo->related_scan_cnt_)); + } else if (OB_FAIL(attach_related_taskinfo(target_op, attach_rtinfo->attach_rtdef_))) { + LOG_WARN("attach related task info failed", K(ret)); + } else { + target_op.set_attach_ctdef(MY_CTDEF.attach_spec_.attach_ctdef_); + target_op.set_attach_rtdef(tsc_rtdef_.attach_rtinfo_->attach_rtdef_); + } + return ret; +} + +int 
ObTableScanOp::attach_related_taskinfo(ObDASScanOp &target_op, ObDASBaseRtDef *attach_rtdef) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(attach_rtdef) || OB_ISNULL(attach_rtdef->ctdef_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("attach rtdef is invalid", K(ret), KP(attach_rtdef)); + } else if (attach_rtdef->op_type_ == DAS_OP_TABLE_SCAN) { + const ObDASScanCtDef *scan_ctdef = static_cast(attach_rtdef->ctdef_); + ObDASScanRtDef *scan_rtdef = static_cast(attach_rtdef); + ObDASTableLoc *table_loc = scan_rtdef->table_loc_; + ObDASTabletLoc *tablet_loc = ObDASUtils::get_related_tablet_loc( + *target_op.get_tablet_loc(), table_loc->loc_meta_->ref_table_id_); + if (OB_ISNULL(tablet_loc)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("related tablet loc is not found", K(ret), + KPC(target_op.get_tablet_loc()), + KPC(table_loc->loc_meta_)); + } else if (OB_FAIL(target_op.set_related_task_info(scan_ctdef, + scan_rtdef, + tablet_loc->tablet_id_))) { + LOG_WARN("set attach task info failed", K(ret), KPC(tablet_loc)); + } else { + table_loc->is_reading_ = true; + } + } else { + for (int i = 0; OB_SUCC(ret) && i < attach_rtdef->children_cnt_; ++i) { + if (OB_FAIL(attach_related_taskinfo(target_op, attach_rtdef->children_[i]))) { + LOG_WARN("recursive attach related task info failed", K(ret), K(i)); + } + } + } + return ret; +} + int ObTableScanOp::prepare_pushdown_limit_param() { int ret = OB_SUCCESS; if (!limit_param_.is_valid()) { //ignore, do nothing - } else if (MY_SPEC.batch_scan_flag_) { + } else if (in_batch_rescan_subplan()) { //batch scan can not pushdown limit param to storage need_final_limit_ = true; tsc_rtdef_.scan_rtdef_.limit_param_.offset_ = 0; @@ -728,7 +783,7 @@ int ObTableScanOp::prepare_pushdown_limit_param() tsc_rtdef_.lookup_rtdef_->limit_param_.limit_ = -1; } - } else if (tsc_rtdef_.has_lookup_limit() || das_ref_.get_das_task_cnt() > 1) { + } else if (tsc_rtdef_.has_lookup_limit() || (OB_NOT_NULL(scan_iter_) && scan_iter_->get_das_task_cnt() > 1)) { //for index 
back, need to final limit output rows in TableScan operator, //please see me for the reason: /* for multi-partition scanning, */ @@ -795,34 +850,30 @@ int ObTableScanOp::prepare_das_task() } return ret; } - int ObTableScanOp::prepare_all_das_tasks() { + // get grop size of batch rescan int ret = OB_SUCCESS; - if (MY_SPEC.batch_scan_flag_) { - if (OB_SUCC(ret)) { - if (!tsc_rtdef_.bnlj_params_.empty()) { - tsc_rtdef_.group_size_ = tsc_rtdef_.bnlj_params_.at(0).gr_param_->count_; - if (OB_UNLIKELY(tsc_rtdef_.group_size_ > tsc_rtdef_.max_group_size_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("The amount of data exceeds the pre allocated memory", K(ret)); - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("batch nlj params is empty", K(ret)); - } + if (need_perform_real_batch_rescan()) { + tsc_rtdef_.group_size_ = tsc_rtdef_.bnlj_params_.at(0).gr_param_->count_; + if (OB_UNLIKELY(tsc_rtdef_.group_size_ > tsc_rtdef_.max_group_size_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("The amount of data exceeds the pre allocated memory", K(ret)); } } + if (OB_SUCC(ret)) { if (MY_SPEC.gi_above_ && !MY_INPUT.key_ranges_.empty()) { if (OB_FAIL(prepare_das_task())) { LOG_WARN("prepare das task failed", K(ret)); } } else { - int64_t group_size = MY_SPEC.batch_scan_flag_ ? tsc_rtdef_.group_size_ : 1; + int64_t group_size = (output_ == iter_tree_) ? 
1: tsc_rtdef_.group_size_; GroupRescanParamGuard grp_guard(tsc_rtdef_, GET_PHY_PLAN_CTX(ctx_)->get_param_store_for_update()); for (int64_t i = 0; OB_SUCC(ret) && i < group_size; ++i) { - grp_guard.switch_group_rescan_param(i); + if (need_perform_real_batch_rescan()) { + grp_guard.switch_group_rescan_param(i); + } if (OB_FAIL(prepare_single_scan_range(i))) { LOG_WARN("prepare single scan range failed", K(ret)); } else if (OB_FAIL(prepare_das_task())) { @@ -834,9 +885,51 @@ int ObTableScanOp::prepare_all_das_tasks() } } } - if (OB_SUCC(ret)) { - if (OB_FAIL(init_das_group_range(0, tsc_rtdef_.group_size_))) { - LOG_WARN("set group range failed", K(ret), K_(tsc_rtdef_.group_size)); + return ret; +} + +int ObTableScanOp::init_attach_scan_rtdef(const ObDASBaseCtDef *attach_ctdef, + ObDASBaseRtDef *&attach_rtdef) +{ + int ret = OB_SUCCESS; + ObDASTaskFactory &das_factory = DAS_CTX(ctx_).get_das_factory(); + if (OB_ISNULL(attach_ctdef)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("attach ctdef is nullptr", K(ret)); + } else if (OB_FAIL(das_factory.create_das_rtdef(attach_ctdef->op_type_, attach_rtdef))) { + LOG_WARN("create das rtdef failed", K(ret), K(attach_ctdef->op_type_)); + } else if (ObDASTaskFactory::is_attached(attach_ctdef->op_type_)) { + attach_rtdef->ctdef_ = attach_ctdef; + attach_rtdef->children_cnt_ = attach_ctdef->children_cnt_; + attach_rtdef->eval_ctx_ = &eval_ctx_; + if (attach_ctdef->children_cnt_ > 0) { + if (OB_ISNULL(attach_rtdef->children_ = OB_NEW_ARRAY(ObDASBaseRtDef*, + &ctx_.get_allocator(), + attach_ctdef->children_cnt_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate child buf failed", K(ret), K(attach_ctdef->children_cnt_)); + } + for (int i = 0; OB_SUCC(ret) && i < attach_ctdef->children_cnt_; ++i) { + if (OB_FAIL(init_attach_scan_rtdef(attach_ctdef->children_[i], attach_rtdef->children_[i]))) { + LOG_WARN("init attach scan rtdef failed", K(ret)); + } + } + } + } else { + tsc_rtdef_.attach_rtinfo_->related_scan_cnt_++; + if 
(attach_ctdef == &MY_CTDEF.scan_ctdef_) { + attach_rtdef = &tsc_rtdef_.scan_rtdef_; + } else if (attach_ctdef->op_type_ != DAS_OP_TABLE_SCAN) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("attach ctdef type is invalid", K(ret), K(attach_ctdef->op_type_)); + } else { + const ObDASScanCtDef *attach_scan_ctdef = static_cast(attach_ctdef); + const ObDASTableLocMeta *attach_loc_meta = MY_CTDEF.attach_spec_.get_attach_loc_meta( + MY_SPEC.table_loc_id_, attach_scan_ctdef->ref_table_id_); + ObDASScanRtDef *attach_scan_rtdef = static_cast(attach_rtdef); + if (OB_FAIL(init_das_scan_rtdef(*attach_scan_ctdef, *attach_scan_rtdef, attach_loc_meta))) { + LOG_WARN("init das scan rtdef failed", K(ret)); + } } } return ret; @@ -848,13 +941,6 @@ int ObTableScanOp::init_table_scan_rtdef() ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_); ObSQLSessionInfo *my_session = GET_MY_SESSION(ctx_); ObDASTaskFactory &das_factory = DAS_CTX(ctx_).get_das_factory(); - ObMemAttr mem_attr; - mem_attr.tenant_id_ = my_session->get_effective_tenant_id(); - mem_attr.label_ = "ScanDASCtx"; - das_ref_.set_mem_attr(mem_attr); - das_ref_.set_expr_frame_info(&MY_SPEC.plan_->get_expr_frame_info()); - das_ref_.set_execute_directly(!MY_SPEC.use_dist_das_); - das_ref_.set_enable_rich_format(MY_SPEC.use_rich_format_); set_cache_stat(plan_ctx->get_phy_plan()->stat_); bool is_null_value = false; if (OB_SUCC(ret) && NULL != MY_SPEC.limit_) { @@ -892,11 +978,28 @@ int ObTableScanOp::init_table_scan_rtdef() LOG_WARN("create das rtdef failed", K(ret)); } else { tsc_rtdef_.lookup_rtdef_ = static_cast(das_rtdef); - if (OB_FAIL(init_das_scan_rtdef(lookup_ctdef, *tsc_rtdef_.lookup_rtdef_, MY_CTDEF.lookup_loc_meta_))) { + if (OB_FAIL(init_das_scan_rtdef(lookup_ctdef, + *tsc_rtdef_.lookup_rtdef_, + MY_CTDEF.lookup_loc_meta_))) { LOG_WARN("init das scan rtdef failed", K(ret), K(lookup_ctdef)); } } } + if (OB_SUCC(ret) && MY_CTDEF.attach_spec_.attach_ctdef_ != nullptr) { + if (OB_ISNULL(tsc_rtdef_.attach_rtinfo_ = 
OB_NEWx(ObDASAttachRtInfo, &ctx_.get_allocator()))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate attach rtinfo failed", K(ret)); + } else if (OB_FAIL(init_attach_scan_rtdef(MY_CTDEF.attach_spec_.attach_ctdef_, + tsc_rtdef_.attach_rtinfo_->attach_rtdef_))) { + LOG_WARN("init attach scan rtdef failed", K(ret)); + } else if (tsc_rtdef_.attach_rtinfo_->pushdown_tasks_.empty()) { + //has no pushdown task, means all attach task can be pushdown + if (OB_FAIL(tsc_rtdef_.attach_rtinfo_->pushdown_tasks_.push_back( + tsc_rtdef_.attach_rtinfo_->attach_rtdef_))) { + LOG_WARN("store pushdown das rtdef failed", K(ret)); + } + } + } return ret; } @@ -912,6 +1015,7 @@ OB_INLINE int ObTableScanOp::init_das_scan_rtdef(const ObDASScanCtDef &das_ctdef ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_); ObSQLSessionInfo *my_session = GET_MY_SESSION(ctx_); ObTaskExecutorCtx &task_exec_ctx = ctx_.get_task_exec_ctx(); + das_rtdef.ctdef_ = &das_ctdef; das_rtdef.timeout_ts_ = plan_ctx->get_ps_timeout_timestamp(); das_rtdef.tx_lock_timeout_ = my_session->get_trx_lock_timeout(); das_rtdef.scan_flag_ = MY_CTDEF.scan_flags_; @@ -939,8 +1043,8 @@ OB_INLINE int ObTableScanOp::init_das_scan_rtdef(const ObDASScanCtDef &das_ctdef das_rtdef.scan_flag_.is_lookup_for_4377_ = is_lookup; das_rtdef.need_check_output_datum_ = MY_SPEC.need_check_output_datum_; das_rtdef.sql_mode_ = my_session->get_sql_mode(); - das_rtdef.stmt_allocator_.set_alloc(&das_ref_.get_das_alloc()); - das_rtdef.scan_allocator_.set_alloc(&das_ref_.get_das_alloc()); + das_rtdef.stmt_allocator_.set_alloc(&ctx_.get_allocator()); + das_rtdef.scan_allocator_.set_alloc(&ctx_.get_allocator()); das_rtdef.eval_ctx_ = &get_eval_ctx(); if ((is_lookup_limit && is_lookup) || (!is_lookup_limit && !is_lookup)) { //when is_lookup_limit = true means that the limit param should pushdown to the lookup rtdef @@ -1045,7 +1149,7 @@ int ObTableScanOp::update_output_tablet_id() int ObTableScanOp::prepare_scan_range() { int ret = OB_SUCCESS; - 
if (OB_LIKELY(!MY_SPEC.batch_scan_flag_)) { + if (!need_perform_real_batch_rescan()) { ret = prepare_single_scan_range(); } else { ret = prepare_batch_scan_range(); @@ -1082,28 +1186,46 @@ int ObTableScanOp::prepare_batch_scan_range() LOG_DEBUG("after prepare batch scan range", K(MY_INPUT.key_ranges_), K(MY_INPUT.ss_key_ranges_)); return ret; } - int ObTableScanOp::build_bnlj_params() { int ret = OB_SUCCESS; - ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_); - if (!MY_SPEC.batch_scan_flag_) { + const GroupParamArray* group_params_above = nullptr; + if (OB_ISNULL(group_params_above = ctx_.get_das_ctx().get_group_params())) { // do nothing } else if (tsc_rtdef_.bnlj_params_.empty()) { tsc_rtdef_.bnlj_params_.set_capacity(MY_CTDEF.bnlj_param_idxs_.count()); - LOG_DEBUG("prepare batch scan range", K(MY_CTDEF.bnlj_param_idxs_), K(plan_ctx->get_param_store())); + LOG_TRACE("prepare batch scan range",K(MY_CTDEF.bnlj_param_idxs_), KPC(group_params_above)); for (int64_t i = 0; OB_SUCC(ret) && i < MY_CTDEF.bnlj_param_idxs_.count(); ++i) { int64_t param_idx = MY_CTDEF.bnlj_param_idxs_.at(i); - const ObObjParam &bnlj_param = plan_ctx->get_param_store().at(param_idx); - if (bnlj_param.is_ext_sql_array()) { - ObSqlArrayObj *array_obj = reinterpret_cast(bnlj_param.get_ext()); - OZ(tsc_rtdef_.bnlj_params_.push_back(GroupRescanParamInfo(param_idx, array_obj))); + uint64_t array_idx = OB_INVALID_ID; + bool exist = false; + if (OB_FAIL(ctx_.get_das_ctx().find_group_param_by_param_idx(param_idx, exist, array_idx))) { + LOG_WARN("failed to find group param by param idx", K(ret), K(i), K(param_idx)); + } else if (!exist) { + // ret = OB_ERR_UNEXPECTED; + // LOG_WARN("failed to find group param", K(ret), K(exist), K(i), K(array_idx)); + LOG_TRACE("bnlj params is not a array", K(i), K(param_idx)); + } else { + const GroupRescanParam &group_param = group_params_above->at(array_idx); + OZ(tsc_rtdef_.bnlj_params_.push_back(GroupRescanParamInfo(param_idx, group_param.gr_param_))); } } if 
(OB_SUCC(ret) && tsc_rtdef_.bnlj_params_.empty()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("batch param is empty in batch rescan", K(ret), - K(MY_CTDEF.bnlj_param_idxs_), K(plan_ctx->get_param_store())); + K(MY_CTDEF.bnlj_param_idxs_), KPC(group_params_above)); + } + if (OB_SUCC(ret) && (OB_ISNULL(fold_iter_))) { + if (OB_FAIL(ObDASIterUtils::create_group_fold_iter(MY_CTDEF, + tsc_rtdef_, + eval_ctx_, + ctx_, + eval_infos_, + MY_SPEC, + iter_tree_, + fold_iter_))) { + LOG_WARN("failed to create group fold iter", K(ret)); + } } } return ret; @@ -1222,7 +1344,7 @@ int ObTableScanOp::prepare_single_scan_range(int64_t group_idx) OZ(vt_result_converter_->convert_key_ranges(MY_INPUT.key_ranges_)); } LOG_DEBUG("prepare single scan range", K(ret), K(key_ranges), K(MY_INPUT.key_ranges_), - K(MY_INPUT.ss_key_ranges_)); + K(MY_INPUT.ss_key_ranges_), K(spec_.id_)); return ret; } @@ -1357,48 +1479,62 @@ int ObTableScanOp::inner_open() } } } - if (OB_SUCC(ret) && MY_SPEC.is_global_index_back()) { - if (OB_NOT_NULL(global_index_lookup_op_)) { - global_index_lookup_op_->destroy(); - global_index_lookup_op_->~ObGlobalIndexLookupOpImpl(); - global_index_lookup_op_ = nullptr; - } - void *lookup_buf = ctx_.get_allocator().alloc(sizeof(ObGlobalIndexLookupOpImpl)); - if (nullptr == lookup_buf) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate memory failed", K(ret), K(lookup_buf)); - } else { - global_index_lookup_op_ = new (lookup_buf) ObGlobalIndexLookupOpImpl(this); - if (OB_FAIL(global_index_lookup_op_->open())) { - LOG_WARN("failed to open global index lookup op", K(ret)); + + // create and init iter_tree_. 
+ const ObTableScanSpec &spec = MY_SPEC; + if (OB_SUCC(ret)) { + if (spec.should_scan_index()) { + if (spec.is_global_index_back()) { + if (OB_FAIL(ObDASIterUtils::create_global_lookup_iter_tree(MY_CTDEF, + tsc_rtdef_, + eval_ctx_, + ctx_, + eval_infos_, + spec, + can_partition_retry(), + scan_iter_, + iter_tree_))) { + LOG_WARN("failed to create global lookup iter tree", K(ret)); + } + } else if (OB_FAIL(ObDASIterUtils::create_local_lookup_iter_tree(MY_CTDEF, + tsc_rtdef_, + eval_ctx_, + ctx_, + eval_infos_, + spec, + scan_iter_, + iter_tree_))) { + LOG_WARN("failed to create local lookup iter tree", K(ret)); } + } else if (OB_FAIL(ObDASIterUtils::create_table_scan_iter_tree(MY_CTDEF, + tsc_rtdef_, + eval_ctx_, + ctx_, + eval_infos_, + spec, + scan_iter_, + iter_tree_))) { + LOG_WARN("failed to create table scan iter tree", K(ret)); } } + output_ = iter_tree_; return ret; } int ObTableScanOp::inner_close() { int ret = OB_SUCCESS; - if (das_ref_.has_task()) { - int tmp_ret = fill_storage_feedback_info(); - if (OB_UNLIKELY(OB_SUCCESS != tmp_ret)) { - LOG_WARN("fill storage feedback info failed", KR(tmp_ret)); + if (OB_NOT_NULL(scan_iter_)) { + if (scan_iter_->has_task()) { + int tmp_ret = fill_storage_feedback_info(); + if (OB_UNLIKELY(OB_SUCCESS != tmp_ret)) { + LOG_WARN("fill storage feedback info failed", KR(tmp_ret)); + } } - if (OB_FAIL(das_ref_.close_all_task())) { - LOG_WARN("close all das task failed", KR(ret)); + if (OB_FAIL(scan_iter_->reuse())) { + LOG_WARN("failed to reuse scan iter", K(ret)); } } - if (MY_SPEC.is_global_index_back()) { - int save_ret = ret; - if (OB_ISNULL(global_index_lookup_op_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", KR(ret)); - } else if (OB_FAIL(global_index_lookup_op_->close())) { - LOG_WARN("close global index lookup op failed", KR(ret)); - } - ret = (OB_SUCCESS == save_ret) ? 
ret : save_ret; - } if (OB_SUCC(ret)) { fill_sql_plan_monitor_info(); } @@ -1464,6 +1600,15 @@ int ObTableScanOp::do_init_before_get_row() if (OB_TRY_LOCK_ROW_CONFLICT != ret) { LOG_WARN("fail to do table scan", K(ret)); } + } else { + if (in_batch_rescan_subplan()) { + // if the ancestor operator of TSC support batch rescan, update the group_id and batch rescan_cnt after perform a real-rescan + group_rescan_cnt_ = ctx_.get_das_ctx().get_group_rescan_cnt(); + group_id_ = ctx_.get_das_ctx().get_skip_scan_group_id(); + } + if (OB_FAIL(output_->set_merge_status(is_group_rescan() ? SORT_MERGE : SEQUENTIAL_MERGE))) { + LOG_WARN("failed to set merge status for das iter", K(ret)); + } } } } @@ -1474,24 +1619,28 @@ void ObTableScanOp::destroy() { tsc_rtdef_.~ObTableScanRtDef(); ObOperator::destroy(); - das_ref_.reset(); if (OB_NOT_NULL(vt_result_converter_)) { vt_result_converter_->destroy(); vt_result_converter_->~ObVirtualTableResultConverter(); vt_result_converter_ = nullptr; } - if (OB_NOT_NULL(global_index_lookup_op_)) { - global_index_lookup_op_->destroy(); - global_index_lookup_op_->~ObGlobalIndexLookupOpImpl(); - global_index_lookup_op_ = nullptr; + if (OB_NOT_NULL(iter_tree_)) { + iter_tree_->release(); + iter_tree_ = nullptr; } + if (OB_NOT_NULL(fold_iter_)) { + fold_iter_->release(); + fold_iter_ = nullptr; + } + output_ = nullptr; + scan_iter_ = nullptr; } int ObTableScanOp::fill_storage_feedback_info() { int ret = OB_SUCCESS; // fill storage feedback info for acs - ObTableScanParam &scan_param = DAS_SCAN_OP(*das_ref_.begin_task_iter())->get_scan_param(); + ObTableScanParam &scan_param = DAS_SCAN_OP(*scan_iter_->begin_task_iter())->get_scan_param(); bool is_index_back = scan_param.scan_flag_.index_back_; ObTableScanStat &table_scan_stat = GET_PHY_PLAN_CTX(ctx_)->get_table_scan_stat(); if (MY_SPEC.should_scan_index()) { @@ -1558,17 +1707,6 @@ int ObTableScanOp::inner_rescan() in_rescan_ = true; if (OB_FAIL(ObOperator::inner_rescan())) { LOG_WARN("failed to exec 
inner rescan"); - } else if (MY_SPEC.is_global_index_back()) { - if (OB_ISNULL(global_index_lookup_op_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments",K(ret)); - } else { - global_index_lookup_op_->get_brs().size_ = brs_.size_ ; - global_index_lookup_op_->get_brs().end_ = brs_.end_; - if (OB_FAIL(global_index_lookup_op_->rescan())) { - LOG_WARN("failed to get next batch",K(ret)); - } - } } else { if (OB_FAIL(inner_rescan_for_tsc())) { LOG_WARN("failed to get next row",K(ret)); @@ -1585,6 +1723,7 @@ int ObTableScanOp::inner_rescan_for_tsc() MY_INPUT.key_ranges_.reuse(); MY_INPUT.ss_key_ranges_.reuse(); MY_INPUT.mbr_filters_.reuse(); + bool need_real_rescan = false; if (OB_FAIL(build_bnlj_params())) { // At start of each round of batch rescan, NLJ will fill param_store with // batch parameters. After each right operator rescan, NLJ will fill @@ -1592,33 +1731,51 @@ int ObTableScanOp::inner_rescan_for_tsc() // Therefore, we need to get and save bnlj parameters here or they will be // replaced by NLJ. 
LOG_WARN("build batch nlj params failed", KR(ret)); - } else if (!need_real_rescan()) { - ret = set_batch_iter(ctx_.get_das_ctx().jump_read_group_id_); + } else if (OB_FAIL(check_need_real_rescan(need_real_rescan))) { + LOG_WARN("failed to check if tsc need real rescan", K(ret)); + } else if (!need_real_rescan) { + LOG_TRACE("[group rescan] need switch iter", K(group_rescan_cnt_), K(ctx_.get_das_ctx().get_group_rescan_cnt()), + K(group_id_), K(ctx_.get_das_ctx().get_skip_scan_group_id()), K(spec_.id_)); + if (OB_FAIL(set_batch_iter(ctx_.get_das_ctx().get_skip_scan_group_id()))) { + LOG_WARN("failed to switch batch iter", K(ret), K(ctx_.get_das_ctx().get_skip_scan_group_id())); + } + group_id_ = ctx_.get_das_ctx().get_skip_scan_group_id(); } else { + reset_iter_tree_for_rescan(); + LOG_TRACE("[group rescan] need perform real rescan", K(group_rescan_cnt_), K(ctx_.get_das_ctx().get_group_rescan_cnt()), + K(group_id_), K(ctx_.get_das_ctx().get_skip_scan_group_id()), K(spec_.id_)); if (is_virtual_table(MY_SPEC.ref_table_id_) - || !das_ref_.is_all_local_task() + || (OB_NOT_NULL(scan_iter_) && !scan_iter_->is_all_local_task()) || (MY_SPEC.use_dist_das_ && nullptr != MY_CTDEF.das_dppr_tbl_)) { ret = close_and_reopen(); } else { ret = local_iter_rescan(); } + if (OB_SUCC(ret) && need_perform_real_batch_rescan()) { + LOG_TRACE("[group rescan] need perform real batch rescan"); + fold_iter_->init_group_range(0, tsc_rtdef_.bnlj_params_.at(0).gr_param_->count_); + } } + return ret; } + int ObTableScanOp::close_and_reopen() { int ret = OB_SUCCESS; iter_end_ = false; - if (OB_FAIL(inner_close())) { + if (OB_ISNULL(scan_iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr scan iter", K(ret)); + } else if (OB_FAIL(inner_close())) { LOG_WARN("fail to close op", K(ret)); } else if (OB_FAIL(reuse_table_rescan_allocator())) { LOG_WARN("reuse table rescan allocator failed", K(ret)); } else { - das_ref_.reuse(); need_final_limit_ = false; //in order to avoid memory 
expansion caused by repeatedly creating DAS Tasks, //stmt allocator uses DAS allocator in the reopen process - tsc_rtdef_.scan_rtdef_.stmt_allocator_.set_alloc(&das_ref_.get_das_alloc()); + tsc_rtdef_.scan_rtdef_.stmt_allocator_.set_alloc(scan_iter_->get_das_alloc()); tsc_rtdef_.scan_rtdef_.scan_allocator_.set_alloc(table_rescan_allocator_); MY_INPUT.key_ranges_.reuse(); MY_INPUT.ss_key_ranges_.reuse(); @@ -1631,7 +1788,10 @@ int ObTableScanOp::local_iter_rescan() { int ret = OB_SUCCESS; ObGranuleTaskInfo info; - if (OB_FAIL(get_access_tablet_loc(info))) { + if (OB_ISNULL(scan_iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr scan iter", K(ret)); + } else if (OB_FAIL(get_access_tablet_loc(info))) { LOG_WARN("fail to get access partition", K(ret)); } else if (OB_FAIL(local_iter_reuse())) { LOG_WARN("failed to reset query range", K(ret)); @@ -1643,36 +1803,40 @@ int ObTableScanOp::local_iter_rescan() OB_FAIL(prepare_scan_range())) { // prepare scan input param LOG_WARN("fail to prepare scan param", K(ret)); } else { - DASTaskIter task_iter = das_ref_.begin_task_iter(); + DASTaskIter task_iter = scan_iter_->begin_task_iter(); for (; OB_SUCC(ret) && !task_iter.is_end(); ++task_iter) { ObDASScanOp *scan_op = DAS_SCAN_OP(*task_iter); if (MY_SPEC.gi_above_) { if (!MY_SPEC.is_index_global_ && MY_CTDEF.lookup_ctdef_ != nullptr) { //is local index lookup, need to set the lookup ctdef to the das scan op - ObDASTableLoc *lookup_table_loc = tsc_rtdef_.lookup_rtdef_->table_loc_; - ObDASTabletLoc *lookup_tablet_loc = ObDASUtils::get_related_tablet_loc( - *MY_INPUT.tablet_loc_, MY_CTDEF.lookup_ctdef_->ref_table_id_); - if (OB_FAIL(scan_op->set_lookup_tablet_id(lookup_tablet_loc->tablet_id_))) { - LOG_WARN("set lookup tablet id failed", K(ret), KPC(lookup_tablet_loc)); + if (OB_FAIL(pushdown_normal_lookup_to_das(*scan_op))) { + LOG_WARN("pushdown normal lookup to das failed", K(ret)); + } + } + if (OB_SUCC(ret) && MY_CTDEF.attach_spec_.attach_ctdef_ != nullptr) { 
+ if (OB_FAIL(pushdown_attach_task_to_das(*scan_op))) { + LOG_WARN("pushdown attach task to das failed", K(ret)); } } } if (OB_SUCC(ret)) { if (OB_FAIL(cherry_pick_range_by_tablet_id(scan_op))) { LOG_WARN("prune query range by partition id failed", K(ret)); - } else if (OB_FAIL(init_das_group_range(0, tsc_rtdef_.group_size_))) { - LOG_WARN("set group range failed", K(ret), K_(tsc_rtdef_.group_size)); - } else if (OB_FAIL(MTL(ObDataAccessService*)->rescan_das_task(das_ref_, *scan_op))) { + } else if (OB_FAIL(scan_iter_->rescan_das_task(scan_op))) { LOG_WARN("rescan das task failed", K(ret)); } } } - if (OB_SUCC(ret) && das_ref_.has_task()) { - //prepare to output row - scan_result_ = das_ref_.begin_result_iter(); - if (OB_FAIL(update_output_tablet_id())) { - LOG_WARN("update output row pkey failed", K(ret), KPC(scan_result_.get_tablet_loc())); - } + } + + if (OB_SUCC(ret)) { + if (in_batch_rescan_subplan()) { + // if the ancestor operator of TSC support batch rescan, update the group_id and batch rescan_cnt after perform a real-rescan + group_rescan_cnt_ = ctx_.get_das_ctx().get_group_rescan_cnt(); + group_id_ = ctx_.get_das_ctx().get_skip_scan_group_id(); + } + if (OB_FAIL(output_->set_merge_status(is_group_rescan() ? 
SORT_MERGE : SEQUENTIAL_MERGE))) { + LOG_WARN("failed to set merge status for das iter", K(ret)); } } return ret; @@ -1684,7 +1848,7 @@ int ObTableScanOp::local_iter_rescan() int ObTableScanOp::local_iter_reuse() { int ret = OB_SUCCESS; - for (DASTaskIter task_iter = das_ref_.begin_task_iter(); + for (DASTaskIter task_iter = scan_iter_->begin_task_iter(); !task_iter.is_end(); ++task_iter) { ObDASScanOp *scan_op = DAS_SCAN_OP(*task_iter); bool need_switch_param = (scan_op->get_tablet_loc() != MY_INPUT.tablet_loc_ && @@ -1712,75 +1876,92 @@ int ObTableScanOp::switch_iterator() return OB_NOT_SUPPORTED; } -bool ObTableScanOp::need_real_rescan() +int ObTableScanOp::check_need_real_rescan(bool &bret) { - bool bret = false; - if (!MY_SPEC.batch_scan_flag_) { + int ret = OB_SUCCESS; + bret = false; + const GroupParamArray* group_params_above = nullptr; + bool enable_group_rescan_test_mode = false; + enable_group_rescan_test_mode = (OB_SUCCESS != (OB_E(EventTable::EN_DAS_GROUP_RESCAN_TEST_MODE) OB_SUCCESS)); + if (OB_ISNULL(group_params_above = ctx_.get_das_ctx().get_group_params())) { bret = true; } else if (tsc_rtdef_.bnlj_params_.empty()) { //batch rescan not init, need to do real rescan bret = true; } else { - ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_); - int64_t param_idx = tsc_rtdef_.bnlj_params_.at(0).param_idx_; - //param store has been inited by nlj, to fetch next batch result - bret = plan_ctx->get_param_store().at(param_idx).is_ext_sql_array(); - } - return bret; -} - -int ObTableScanOp::switch_batch_iter() -{ - int ret = OB_SUCCESS; - for (DASTaskIter task_iter = das_ref_.begin_task_iter(); - OB_SUCC(ret) && !task_iter.is_end(); ++task_iter) { - ObDASGroupScanOp *group_scan_op = DAS_GROUP_SCAN_OP(*task_iter); - if (OB_FAIL(group_scan_op->switch_scan_group())) { - if (OB_ITER_END != ret) { - LOG_WARN("switch batch iter failed", K(ret)); + // the above operator of tsc support batch group rescan + if (group_rescan_cnt_ < 
ctx_.get_das_ctx().get_group_rescan_cnt()) { + // need perform batch rescan, the output of tsc is changed to fold_iter_ + if (ctx_.get_das_ctx().get_skip_scan_group_id() > 0) { + output_ = iter_tree_; + LOG_TRACE("[group rescan] skip read is found"); } else { - iter_end_ = true; + output_ = fold_iter_; + } + bret = true; + } else if (group_rescan_cnt_ == ctx_.get_das_ctx().get_group_rescan_cnt()) { + if (group_id_ < ctx_.get_das_ctx().get_skip_scan_group_id()) { + if (output_ == fold_iter_) { + bret = false; + } else { + bret = true; + } + } else if (group_id_ == ctx_.get_das_ctx().get_skip_scan_group_id()) { + // the sql paln like this: + // spf + // / \ + // tsc_1 px_partition_iterator + // \ + // tsc_2 + // if enable spf batch rescan in this paln, the rescan of tsc will called by px_partition_iterator, which need perform a real rescan + bret = true; + output_ = iter_tree_; + LOG_TRACE("[group rescan] gi rescan is supportted in batch rescan"); + } else { + if (enable_group_rescan_test_mode) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("the group id of tsc exceeds the group id of above operator", + K(ret), K(group_rescan_cnt_), K(group_id_), K(ctx_.get_das_ctx().get_skip_scan_group_id())); + } else { + bret = true; + output_ = iter_tree_; + LOG_TRACE("[group rescan] found unexpected group id", K(group_rescan_cnt_), K(group_id_), K(ctx_.get_das_ctx().get_skip_scan_group_id())); + } } - } - } - if (OB_SUCC(ret) && !iter_end_) { - if (!das_ref_.has_task()) { - iter_end_ = true; } else { - //prepare to output row - scan_result_ = das_ref_.begin_result_iter(); - if (OB_FAIL(update_output_tablet_id())) { - LOG_WARN("update output row pkey failed", K(ret), KPC(scan_result_.get_tablet_loc())); + if (enable_group_rescan_test_mode) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("the batch rescan count of tsc exceeds the batch count of above operator", + K(ret), K(group_rescan_cnt_), K(ctx_.get_das_ctx().get_group_rescan_cnt())); + + } else { + bret = true; + output_ = iter_tree_; + 
LOG_TRACE("[group rescan] found unexpected group rescan cnt", K(group_rescan_cnt_), K(ctx_.get_das_ctx().get_group_rescan_cnt())); } } } return ret; } +void ObTableScanOp::reset_iter_tree_for_rescan() +{ + if (OB_NOT_NULL(fold_iter_)) { + fold_iter_->reuse(); + } + if (iter_tree_->get_type() == DAS_ITER_LOOKUP) { + iter_tree_->reuse(); + } +} + int ObTableScanOp::set_batch_iter(int64_t group_id) { int ret = OB_SUCCESS; - for (DASTaskIter task_iter = das_ref_.begin_task_iter(); - OB_SUCC(ret) && !task_iter.is_end(); ++task_iter) { - ObDASGroupScanOp *group_scan_op = DAS_GROUP_SCAN_OP(*task_iter); - if (OB_FAIL(group_scan_op->set_scan_group(group_id))) { - if (OB_ITER_END != ret) { - LOG_WARN("switch batch iter failed", K(ret)); - } else { - iter_end_ = true; - } - } - } - if (OB_SUCC(ret) && !iter_end_) { - if (!das_ref_.has_task()) { - iter_end_ = true; - } else { - //prepare to output row - scan_result_ = das_ref_.begin_result_iter(); - if (OB_FAIL(update_output_tablet_id())) { - LOG_WARN("update output row pkey failed", K(ret), KPC(scan_result_.get_tablet_loc())); - } - } + if (!is_group_rescan()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("switch group with a null fold_iter", K(ret)); + } else { + ret = fold_iter_->set_scan_group(group_id); } return ret; } @@ -1801,15 +1982,9 @@ int ObTableScanOp::get_next_row_with_das() } while (OB_SUCC(ret) && !got_row) { clear_evaluated_flag(); - if (OB_FAIL(scan_result_.get_next_row())) { + if (OB_FAIL(output_->get_next_row())) { if (OB_ITER_END == ret) { - if (OB_FAIL(scan_result_.next_result())) { - if (OB_ITER_END != ret) { - LOG_WARN("fetch next task failed", K(ret)); - } - } else if (OB_FAIL(update_output_tablet_id())) { - LOG_WARN("update output row pkey failed", K(ret), K(scan_result_.get_tablet_loc()->tablet_id_)); - } + // do nothing. 
} else { LOG_WARN("get next row from das result failed", K(ret)); } @@ -1861,27 +2036,18 @@ int ObTableScanOp::get_next_batch_with_das(int64_t &count, int64_t capacity) // ObNewIterIterator::get_next_rows() may return rows too when got OB_ITER_END. // It's hard to use, we split it into two calls here since get_next_rows() is reentrant // when got OB_ITER_END. - ret = scan_result_.get_next_rows(count, batch_size); + ret = output_->get_next_rows(count, batch_size); if (OB_ITER_END == ret && count > 0) { ret = OB_SUCCESS; } if (OB_FAIL(ret)) { - if (OB_ITER_END == ret) { - input_row_cnt_ += count; - if (OB_FAIL(scan_result_.next_result())) { - if (OB_ITER_END != ret) { - LOG_WARN("fetch next task failed", K(ret)); - } - } else if (OB_FAIL(update_output_tablet_id())) { - LOG_WARN("update output row pkey failed", K(ret), K(scan_result_.get_tablet_loc()->tablet_id_)); - } - } else { + if (OB_ITER_END != ret) { LOG_WARN("get next batch from das result failed", K(ret)); } } else { // We need do filter first before do the limit. // See the issue 47201028. - if (need_final_limit_ && !MY_SPEC.filters_.empty() && count > 0) { + if (!MY_SPEC.filters_.empty() && count > 0) { bool all_filtered = false; if (OB_FAIL(filter_rows(MY_SPEC.filters_, *brs_.skip_, @@ -1901,7 +2067,8 @@ int ObTableScanOp::get_next_batch_with_das(int64_t &count, int64_t capacity) input_row_cnt_ += count; } } - } + } // while end + if (OB_SUCC(ret) && need_final_limit_) { batch_size = capacity; count = 0; @@ -1918,21 +2085,13 @@ int ObTableScanOp::get_next_batch_with_das(int64_t &count, int64_t capacity) // ObNewIterIterator::get_next_rows() may return rows too when got OB_ITER_END. // It's hard to use, we split it into two calls here since get_next_rows() is reentrant // when got OB_ITER_END. 
- ret = scan_result_.get_next_rows(count, batch_size); + ret = output_->get_next_rows(count, batch_size); brs_.all_rows_active_ = true; if (OB_ITER_END == ret && count > 0) { ret = OB_SUCCESS; } if (OB_FAIL(ret)) { - if (OB_ITER_END == ret) { - if (OB_FAIL(scan_result_.next_result())) { - if (OB_ITER_END != ret) { - LOG_WARN("fetch next task failed", K(ret)); - } - } else if (OB_FAIL(update_output_tablet_id())) { - LOG_WARN("update output row pkey failed", K(ret), K(scan_result_.get_tablet_loc()->tablet_id_)); - } - } else { + if (OB_ITER_END != ret) { LOG_WARN("get next batch from das result failed", K(ret)); } } else { @@ -1968,18 +2127,9 @@ int ObTableScanOp::get_next_batch_with_das(int64_t &count, int64_t capacity) int ObTableScanOp::inner_get_next_row_implement() { int ret = OB_SUCCESS; - if (OB_SUCC(ret) && MY_SPEC.is_global_index_back()) { - if (OB_ISNULL(global_index_lookup_op_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments",K(ret)); - } else if (OB_FAIL(global_index_lookup_op_->get_next_row())) { - LOG_WARN("failed to get next batch",K(ret)); - } - } else { - if (OB_FAIL(inner_get_next_row_for_tsc())) { - if (OB_ITER_END != ret) { - LOG_WARN("failed to get next row",K(ret)); - } + if (OB_FAIL(inner_get_next_row_for_tsc())) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to get next row", K(ret)); } } return ret; @@ -2033,7 +2183,7 @@ int ObTableScanOp::inner_get_next_row_for_tsc() LOG_WARN("add ddl column checksum failed", K(ret)); } } - if (OB_UNLIKELY(OB_ITER_END == ret && das_ref_.has_task())) { + if (OB_UNLIKELY(OB_ITER_END == ret && OB_NOT_NULL(scan_iter_) && scan_iter_->has_task())) { // ObIPartitionGroup *partition = NULL; // ObIPartitionGroupGuard *guard = NULL; // if (OB_ISNULL(guard)) { @@ -2041,7 +2191,7 @@ int ObTableScanOp::inner_get_next_row_for_tsc() // } else if (DAS_SCAN_OP->get_scan_param().main_table_scan_stat_.bf_access_cnt_ > 0) { // partition->feedback_scan_access_stat(DAS_SCAN_OP->get_scan_param()); // } - 
ObTableScanParam &scan_param = DAS_SCAN_OP(*das_ref_.begin_task_iter())->get_scan_param(); + ObTableScanParam &scan_param = DAS_SCAN_OP(*scan_iter_->begin_task_iter())->get_scan_param(); ObTableScanStat &table_scan_stat = GET_PHY_PLAN_CTX(ctx_)->get_table_scan_stat(); fill_table_scan_stat(scan_param.main_table_scan_stat_, table_scan_stat); if (MY_SPEC.should_scan_index() && scan_param.scan_flag_.index_back_) { @@ -2058,6 +2208,7 @@ int ObTableScanOp::inner_get_next_row_for_tsc() } return ret; } + int ObTableScanOp::inner_get_next_batch(const int64_t max_row_cnt) { int ret = OB_SUCCESS; @@ -2069,25 +2220,8 @@ int ObTableScanOp::inner_get_next_batch(const int64_t max_row_cnt) if (enable_random_output && max_row_cnt > 1) { gen_rand_size_and_skip_bits(max_row_cnt, rand_row_cnt, rand_append_bits); } - if (OB_SUCC(ret) && MY_SPEC.is_global_index_back()) { - int64_t count = 0; - if (OB_ISNULL(global_index_lookup_op_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments",K(ret)); - } else { - global_index_lookup_op_->get_brs().size_ = brs_.size_ ; - global_index_lookup_op_->get_brs().end_ = brs_.end_; - if (OB_FAIL(global_index_lookup_op_->get_next_rows(count, rand_row_cnt))) { - LOG_WARN("failed to get next rows",K(ret), K(rand_row_cnt)); - } else { - brs_.size_ = global_index_lookup_op_->get_brs().size_; - brs_.end_ = global_index_lookup_op_->get_brs().end_; - } - } - } else { - if (OB_FAIL(inner_get_next_batch_for_tsc(rand_row_cnt))) { - LOG_WARN("failed to get next row",K(ret)); - } + if (OB_FAIL(inner_get_next_batch_for_tsc(rand_row_cnt))) { + LOG_WARN("failed to get next batch", K(ret)); } if (OB_SUCC(ret) && enable_random_output && !brs_.end_ @@ -2102,6 +2236,7 @@ int ObTableScanOp::inner_get_next_batch(const int64_t max_row_cnt) } return ret; } + int ObTableScanOp::inner_get_next_batch_for_tsc(const int64_t max_row_cnt) { int ret = OB_SUCCESS; @@ -2168,7 +2303,7 @@ int ObTableScanOp::inner_get_next_batch_for_tsc(const int64_t max_row_cnt) } } - if 
(OB_SUCC(ret) && brs_.end_ && das_ref_.has_task()) { + if (OB_SUCC(ret) && brs_.end_ && OB_NOT_NULL(scan_iter_) && scan_iter_->has_task()) { // ObIPartitionGroup *partition = NULL; // ObIPartitionGroupGuard *guard = NULL; // if (OB_ISNULL(guard)) { @@ -2176,7 +2311,7 @@ int ObTableScanOp::inner_get_next_batch_for_tsc(const int64_t max_row_cnt) // } else if (DAS_SCAN_OP->get_scan_param().main_table_scan_stat_.bf_access_cnt_ > 0) { // partition->feedback_scan_access_stat(DAS_SCAN_OP->get_scan_param()); // } - ObTableScanParam &scan_param = DAS_SCAN_OP(*das_ref_.begin_task_iter())->get_scan_param(); + ObTableScanParam &scan_param = DAS_SCAN_OP(*scan_iter_->begin_task_iter())->get_scan_param(); ObTableScanStat &table_scan_stat = GET_PHY_PLAN_CTX(ctx_)->get_table_scan_stat(); fill_table_scan_stat(scan_param.main_table_scan_stat_, table_scan_stat); if (MY_SPEC.should_scan_index() && scan_param.scan_flag_.index_back_) { @@ -2214,21 +2349,17 @@ OB_INLINE int ObTableScanOp::do_table_scan() int ret = OB_SUCCESS; need_init_before_get_row_ = false; lib::CompatModeGuard g(MY_SPEC.is_vt_mapping_ ? 
lib::Worker::CompatMode::MYSQL : lib::get_compat_mode()); - if (das_ref_.has_task()) { + if (OB_ISNULL(scan_iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr scan iter", K(ret)); + } else if (scan_iter_->has_task()) { //execute with das LOG_DEBUG("do table scan with DAS", K(MY_SPEC.ref_table_id_), K(MY_SPEC.table_loc_id_)); if (OB_FAIL(prepare_pushdown_limit_param())) { LOG_WARN("prepare pushdow limit param failed", K(ret)); - } else if (OB_FAIL(das_ref_.execute_all_task())) { + } else if (OB_FAIL(scan_iter_->do_table_scan())) { LOG_WARN("execute all das scan task failed", K(ret)); } - if (OB_SUCC(ret)) { - //prepare to output row - scan_result_ = das_ref_.begin_result_iter(); - if (OB_FAIL(update_output_tablet_id())) { - LOG_WARN("update output row pkey failed", K(ret), K(scan_result_.get_tablet_loc()->tablet_id_)); - } - } } else { iter_end_ = true; } @@ -2244,7 +2375,6 @@ int ObTableScanOp::cherry_pick_range_by_tablet_id(ObDASScanOp *scan_op) const ObIArray &input_ranges = MY_INPUT.key_ranges_; const ObIArray &input_ss_ranges = MY_INPUT.ss_key_ranges_; const ObIArray &input_filters = MY_INPUT.mbr_filters_; - ObDASGroupScanOp *batch_op = DAS_GROUP_SCAN_OP(scan_op); bool add_all = false; bool prune_all = true; if (!MY_SPEC.is_vt_mapping_ && OB_UNLIKELY(input_ranges.count() != input_ss_ranges.count())) { @@ -3067,14 +3197,14 @@ int ObTableScanOp::inner_get_next_spatial_index_row() int ObTableScanOp::init_spatial_index_rows() { int ret = OB_SUCCESS; - void *buf = ctx_.get_allocator().alloc(sizeof(ObSpatIndexRow)); + void *buf = ctx_.get_allocator().alloc(sizeof(ObDomainIndexRow)); void *mbr_buffer = ctx_.get_allocator().alloc(OB_DEFAULT_MBR_SIZE); void *obj_buf = ctx_.get_allocator().alloc(sizeof(ObObj) * 2 * SAPTIAL_INDEX_DEFAULT_ROW_COUNT); if (OB_ISNULL(buf) || OB_ISNULL(mbr_buffer) || OB_ISNULL(obj_buf)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate spatial row store failed", K(ret), K(buf), K(mbr_buffer)); } else { - 
spat_index_.spat_rows_ = new(buf) ObSpatIndexRow(); + spat_index_.spat_rows_ = new(buf) ObDomainIndexRow(); spat_index_.mbr_buffer_ = mbr_buffer; spat_index_.obj_buffer_ = obj_buf; } @@ -3133,579 +3263,5 @@ void ObTableScanOp::adjust_rand_output_brs(const int64_t rand_append_bits) brs_.all_rows_active_ = false; } -ObGlobalIndexLookupOpImpl::ObGlobalIndexLookupOpImpl(ObTableScanOp *table_scan_op) : - ObIndexLookupOpImpl(GLOBAL_INDEX, 10000 /*default_batch_row_count*/), - table_scan_op_(table_scan_op), - das_ref_(table_scan_op_->get_eval_ctx(), table_scan_op_->get_exec_ctx()), lookup_result_(), - lookup_memctx_() -{ -} - -int ObGlobalIndexLookupOpImpl::open() -{ - int ret = OB_SUCCESS; - ObSqlCtx *sql_ctx = NULL; - ObSQLSessionInfo *my_session = GET_MY_SESSION(table_scan_op_->get_exec_ctx()); - if (OB_ISNULL(sql_ctx = table_scan_op_->get_exec_ctx().get_sql_ctx()) - || OB_ISNULL(sql_ctx->schema_guard_) - || OB_ISNULL(get_calc_part_id_expr())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid argument", K(ret), KP(sql_ctx), KP(get_calc_part_id_expr())); - } else { - ObMemAttr mem_attr; - mem_attr.tenant_id_ = my_session->get_effective_tenant_id(); - mem_attr.label_ = ObModIds::OB_SQL_TABLE_LOOKUP; - das_ref_.set_mem_attr(mem_attr); - das_ref_.set_expr_frame_info(&table_scan_op_->get_spec().plan_->get_expr_frame_info()); - } - if (OB_SUCC(ret)) { - table_scan_op_->das_ref_.set_lookup_iter(&lookup_result_); - } - if (OB_SUCC(ret) && OB_ISNULL(lookup_memctx_)) { - lib::ContextParam param; - param.set_mem_attr(my_session->get_effective_tenant_id(), - ObModIds::OB_SQL_TABLE_LOOKUP, ObCtxIds::DEFAULT_CTX_ID) - .set_properties(lib::USE_TL_PAGE_OPTIONAL); - if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(lookup_memctx_, param))) { - LOG_WARN("create lookup mem context entity failed", K(ret)); - } else { - table_scan_op_->tsc_rtdef_.lookup_rtdef_->scan_allocator_.set_alloc(&lookup_memctx_->get_arena_allocator()); - 
table_scan_op_->tsc_rtdef_.lookup_rtdef_->stmt_allocator_.set_alloc(&lookup_memctx_->get_arena_allocator()); - } - } - LOG_DEBUG("open table lookup", K(table_scan_op_->get_spec())); - return ret; -} - -int ObGlobalIndexLookupOpImpl::close() -{ - int ret = OB_SUCCESS; - if (OB_FAIL(das_ref_.close_all_task())) { - LOG_WARN("close all das task failed", K(ret)); - } - return ret; -} - -int ObGlobalIndexLookupOpImpl::rescan() -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(table_scan_op_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("table scan op not init", K(ret)); - } else if (!table_scan_op_->need_real_rescan() && lookup_group_cnt_ < index_group_cnt_) { - LOG_DEBUG("rescan in group lookup, only need to switch iterator", - K(lookup_group_cnt_), K(index_group_cnt_)); - if (OB_FAIL(table_scan_op_->build_bnlj_params())) { - LOG_WARN("build batch nlj params failed", K(ret)); - } else if (OB_FAIL(switch_lookup_result_iter())) { - LOG_WARN("switch lookup result iter failed", K(ret)); - } - } else if (OB_FAIL(das_ref_.close_all_task())) { - LOG_WARN("failed to close all das task", K(ret)); - } else if (OB_FAIL(table_scan_op_->inner_rescan_for_tsc())) { - LOG_WARN("rescan operator failed", K(ret)); - } else { - reset_for_rescan(); - } -#ifndef NDEBUG - OX(OB_ASSERT(false == brs_.end_)); -#endif - return ret; -} - -int ObGlobalIndexLookupOpImpl::get_next_row_from_index_table() -{ - int ret = OB_SUCCESS; - bool got_row = false; - do { - if (OB_FAIL(table_scan_op_->inner_get_next_row_for_tsc())) { - if (OB_ITER_END != ret) { - LOG_WARN("get next row from child failed", K(ret)); - } else if (get_batch_rescan()) { - //switch to next index iterator, call child's rescan - if (OB_FAIL(table_scan_op_->inner_rescan_for_tsc())) { - if (OB_ITER_END != ret) { - LOG_WARN("rescan index operator failed", K(ret)); - } - } else { - inc_index_group_cnt(); - LOG_DEBUG("switch to next index batch to fetch rowkey", K(get_index_group_cnt()), K(lookup_rowkey_cnt_)); - } - } - } else { - got_row = true; - } 
- } while (OB_SUCC(ret)&& !got_row) ; - return ret; -} - -int ObGlobalIndexLookupOpImpl::process_data_table_rowkey() -{ - int ret = OB_SUCCESS; - ObObjectID partition_id = ObExprCalcPartitionId::NONE_PARTITION_ID; - ObTabletID tablet_id; - ObDASScanOp *das_scan_op = nullptr; - ObDASTabletLoc *tablet_loc = nullptr; - - ObDASScanRtDef *lookup_rtdef = table_scan_op_->tsc_rtdef_.lookup_rtdef_; - ObDASCtx &das_ctx = DAS_CTX(table_scan_op_->get_exec_ctx()); - if (OB_FAIL(ObExprCalcPartitionBase::calc_part_and_tablet_id(get_calc_part_id_expr(), table_scan_op_->get_eval_ctx(), partition_id, tablet_id))) { - LOG_WARN("fail to calc part id", K(ret), KPC(get_calc_part_id_expr())); - } else if (OB_FAIL(das_ctx.extended_tablet_loc(*lookup_rtdef->table_loc_, tablet_id, tablet_loc))) { - LOG_WARN("pkey to tablet loc failed", K(ret)); - } else if (OB_UNLIKELY(!has_das_scan_op(tablet_loc, das_scan_op))) { - ObDASOpType op_type = get_batch_rescan() ? DAS_OP_TABLE_BATCH_SCAN : DAS_OP_TABLE_SCAN; - ObIDASTaskOp *tmp_op = nullptr; - if (OB_FAIL(das_ref_.create_das_task(tablet_loc, op_type, tmp_op))) { - LOG_WARN("prepare das task failed", K(ret)); - } else { - das_scan_op = static_cast(tmp_op); - das_scan_op->set_scan_ctdef(get_lookup_ctdef()); - das_scan_op->set_scan_rtdef(lookup_rtdef); - das_scan_op->set_can_part_retry(table_scan_op_->can_partition_retry()); - } - } - if (OB_SUCC(ret)) { - storage::ObTableScanParam &scan_param = das_scan_op->get_scan_param(); - ObNewRange lookup_range; - if (OB_FAIL(build_data_table_range(lookup_range))) { - LOG_WARN("build data table range failed", K(ret), KPC(tablet_loc)); - } else if (OB_FAIL(scan_param.key_ranges_.push_back(lookup_range))) { - LOG_WARN("store lookup key range failed", K(ret), K(scan_param)); - } else { - scan_param.is_get_ = true; - } - } - - if (OB_SUCC(ret) && get_lookup_ctdef()->trans_info_expr_ != nullptr) { - void *buf = nullptr; - ObDatum *datum_ptr = nullptr; - if 
(OB_FAIL(build_trans_datum(get_lookup_ctdef()->trans_info_expr_, - &(table_scan_op_->get_eval_ctx()), - lookup_memctx_->get_arena_allocator(), - datum_ptr))) { - - } else if (OB_ISNULL(datum_ptr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null", K(ret)); - } else if (OB_FAIL(das_scan_op->trans_info_array_.push_back(datum_ptr))) { - LOG_WARN("fail to push back trans info array", K(ret), KPC(datum_ptr)); - } - } - return ret; -} - -int ObGlobalIndexLookupOpImpl::process_data_table_rowkeys(const int64_t size, const ObBitVector *skip) -{ - int ret = OB_SUCCESS; - ObEvalCtx::BatchInfoScopeGuard batch_info_guard(table_scan_op_->get_eval_ctx()); - batch_info_guard.set_batch_size(size); - for (auto i = 0; OB_SUCC(ret) && i < size; i++) - { - if (skip->at(i)) { - continue; - } - batch_info_guard.set_batch_idx(i); - if (OB_FAIL(process_data_table_rowkey())) { - LOG_WARN("Failed to process_data_table_rowkey", K(ret), K(i)); - } else { - ++lookup_rowkey_cnt_; - } - } - return ret; -} - -int ObGlobalIndexLookupOpImpl::init_group_range(int64_t cur_group_idx, int64_t group_size) -{ - int ret = OB_SUCCESS; - if (get_batch_rescan()) { - for (DASTaskIter task_iter = das_ref_.begin_task_iter(); !task_iter.is_end(); ++task_iter) { - ObDASGroupScanOp *group_op = static_cast(*task_iter); - group_op->init_group_range(cur_group_idx, group_size); - LOG_DEBUG("set group info", - "scan_range", group_op->get_scan_param().key_ranges_, - K(*group_op)); - } - } - return ret; -} - -int ObGlobalIndexLookupOpImpl::do_index_lookup() -{ - int ret = das_ref_.execute_all_task(); - if (OB_SUCC(ret)) { - lookup_result_ = das_ref_.begin_result_iter(); - } - return ret; -} - -int ObGlobalIndexLookupOpImpl::get_next_row_from_data_table() -{ - int ret = OB_SUCCESS; - bool got_row = false; - if (OB_UNLIKELY(lookup_result_.is_end())) { - ret = OB_ITER_END; - LOG_DEBUG("lookup task is empty", K(ret)); - } - ObDASScanRtDef *lookup_rtdef = table_scan_op_->tsc_rtdef_.lookup_rtdef_; - while (OB_SUCC(ret) 
&& !got_row) { - lookup_rtdef->p_pd_expr_op_->clear_datum_eval_flag(); - if (OB_FAIL(lookup_result_.get_next_row())) { - if (OB_ITER_END == ret) { - if (OB_FAIL(lookup_result_.next_result())) { - if (OB_ITER_END != ret) { - LOG_WARN("fetch next task result failed", K(ret)); - } - } - } else { - LOG_WARN("get next row from das result failed", K(ret)); - } - } else { - got_row = true; - } - } - return ret; -} - -int ObGlobalIndexLookupOpImpl::get_next_rows_from_data_table(int64_t &count, int64_t capacity) -{ - int ret = OB_SUCCESS; - UNUSED(count); - int64_t batch_size = min(capacity, table_scan_op_->get_tsc_spec().max_batch_size_); - if (OB_FAIL(get_next_data_table_rows(brs_.size_, batch_size))) { - if (OB_ITER_END == ret) { - LOG_DEBUG("get next data table rows return ITER_END", K(ret)); - } else { - LOG_WARN("look up get next row failed", K(ret)); - } - } - return ret; -} - -int ObGlobalIndexLookupOpImpl::process_next_index_batch_for_row() -{ - int ret = OB_SUCCESS; - if (OB_FAIL(check_lookup_row_cnt())) { - LOG_WARN("check lookup row cnt failed", K(ret)); - } else if (need_next_index_batch()) { - if (OB_FAIL(das_ref_.close_all_task())) { - LOG_WARN("close all das task failed", K(ret)); - } else { - state_ = INDEX_SCAN; - das_ref_.reuse(); - index_end_ = false; - if (OB_SUCC(ret) && lookup_memctx_ != nullptr) { - lookup_memctx_->reset_remain_one_page(); - } - } - } else { - state_ = FINISHED; - } - return ret; -} - -int ObGlobalIndexLookupOpImpl::process_next_index_batch_for_rows(int64_t &count) -{ - int ret = OB_SUCCESS; - UNUSED(count); - if (OB_FAIL(check_lookup_row_cnt())) { - LOG_WARN("check lookup row cnt failed", K(ret)); - } else if (need_next_index_batch()) { // index search does not reach end, continue index scan - state_ = INDEX_SCAN; - if (OB_FAIL(das_ref_.close_all_task())) { - LOG_WARN("close all das task failed", K(ret)); - } else { - das_ref_.reuse(); - if (OB_SUCC(ret) && lookup_memctx_ != nullptr) { - lookup_memctx_->reset_remain_one_page(); - } 
- } - } else { - state_ = FINISHED; - } - return ret; -} - -bool ObGlobalIndexLookupOpImpl::need_next_index_batch() const -{ - bool bret = false; - if (!get_batch_rescan()) { - bret = !index_end_; - } else if (lookup_group_cnt_ >= index_group_cnt_) { - bret = !index_end_; - } - return bret; -} - -int ObGlobalIndexLookupOpImpl::check_lookup_row_cnt() -{ - int ret = OB_SUCCESS; - ObSQLSessionInfo *my_session = GET_MY_SESSION(table_scan_op_->get_exec_ctx()); - if (GCONF.enable_defensive_check() - && get_lookup_ctdef()->pd_expr_spec_.pushdown_filters_.empty()) { - if (OB_UNLIKELY(lookup_rowkey_cnt_ != lookup_row_cnt_) - && index_group_cnt_ == lookup_group_cnt_) { - ret = OB_ERR_DEFENSIVE_CHECK; - ObString func_name = ObString::make_string("check_lookup_row_cnt"); - LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr()); - LOG_ERROR("Fatal Error!!! Catch a defensive error!", - K(ret), K_(lookup_rowkey_cnt), K_(lookup_row_cnt), - "index_group_cnt", get_index_group_cnt(), - "lookup_group_cnt", get_lookup_group_cnt(), - "index_table_id", table_scan_op_->get_tsc_spec().get_ref_table_id(), - K(DAS_CTX(table_scan_op_->get_exec_ctx()).get_snapshot()), - KPC(my_session->get_tx_desc())); - //now to dump lookup das task info - int64_t rownum = 0; - for (DASTaskIter task_iter = das_ref_.begin_task_iter(); !task_iter.is_end(); ++task_iter) { - ObDASScanOp *das_op = static_cast(*task_iter); - if (das_op->trans_info_array_.count() == das_op->get_scan_param().key_ranges_.count()) { - for (int64_t i = 0; i < das_op->trans_info_array_.count(); i++) { - rownum++; - ObDatum *datum = das_op->trans_info_array_.at(i); - LOG_ERROR("dump TableLookup DAS Task range and trans_info", - K(rownum), KPC(datum), - K(das_op->get_scan_param().key_ranges_.at(i)), - K(das_op->get_tablet_id())); - } - } else { - for (int64_t i = 0; i < das_op->get_scan_param().key_ranges_.count(); i++) { - rownum++; - LOG_ERROR("dump TableLookup DAS Task range", - K(rownum), - 
K(das_op->get_scan_param().key_ranges_.at(i)), - K(das_op->get_tablet_id())); - } - } - } - } - } - - int simulate_error = EVENT_CALL(EventTable::EN_DAS_SIMULATE_DUMP_WRITE_BUFFER); - if (0 != simulate_error) { - for (DASTaskIter task_iter = das_ref_.begin_task_iter(); !task_iter.is_end(); ++task_iter) { - ObDASScanOp *das_op = static_cast(*task_iter); - for (int64_t i = 0; i < das_op->trans_info_array_.count(); i++) { - ObDatum *datum = das_op->trans_info_array_.at(i); - LOG_INFO("dump TableLookup DAS Task trans info", K(i), - KPC(das_op->trans_info_array_.at(i)), - K(das_op->get_scan_param().key_ranges_.at(i)), - K(das_op->get_tablet_id())); - } - } - } - - return ret; -} - -int ObGlobalIndexLookupOpImpl::do_index_table_scan_for_rows(const int64_t max_row_cnt, - const int64_t start_group_idx, - const int64_t default_row_batch_cnt) -{ - int ret = OB_SUCCESS; - const ObBatchRows* child_brs = &table_scan_op_->get_brs(); - int64_t batch_size = common::min(max_row_cnt, table_scan_op_->get_tsc_spec().max_batch_size_); - while (OB_SUCC(ret) && lookup_rowkey_cnt_ < default_row_batch_cnt) { - if (OB_NOT_NULL(child_brs->skip_)) { - child_brs->skip_->reset(table_scan_op_->get_spec().max_batch_size_ > 0? 
table_scan_op_->get_spec().max_batch_size_ : 1); - } - int64_t rowkey_batch_size = min(batch_size, default_row_batch_cnt - lookup_rowkey_cnt_); - if (OB_FAIL(table_scan_op_->inner_get_next_batch_for_tsc(rowkey_batch_size))) { - LOG_WARN("get next row from child failed", K(ret)); - } else if (child_brs->size_ == 0 && child_brs->end_) { - if (get_batch_rescan()) { - if (OB_FAIL(table_scan_op_->inner_rescan_for_tsc())) { - if (OB_ITER_END != ret) { - LOG_WARN("rescan index operator failed", K(ret)); - } else { - ret = OB_SUCCESS; - index_end_ = true; - break; - } - } else { - inc_index_group_cnt(); - LOG_DEBUG("switch to next index batch to fetch rowkey", K(index_group_cnt_), K(lookup_rowkey_cnt_)); - } - } else { - // index scan is finished, go to lookup stage - index_end_ = true; - break; - } - } else { - // critical path: no child_brs sanity check - index_end_ = (true == child_brs->end_); - table_scan_op_->clear_evaluated_flag(); - if (OB_FAIL(process_data_table_rowkeys(child_brs->size_, child_brs->skip_))) { - LOG_WARN("process data table rowkey with das failed", K(ret)); - } - } - } - if (OB_SUCC(ret)) { - state_ = DO_LOOKUP; - OZ(init_group_range(start_group_idx, index_group_cnt_)); - } - LOG_DEBUG("index scan end", KPC(child_brs), K(index_end_), K(index_group_cnt_), K(lookup_rowkey_cnt_), K(lookup_group_cnt_), K(lookup_row_cnt_)); - return ret; -} - -void ObGlobalIndexLookupOpImpl::update_state_in_output_rows_state(int64_t &count) -{ - UNUSED(count); - brs_.end_ = false; -} - -void ObGlobalIndexLookupOpImpl::update_states_in_finish_state() -{ - brs_.end_ = true; -} - -void ObGlobalIndexLookupOpImpl::reset_for_rescan() -{ - if (lookup_memctx_ != nullptr) { - lookup_memctx_->reset_remain_one_page(); - } - state_ = INDEX_SCAN; - das_ref_.reuse(); - index_end_ = false; - index_group_cnt_ = 1; - lookup_group_cnt_ = 1; -} - -int ObGlobalIndexLookupOpImpl::build_data_table_range(ObNewRange &lookup_range) -{ - int ret = OB_SUCCESS; - int64_t rowkey_cnt = 
table_scan_op_->get_tsc_ctdef().global_index_rowkey_exprs_.count(); - ObObj *obj_ptr = nullptr; - void *buf = nullptr; - if (OB_ISNULL(buf = lookup_memctx_->get_arena_allocator().alloc(sizeof(ObObj) * rowkey_cnt))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate buffer failed", K(ret), K(rowkey_cnt)); - } else { - obj_ptr = new(buf) ObObj[rowkey_cnt]; - } - for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_cnt; ++i) { - ObObj tmp_obj; - ObExpr *expr = table_scan_op_->get_tsc_ctdef().global_index_rowkey_exprs_.at(i); - ObDatum &col_datum = expr->locate_expr_datum(table_scan_op_->get_eval_ctx()); - if (OB_FAIL(col_datum.to_obj(tmp_obj, expr->obj_meta_, expr->obj_datum_map_))) { - LOG_WARN("convert datum to obj failed", K(ret)); - } else if (OB_FAIL(ob_write_obj(lookup_memctx_->get_arena_allocator(), tmp_obj, obj_ptr[i]))) { - LOG_WARN("deep copy rowkey value failed", K(ret), K(tmp_obj)); - } - } - if (OB_SUCC(ret)) { - ObRowkey table_rowkey(obj_ptr, rowkey_cnt); - uint64_t ref_table_id = get_lookup_ctdef()->ref_table_id_; - if (OB_FAIL(lookup_range.build_range(ref_table_id, table_rowkey))) { - LOG_WARN("build lookup range failed", K(ret), K(ref_table_id), K(table_rowkey)); - } else { - lookup_range.group_idx_ = index_group_cnt_ - 1; - } - LOG_DEBUG("build data table range", K(ret), K(table_rowkey), K(lookup_range)); - } - return ret; -} - -int ObGlobalIndexLookupOpImpl::switch_lookup_result_iter() -{ - int ret = OB_SUCCESS; - for (DASTaskIter task_iter = das_ref_.begin_task_iter(); - OB_SUCC(ret) && !task_iter.is_end(); ++task_iter) { - ObDASGroupScanOp *batch_op = static_cast(*task_iter); - if (OB_FAIL(batch_op->switch_scan_group())) { - if (OB_ITER_END != ret) { - LOG_WARN("switch batch iter failed", K(ret)); - } - } - } - if (OB_SUCC(ret)) { - ++lookup_group_cnt_; - state_ = OUTPUT_ROWS; - lookup_result_ = das_ref_.begin_result_iter(); - } - return ret; -} - -bool ObGlobalIndexLookupOpImpl::has_das_scan_op(const ObDASTabletLoc *tablet_loc, ObDASScanOp 
*&das_op) -{ - if (get_batch_rescan()) { - das_op = static_cast( - das_ref_.find_das_task(tablet_loc, DAS_OP_TABLE_BATCH_SCAN)); - } else { - das_op = static_cast( - das_ref_.find_das_task(tablet_loc, DAS_OP_TABLE_SCAN)); - } - return das_op != nullptr; -} - -int ObGlobalIndexLookupOpImpl::get_next_data_table_rows(int64_t &count,int64_t capacity) -{ - int ret = OB_SUCCESS; - bool got_rows = false; - if (OB_UNLIKELY(lookup_result_.is_end())) { - ret = OB_ITER_END; - LOG_DEBUG("lookup task is empty", K(ret)); - } - while (OB_SUCC(ret) && !got_rows) { - table_scan_op_->clear_evaluated_flag(); - ret = lookup_result_.get_next_rows(count, capacity); - if (OB_ITER_END == ret && count > 0) { - got_rows = true; - ret = OB_SUCCESS; - } - if (OB_FAIL(ret)) { - if (OB_ITER_END == ret) { - if (OB_FAIL(lookup_result_.next_result())) { - if (OB_ITER_END != ret) { - LOG_WARN("fetch next task result failed", K(ret)); - } else { - // do nothing, just return OB_ITER_END to notify the caller das scan - // reach end - LOG_DEBUG("das_ref_ reach end, stop lookup table"); - } - } - } else { - LOG_WARN("get next row from das result failed", K(ret)); - } - } else if (count == 0) { - if (OB_FAIL(lookup_result_.next_result())) { - if (OB_ITER_END != ret) { - LOG_WARN("fetch next task failed", K(ret)); - } else { - // do nothing, just return OB_ITER_END to notify the caller das scan - // reach end - LOG_DEBUG("das_ref_ reach end, stop lookup table"); - } - } - } else { - got_rows = true; - } - } - if (OB_SUCC(ret) && got_rows) { - lookup_row_cnt_ += count; - } - return ret; -} - -int ObGlobalIndexLookupOpImpl::reset_brs() -{ - int ret = OB_SUCCESS; - brs_.size_ = 0; - brs_.end_ = false; - return ret; -} - -void ObGlobalIndexLookupOpImpl::destroy() -{ - state_ = FINISHED; - index_end_ = true; - das_ref_.reset(); - if (lookup_memctx_ != nullptr) { - lookup_memctx_->reset_remain_one_page(); - DESTROY_CONTEXT(lookup_memctx_); - lookup_memctx_ = nullptr; - } -} - } // end namespace sql } // end 
namespace oceanbase diff --git a/src/sql/engine/table/ob_table_scan_op.h b/src/sql/engine/table/ob_table_scan_op.h index f824583f23..dada33d49c 100644 --- a/src/sql/engine/table/ob_table_scan_op.h +++ b/src/sql/engine/table/ob_table_scan_op.h @@ -24,9 +24,15 @@ #include "sql/das/ob_das_ref.h" #include "sql/das/ob_data_access_service.h" #include "sql/das/ob_das_scan_op.h" +#include "sql/das/ob_text_retrieval_op.h" #include "sql/das/ob_das_attach_define.h" +#include "sql/das/ob_text_retrieval_op.h" #include "sql/engine/basic/ob_pushdown_filter.h" #include "sql/engine/table/ob_index_lookup_op_impl.h" +#include "sql/das/iter/ob_das_iter.h" +#include "sql/das/iter/ob_das_merge_iter.h" +#include "sql/das/iter/ob_das_group_fold_iter.h" + namespace oceanbase { namespace common @@ -71,7 +77,7 @@ public: obj_buffer_(nullptr) {} ~ObSpatialIndexCache() {}; - ObSpatIndexRow *spat_rows_; + ObDomainIndexRow *spat_rows_; uint8_t spat_row_index_; void *mbr_buffer_; void *obj_buffer_; @@ -154,6 +160,7 @@ public: scan_ctdef_.access_column_ids_; } int allocate_dppr_table_loc(); + ObDASScanCtDef *get_lookup_ctdef(); TO_STRING_KV(K_(pre_query_range), K_(flashback_item), K_(bnlj_param_idxs), @@ -163,7 +170,8 @@ public: KPC_(lookup_loc_meta), KPC_(das_dppr_tbl), KPC_(calc_part_id_expr), - K_(global_index_rowkey_exprs)); + K_(global_index_rowkey_exprs), + K_(attach_spec)); //the query range of index scan/table scan ObQueryRange pre_query_range_; FlashBackItem flashback_item_; @@ -202,7 +210,8 @@ struct ObTableScanRtDef range_buffers_(nullptr), range_buffer_idx_(0), group_size_(0), - max_group_size_(0) + max_group_size_(0), + attach_rtinfo_(nullptr) { } void prepare_multi_part_limit_param(); @@ -222,6 +231,7 @@ struct ObTableScanRtDef // for equal_query_range opt end int64_t group_size_; int64_t max_group_size_; + ObDASAttachRtInfo *attach_rtinfo_; }; // table scan operator input @@ -440,13 +450,15 @@ protected: int reassign_task_ranges(ObGranuleTaskInfo &info); int local_iter_reuse(); - 
int switch_batch_iter(); int set_batch_iter(int64_t group_id); + void reset_iter_tree_for_rescan(); + bool is_group_rescan() const { return OB_NOT_NULL(output_) && output_ == fold_iter_; } int calc_expr_int_value(const ObExpr &expr, int64_t &retval, bool &is_null_value); int init_table_scan_rtdef(); int init_das_scan_rtdef(const ObDASScanCtDef &das_ctdef, ObDASScanRtDef &das_rtdef, const ObDASTableLocMeta *loc_meta); + int init_attach_scan_rtdef(const ObDASBaseCtDef *attach_ctdef, ObDASBaseRtDef *&attach_rtdef); int prepare_scan_range(); int prepare_batch_scan_range(); int build_bnlj_params(); @@ -483,14 +495,30 @@ protected: int fill_generated_cellid_mbr(const ObObj &cellid, const ObObj &mbr); int inner_get_next_spatial_index_row(); int init_spatial_index_rows(); + void set_real_rescan_cnt(int64_t real_rescan_cnt) { group_rescan_cnt_ = real_rescan_cnt; } + int64_t get_real_rescan_cnt() { return group_rescan_cnt_; } + // in_batch_rescan_subplan means the ancestor operator(NLJ/SPF) of TSC uses bacth rescan + bool in_batch_rescan_subplan() + { + return !tsc_rtdef_.bnlj_params_.empty(); + } + // because of adptive batch rescan in TSC, TSC may performs single-line rescan in some scenarios; + // need_perform_real_batch_rescan means TSC needs perform a real batch rescan in the adaptive batch-rescan process + // and the return value changes during execution + bool need_perform_real_batch_rescan() + { + return (OB_NOT_NULL(fold_iter_) && output_ == fold_iter_); + } protected: int prepare_das_task(); int prepare_all_das_tasks(); int prepare_pushdown_limit_param(); bool has_das_scan_op(const ObDASTabletLoc *tablet_loc, ObDASScanOp *&das_op); - int init_das_group_range(const int64_t cur_group_idx, const int64_t group_size); int create_one_das_task(ObDASTabletLoc *tablet_loc); + int pushdown_normal_lookup_to_das(ObDASScanOp &target_op); + int pushdown_attach_task_to_das(ObDASScanOp &target_op); + int attach_related_taskinfo(ObDASScanOp &target_op, ObDASBaseRtDef 
*attach_rtdef); int do_table_scan(); int get_next_row_with_das(); bool need_init_checksum(); @@ -502,6 +530,7 @@ protected: int get_next_batch_with_das(int64_t &count, int64_t capacity); void replace_bnlj_param(int64_t batch_idx); bool need_real_rescan(); + int check_need_real_rescan(bool &bret); static int check_is_physical_rowid(ObIAllocator &allocator, ObRowkey &row_key, bool &is_physical_rowid, @@ -592,7 +621,6 @@ private: void adjust_rand_output_brs(const int64_t rand_skip_bits); protected: - ObDASRef das_ref_; DASOpResultIter scan_result_; ObTableScanRtDef tsc_rtdef_; bool need_final_limit_; @@ -619,71 +647,26 @@ protected: int64_t scan_task_id_; bool report_checksum_; bool in_rescan_; - ObGlobalIndexLookupOpImpl *global_index_lookup_op_; ObSpatialIndexCache spat_index_; + + // output_ is used to output data, TSC operator directly invokes output_::get_next_row(s), + // it points to fold_iter_ in group rescan and iter_tree_ in normal scan. + ObDASIter *output_; + + // fold_iter_ is used for group rescan, it folds the output of iter_tree_ according to group_idx. + ObDASGroupFoldIter *fold_iter_; + + // iter_tree_ is used to produce data, + // for table scan and local index lookup: + // iter_tree_ and scan_iter_ are the same, both refer to a ObDASMergeIter, + // for global index lookup: + // iter_tree_ refers to a ObDASLookupIter for lookup and scan_iter_ refers to a ObDASMergeIter for index scan. 
+ ObDASIter *iter_tree_; + ObDASMergeIter *scan_iter_; + int64_t group_rescan_cnt_; + int64_t group_id_; }; -class ObGlobalIndexLookupOpImpl : public ObIndexLookupOpImpl -{ -public: - ObGlobalIndexLookupOpImpl(ObTableScanOp *table_scan_op); - int open(); - int close(); - int rescan(); - void destroy(); - ObBatchRows& get_brs() {return brs_;} -private: - OB_INLINE ObExpr* get_calc_part_id_expr() { return table_scan_op_->get_tsc_ctdef().calc_part_id_expr_; } - OB_INLINE ObDASTableLocMeta* get_loc_meta() { return table_scan_op_->get_tsc_ctdef().lookup_loc_meta_; } - OB_INLINE const ObDASScanCtDef* get_lookup_ctdef() { return table_scan_op_->get_tsc_ctdef().lookup_ctdef_; } - OB_INLINE bool get_batch_rescan() const { return table_scan_op_->get_tsc_spec().batch_scan_flag_; } -public: - virtual void do_clear_evaluated_flag() override { table_scan_op_->clear_evaluated_flag(); } - virtual int get_next_row_from_index_table() override; - virtual int process_data_table_rowkey() override; - virtual int process_data_table_rowkeys(const int64_t size, const ObBitVector *skip) override; - virtual bool is_group_scan() const override {return true;} - virtual int init_group_range(int64_t cur_group_idx, int64_t group_size) override; - virtual int do_index_lookup() override; - virtual int get_next_row_from_data_table() override; - virtual int get_next_rows_from_data_table(int64_t &count, int64_t capacity) override; - virtual int process_next_index_batch_for_row() override; - virtual int process_next_index_batch_for_rows(int64_t &count) override; - virtual bool need_next_index_batch() const override; - virtual int check_lookup_row_cnt() override; - virtual int do_index_table_scan_for_rows(const int64_t max_row_cnt, - const int64_t start_group_idx, - const int64_t default_row_batch_cnt) override; - virtual void update_state_in_output_rows_state(int64_t &count) override; - virtual void update_states_in_finish_state() override; - virtual void update_states_after_finish_state() override 
{brs_.end_ = true;} - - // The following function distinguishes between the global index back and the local index back. - // For Local index, it will return 0 - // For Global index, it will return the property - virtual int64_t get_index_group_cnt() const override {return index_group_cnt_;} - virtual int64_t get_lookup_group_cnt() const override {return lookup_group_cnt_;} - virtual void inc_index_group_cnt() override {index_group_cnt_++;} - virtual void inc_lookup_group_cnt() override {lookup_group_cnt_++;} - virtual ObEvalCtx & get_eval_ctx() override {return table_scan_op_->get_eval_ctx();} - virtual const ExprFixedArray & get_output_expr() override {return table_scan_op_->get_tsc_ctdef().get_das_output_exprs(); } - - void reset_for_rescan(); - int build_data_table_range(common::ObNewRange &lookup_range); - int switch_lookup_result_iter(); - bool has_das_scan_op(const ObDASTabletLoc *tablet_loc, ObDASScanOp *&das_op); - int get_next_data_table_rows(int64_t &count, const int64_t capacity); - int reset_brs(); -private: - ObTableScanOp *table_scan_op_; - - ObDASRef das_ref_; - DASOpResultIter lookup_result_; - ObBatchRows brs_; - lib::MemoryContext lookup_memctx_; -private: - DISALLOW_COPY_AND_ASSIGN(ObGlobalIndexLookupOpImpl); -}; } // end namespace sql } // end namespace oceanbase diff --git a/src/sql/ob_sql_define.h b/src/sql/ob_sql_define.h index d0afb29c96..9645133372 100644 --- a/src/sql/ob_sql_define.h +++ b/src/sql/ob_sql_define.h @@ -135,7 +135,10 @@ enum ObNameTypeClass enum ObMatchAgainstMode { NATURAL_LANGUAGE_MODE = 0, - BOOLEAN_MODE = 1 + NATURAL_LANGUAGE_MODE_WITH_QUERY_EXPANSION = 1, + BOOLEAN_MODE = 2, + WITH_QUERY_EXPANSION = 3, + MAX_MATCH_AGAINST_MODE = 4, }; #define IS_JOIN(type) \ @@ -652,6 +655,25 @@ ObTMSegmentArrayset_tenant_id(MTL_ID()); } +inline const ObString &ob_match_against_mode_str(const ObMatchAgainstMode mode) +{ + static const ObString ma_mode_str[] = + { + "NATURAL LANGUAGE MODE", + "NATURAL LANGUAGE MODE WITH QUERY EXPANSION", 
+ "BOOLEAN MODE", + "WITH QUERY EXPANSION", + "UNKNOWN MATCH MODE" + }; + + if (OB_LIKELY(mode >= ObMatchAgainstMode::NATURAL_LANGUAGE_MODE) + && OB_LIKELY(mode < ObMatchAgainstMode::MAX_MATCH_AGAINST_MODE)) { + return ma_mode_str[mode]; + } else { + return ma_mode_str[ObMatchAgainstMode::MAX_MATCH_AGAINST_MODE]; + } +} + static bool is_fixed_length(ObObjType type) { bool is_fixed = true; ObObjTypeClass tc = ob_obj_type_class(type); diff --git a/src/sql/optimizer/ob_access_path_estimation.cpp b/src/sql/optimizer/ob_access_path_estimation.cpp index 8ca7ef6ef5..da83013525 100644 --- a/src/sql/optimizer/ob_access_path_estimation.cpp +++ b/src/sql/optimizer/ob_access_path_estimation.cpp @@ -425,7 +425,9 @@ int ObAccessPathEstimation::check_path_can_use_storage_estimation(const AccessPa int64_t partition_count = part_info->get_phy_tbl_location_info().get_partition_cnt(); if (partition_count > 1 || scan_range_count <= 0 || - (!path->est_cost_info_.index_meta_info_.is_geo_index_ && scan_range_count > ObOptEstCost::MAX_STORAGE_RANGE_ESTIMATION_NUM)) { + (!path->est_cost_info_.index_meta_info_.is_geo_index_ && + !path->est_cost_info_.index_meta_info_.is_multivalue_index_ && + scan_range_count > ObOptEstCost::MAX_STORAGE_RANGE_ESTIMATION_NUM)) { can_use = false; } else { can_use = true; diff --git a/src/sql/optimizer/ob_access_path_estimation.h b/src/sql/optimizer/ob_access_path_estimation.h index 3e6084242d..6fe3c9ecce 100644 --- a/src/sql/optimizer/ob_access_path_estimation.h +++ b/src/sql/optimizer/ob_access_path_estimation.h @@ -43,7 +43,11 @@ public: const bool is_inner_path, const ObIArray &filter_exprs, ObBaseTableEstMethod &method); - + static int inner_estimate_rowcount(ObOptimizerContext &ctx, + common::ObIArray &paths, + const bool is_inner_path, + const ObIArray &filter_exprs, + bool &is_use_ds); static int estimate_full_table_rowcount(ObOptimizerContext &ctx, const ObTablePartitionInfo &table_part_info, ObTableMetaInfo &meta); diff --git 
a/src/sql/optimizer/ob_dynamic_sampling.cpp b/src/sql/optimizer/ob_dynamic_sampling.cpp index 84ff3518e7..f85728ac26 100644 --- a/src/sql/optimizer/ob_dynamic_sampling.cpp +++ b/src/sql/optimizer/ob_dynamic_sampling.cpp @@ -1357,6 +1357,7 @@ int ObDynamicSamplingUtils::check_ds_can_use_filter(const ObRawExpr *filter, filter->has_flag(CNT_DYNAMIC_USER_VARIABLE) || filter->has_flag(CNT_PL_UDF) || filter->has_flag(CNT_SO_UDF) || + filter->has_flag(CNT_MATCH_EXPR) || filter->get_expr_type() == T_FUN_SET_TO_STR || filter->get_expr_type() == T_FUN_ENUM_TO_STR || filter->get_expr_type() == T_OP_GET_PACKAGE_VAR || diff --git a/src/sql/optimizer/ob_index_info_cache.h b/src/sql/optimizer/ob_index_info_cache.h index cb192bc507..a51ab6dd46 100644 --- a/src/sql/optimizer/ob_index_info_cache.h +++ b/src/sql/optimizer/ob_index_info_cache.h @@ -20,6 +20,7 @@ namespace oceanbase { namespace sql { +class ObShardingInfo; /* 缓存query range的信息 */ class QueryRangeInfo { @@ -138,6 +139,8 @@ public: is_index_back_(false), is_index_global_(false), is_geo_index_(false), + is_fulltext_index_(false), + is_multivalue_index_(false), range_info_(), ordering_info_(), interesting_order_info_(OrderingFlag::NOT_MATCH), @@ -171,12 +174,16 @@ public: void set_is_index_global(const bool is_index_global) { is_index_global_ = is_index_global; } bool is_index_geo() const { return is_geo_index_; } void set_is_index_geo(const bool is_index_geo) { is_geo_index_ = is_index_geo; } + bool is_fulltext_index() const { return is_fulltext_index_; } + void set_is_fulltext_index(const bool is_fulltext_index) { is_fulltext_index_ = is_fulltext_index; } void set_partition_info(ObTablePartitionInfo *partition_info) { partition_info_ = partition_info; } ObTablePartitionInfo *get_partition_info() const { return partition_info_; } void set_sharding_info(ObShardingInfo *sharding_info) { sharding_info_ = sharding_info; } ObShardingInfo *get_sharding_info() const { return sharding_info_; } + bool is_multivalue_index() const { 
return is_multivalue_index_; } + void set_is_multivalue_index(const bool is_multivalue_index) { is_multivalue_index_ = is_multivalue_index; } TO_STRING_KV(K_(index_id), K_(is_unique_index), K_(is_index_back), K_(is_index_global), - K_(range_info), K_(ordering_info), K_(interesting_order_info), + K_(is_fulltext_index), K_(is_multivalue_index), K_(range_info), K_(ordering_info), K_(interesting_order_info), K_(interesting_order_prefix_count)); private: uint64_t index_id_; @@ -184,6 +191,8 @@ private: bool is_index_back_; bool is_index_global_; bool is_geo_index_; + bool is_fulltext_index_; + bool is_multivalue_index_; QueryRangeInfo range_info_; OrderingInfo ordering_info_; int64_t interesting_order_info_; // 记录索引的序在stmt中的哪些地方用到 e.g. join, group by, order by diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index b5c7d02cdb..20b55b1cc6 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -930,7 +930,7 @@ int ObJoinOrder::get_valid_index_ids_with_no_index_hint(ObSqlSchemaGuard &schema || OB_ISNULL(index_schema)) { ret = OB_SCHEMA_ERROR; LOG_WARN("fail to get table schema", K(index_id), K(ret)); - } else if (index_schema->is_domain_index()) { + } else if (index_schema->is_multivalue_index()) { /* do nothing */ } else if (OB_FAIL(valid_index_ids.push_back(index_id))) { LOG_WARN("fail to push back index id", K(ret)); @@ -1015,9 +1015,11 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, ObQueryRangeArray &ss_ranges = range_info.get_ss_ranges(); ObIArray &range_columns = range_info.get_range_columns(); bool is_geo_index = false; + bool is_multi_index = false; + bool is_domain_index = false; ObWrapperAllocator wrap_allocator(*allocator_); ColumnIdInfoMapAllocer map_alloc(OB_MALLOC_NORMAL_BLOCK_SIZE, wrap_allocator); - ColumnIdInfoMap geo_columnInfo_map; + ColumnIdInfoMap domain_columnInfo_map; if (OB_ISNULL(get_plan()) || OB_ISNULL(opt_ctx = &get_plan()->get_optimizer_context()) || 
OB_ISNULL(schema_guard = opt_ctx->get_sql_schema_guard()) || @@ -1033,28 +1035,33 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, } else if (OB_FAIL(get_plan()->get_index_column_items(opt_ctx->get_expr_factory(), table_id, *index_schema, range_columns))) { LOG_WARN("failed to generate rowkey column items", K(ret)); - } else if ((is_geo_index = index_schema->is_spatial_index()) && OB_FAIL(extract_geo_schema_info(base_table_id, - index_id, - wrap_allocator, - map_alloc, - geo_columnInfo_map))) { + } else if ((is_geo_index = index_schema->is_spatial_index()) + && OB_FAIL(extract_geo_schema_info(base_table_id, + index_id, + wrap_allocator, + map_alloc, + domain_columnInfo_map))) { LOG_WARN("failed to extract geometry schema info", K(ret), K(table_id), K(index_id)); + } else if (FALSE_IT(is_multi_index = index_schema->is_multivalue_index())) { } else { const ObSQLSessionInfo *session = opt_ctx->get_session_info(); const ObDataTypeCastParams dtc_params = ObBasicSessionInfo::create_dtc_params(session); + bool all_single_value_range = false; int64_t equal_prefix_count = 0; int64_t equal_prefix_null_count = 0; int64_t range_prefix_count = 0; bool contain_always_false = false; bool has_exec_param = false; + bool is_domain_index = (is_geo_index || is_multi_index); + common::ObSEArray agent_table_filter; bool is_oracle_inner_index_table = share::is_oracle_mapping_real_virtual_table(index_schema->get_table_id()); if (is_oracle_inner_index_table && OB_FAIL(extract_valid_range_expr_for_oracle_agent_table(helper.filters_, agent_table_filter))) { LOG_WARN("failed to extract expr", K(ret)); - } else if (!is_geo_index && OB_FAIL(extract_preliminary_query_range(range_columns, + } else if (!is_domain_index && OB_FAIL(extract_preliminary_query_range(range_columns, is_oracle_inner_index_table ? agent_table_filter : helper.filters_, @@ -1066,9 +1073,15 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, is_oracle_inner_index_table ? 
agent_table_filter : helper.filters_, - geo_columnInfo_map, + domain_columnInfo_map, query_range))) { LOG_WARN("failed to extract query range", K(ret), K(index_id)); + } else if (is_multi_index + && OB_FAIL(extract_multivalue_preliminary_query_range(range_columns, + is_oracle_inner_index_table ? + agent_table_filter : helper.filters_, + query_range))) { + LOG_WARN("failed to extract query range", K(ret), K(index_id)); } else if (OB_ISNULL(query_range)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(query_range), K(ret)); @@ -1745,6 +1758,8 @@ int ObJoinOrder::create_one_access_path(const uint64_t table_id, ap->est_cost_info_.index_meta_info_.is_unique_index_ = index_info_entry->is_unique_index(); ap->est_cost_info_.index_meta_info_.is_global_index_ = index_info_entry->is_index_global(); ap->est_cost_info_.index_meta_info_.is_geo_index_ = index_info_entry->is_index_geo(); + ap->est_cost_info_.index_meta_info_.is_multivalue_index_ = index_info_entry->is_multivalue_index(); + ap->est_cost_info_.index_meta_info_.is_fulltext_index_ = index_info_entry->is_fulltext_index(); ap->est_cost_info_.is_virtual_table_ = is_virtual_table(ref_id); ap->est_cost_info_.table_metas_ = &get_plan()->get_basic_table_metas(); ap->est_cost_info_.sel_ctx_ = &get_plan()->get_selectivity_ctx(); @@ -2351,7 +2366,11 @@ int ObJoinOrder::check_and_extract_query_range(const uint64_t table_id, //do some quick check bool expr_match = false; //some condition on index contain_always_false = false; - if (!index_info_entry.is_index_geo() && OB_FAIL(check_exprs_overlap_index(restrict_infos, index_keys, expr_match))) { + bool is_multivlaue_idx = index_info_entry.is_multivalue_index(); + if (is_multivlaue_idx && + OB_FAIL(check_exprs_overlap_multivalue_index(table_id, index_table_id, restrict_infos, index_keys, expr_match))) { + LOG_WARN("get_range_columns failed", K(ret)); + } else if (!is_multivlaue_idx && !index_info_entry.is_index_geo() && OB_FAIL(check_exprs_overlap_index(restrict_infos, 
index_keys, expr_match))) { LOG_WARN("check quals match index error", K(restrict_infos), K(index_keys)); } else if (index_info_entry.is_index_geo() && OB_FAIL(check_exprs_overlap_gis_index(restrict_infos, index_keys, expr_match))) { LOG_WARN("check quals match gis index error", K(restrict_infos), K(index_keys)); @@ -2607,6 +2626,8 @@ int ObJoinOrder::fill_index_info_entry(const uint64_t table_id, entry->set_is_index_geo(is_index_geo); entry->set_is_index_back(is_index_back); entry->set_is_unique_index(is_unique_index); + entry->set_is_fulltext_index(index_schema->is_fts_index()); + entry->set_is_multivalue_index(index_schema->is_multivalue_index_aux()); entry->get_ordering_info().set_scan_direction(direction); } if (OB_SUCC(ret)) { @@ -2977,17 +2998,34 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id, const ObDMLStmt *stmt = NULL; const TableItem *table_item = NULL; ObSqlSchemaGuard *schema_guard = NULL; + ObSQLSessionInfo *session_info = NULL; uint64_t tids[OB_MAX_INDEX_PER_TABLE + 1]; int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1; const LogTableHint *log_table_hint = NULL; + ObMatchFunRawExpr *match_expr = NULL; if (OB_ISNULL(get_plan()) || OB_ISNULL(stmt = get_plan()->get_stmt()) || - OB_ISNULL(schema_guard = OPT_CTX.get_sql_schema_guard())) { + OB_ISNULL(schema_guard = OPT_CTX.get_sql_schema_guard()) || + OB_ISNULL(session_info = OPT_CTX.get_session_info())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("NULL pointer error", K(get_plan()), K(stmt), K(schema_guard), K(ret)); } else if (OB_ISNULL(table_item = stmt->get_table_item_by_id(table_id))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Table item should not be NULL", K(table_id), K(table_item), K(ret)); + } else if (OB_FAIL(stmt->get_match_expr_on_table(table_id, match_expr))) { + LOG_WARN("failed to check has fulltext search on table", K(ret)); + } else if (OB_NOT_NULL(match_expr)) { + // If there is a full-text search requirement on current base table, We can only choose the + // path that accesses the 
word-doc inverted index for now. + uint64_t inv_idx_tid = OB_INVALID_ID; + if (OB_FAIL(get_matched_inv_index_tid(match_expr, ref_table_id, inv_idx_tid))) { + LOG_WARN("failed to get matched inverted index table id", K(ret)); + } else if (inv_idx_tid == OB_INVALID_ID) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected table id", K(ret)); + } else if (OB_FAIL(valid_index_ids.push_back(inv_idx_tid))) { + LOG_WARN("failed to assign index ids", K(ret)); + } } else if (table_item->is_index_table_) { if (OB_FAIL(valid_index_ids.push_back(table_item->ref_id_))) { LOG_WARN("failed to push back array", K(ret)); @@ -3664,6 +3702,58 @@ int ObJoinOrder::check_exprs_overlap_gis_index(const ObIArray& quals return ret; } +int ObJoinOrder::check_exprs_overlap_multivalue_index( + const uint64_t table_id, + const uint64_t index_table_id, + const ObIArray& quals, + const ObIArray& keys, + bool &match) +{ + LOG_TRACE("OPT:[CHECK GIS MATCH]", K(keys)); + + int ret = OB_SUCCESS; + match = false; + const ObDMLStmt *stmt = nullptr; + if (OB_ISNULL(get_plan()) || OB_ISNULL(stmt = get_plan()->get_stmt())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get stmt or plan unexpected null", K(ret), K(get_plan())); + } else if (keys.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Index keys should not be empty", K(keys.count()), K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && !match && i < quals.count(); ++i) { + ObRawExpr *qual = quals.at(i); + if (OB_ISNULL(qual)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("qual expr should not be NULL", K(qual), K(i), K(ret)); + } else if (qual->get_expr_type() != T_OP_BOOL) { + } else if (!qual->is_domain_json_expr()) { + } else { + const ObColumnSchemaV2 *mulvalue_col = nullptr; + for (int64_t k = 0; k < keys.count() && !match; k++) { + ObColumnRefRawExpr *ref = static_cast(keys.at(k)); + ObRawExpr *column_expr = nullptr; + ObRawExpr *depend_expr = nullptr; + if (!ref->is_multivalue_generated_column()) { + } else if (OB_ISNULL(column_expr = 
stmt->get_column_expr_by_id(table_id, ref->get_column_id()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get multivalue column, null expr", K(ret)); + } else if (OB_ISNULL(depend_expr = (static_cast(column_expr))->get_dependant_expr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get multivalue depend expr, null expr", K(ret)); + } else { + qual = ObRawExprUtils::skip_inner_added_expr(qual); + ObExprEqualCheckContext equal_ctx; + equal_ctx.override_const_compare_ = true; + match = depend_expr->same_as(*qual, &equal_ctx); + } + } + } + } + } + return ret; +} + int ObJoinOrder::extract_preliminary_query_range(const ObIArray &range_columns, const ObIArray &predicates, ObIArray &expr_constraints, @@ -3812,6 +3902,54 @@ int ObJoinOrder::extract_geo_preliminary_query_range(const ObIArray return ret; } +int ObJoinOrder::extract_multivalue_preliminary_query_range(const ObIArray &range_columns, + const ObIArray &predicates, + ObQueryRange *&query_range) +{ + int ret = OB_SUCCESS; + ObOptimizerContext *opt_ctx = NULL; + const ParamStore *params = NULL; + if (OB_ISNULL(get_plan()) || + OB_ISNULL(opt_ctx = &get_plan()->get_optimizer_context()) || + OB_ISNULL(allocator_) || + OB_ISNULL(params = opt_ctx->get_params())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get unexpected null", K(get_plan()), K(opt_ctx), + K(allocator_), K(params), K(ret)); + } else { + void *tmp_ptr = allocator_->alloc(sizeof(ObQueryRange)); + ObQueryRange *tmp_qr = NULL; + if (OB_ISNULL(tmp_ptr)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory for query range", K(ret)); + } else { + tmp_qr = new(tmp_ptr)ObQueryRange(*allocator_); + const ObDataTypeCastParams dtc_params = + ObBasicSessionInfo::create_dtc_params(opt_ctx->get_session_info()); + bool is_in_range_optimization_enabled = false; + if (OB_FAIL(ObOptimizerUtil::is_in_range_optimization_enabled(opt_ctx->get_global_hint(), + opt_ctx->get_session_info(), + is_in_range_optimization_enabled))) { + 
LOG_WARN("failed to check in range optimization enabled", K(ret)); + } else if (OB_FAIL(tmp_qr->preliminary_extract_query_range(range_columns, predicates, + dtc_params, opt_ctx->get_exec_ctx(), + NULL, params, false, true, + is_in_range_optimization_enabled))) { + LOG_WARN("failed to preliminary extract query range", K(ret)); + } + } + if (OB_SUCC(ret)) { + query_range = tmp_qr; + } else { + if (NULL != tmp_qr) { + tmp_qr->~ObQueryRange(); + tmp_qr = NULL; + } + } + } + return ret; +} + int ObJoinOrder::get_candi_range_expr(const ObIArray &range_columns, const ObIArray &predicates, ObIArray &range_predicates) @@ -11966,6 +12104,7 @@ int ObJoinOrder::get_simple_index_info(const uint64_t table_id, is_unique_index = index_schema->is_unique_index(); is_index_global = index_schema->is_global_index_table(); is_index_back = index_schema->is_spatial_index() ? true : false; + is_index_back = (is_index_back || index_schema->is_multivalue_index_aux()); for (int64_t idx = 0; OB_SUCC(ret) && !is_index_back && idx < column_ids.count(); ++idx) { bool found = false; const uint64_t used_column_id = column_ids.at(idx); @@ -12134,7 +12273,10 @@ int ObJoinOrder::fill_filters(const ObIArray &all_filters, ret = est_cost_info.postfix_filters_.push_back(filter); } // 对于空间索引,空间谓词一定要回表计算 - if (OB_SUCC(ret) && est_cost_info.index_meta_info_.is_geo_index_) { + if (OB_SUCC(ret) && + (est_cost_info.index_meta_info_.is_geo_index_ || + est_cost_info.index_meta_info_.is_fulltext_index_ || + est_cost_info.index_meta_info_.is_multivalue_index_)) { ret = est_cost_info.table_filters_.push_back(filter); } } else { @@ -14848,7 +14990,14 @@ int ObJoinOrder::try_get_generated_col_index_expr(ObRawExpr *qual, if (OB_SUCC(ret) && is_same) { ObRawExprCopier copier(expr_factory); ObSEArray column_exprs; - if (OB_FAIL(ObRawExprUtils::extract_column_exprs(qual, column_exprs))) { + ObSQLSessionInfo *session_info = OPT_CTX.get_session_info(); + + if (ObRawExprUtils::is_domain_expr_need_special_replace(child, 
depend_expr)) { + if (OB_FAIL(ObRawExprUtils::replace_domain_wrapper_expr(depend_expr, + col_expr, copier, expr_factory, session_info, qual, j, new_qual))) { + LOG_WARN("failed to replace expr", K(ret)); + } + } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(qual, column_exprs))) { LOG_WARN("extract_column_exprs error", K(ret)); } else if (OB_FAIL(copier.add_skipped_expr(column_exprs))) { LOG_WARN("failed to add skipped exprs", K(ret)); @@ -14857,7 +15006,10 @@ int ObJoinOrder::try_get_generated_col_index_expr(ObRawExpr *qual, //depend_expr's res type may be diff from its column's. copy real_qual and deduce type again. } else if (OB_FAIL(static_cast(new_qual)->replace_param_expr(j, col_expr))) { LOG_WARN("replace failed", K(ret)); - } else if (OB_FAIL(new_qual->formalize(OPT_CTX.get_session_info()))) { + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(new_qual->formalize(session_info))) { if (ret != OB_SUCCESS) { //probably type deduced failed. do nothing LOG_WARN("new qual is not formalized correctly", K(ret), K(*new_qual)); @@ -15420,3 +15572,60 @@ int ObJoinOrder::param_values_table_expr(ObIArray &values_vector, } return ret; } + +int ObJoinOrder::get_matched_inv_index_tid(ObMatchFunRawExpr *match_expr, + uint64_t ref_table_id, + uint64_t &inv_idx_tid) +{ + int ret = OB_SUCCESS; + ObSqlSchemaGuard *schema_guard = NULL; + ObSQLSessionInfo *session_info = NULL; + const ObTableSchema *table_schema = NULL; + ObSEArray index_infos; + if (OB_ISNULL(match_expr) || OB_ISNULL(schema_guard = OPT_CTX.get_sql_schema_guard()) || + OB_ISNULL(session_info = OPT_CTX.get_session_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(ref_table_id, table_schema))) { + LOG_WARN("failed to get main table schema", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(table_schema->get_simple_index_infos(index_infos))) { + 
LOG_WARN("failed to get index infos", K(ret)); + } else { + ColumnReferenceSet column_set; + ObIArray &column_list = match_expr->get_match_columns(); + bool found_matched_index = false; + for (int64_t i = 0; OB_SUCC(ret) && i < column_list.count(); ++i) { + ObColumnRefRawExpr *col_ref = nullptr; + if (OB_UNLIKELY(OB_ISNULL(column_list.at(i)) || !column_list.at(i)->is_column_ref_expr())) { + ret = OB_INVALID_ARGUMENT; + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "match against column"); + } else if (FALSE_IT(col_ref = static_cast(column_list.at(i)))) { + } else if (OB_FAIL(column_set.add_member(col_ref->get_column_id()))) { + LOG_WARN("add to column set failed", K(ret)); + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < index_infos.count() && !found_matched_index; ++i) { + const ObTableSchema *inv_idx_schema = nullptr; + const ObAuxTableMetaInfo &index_info = index_infos.at(i); + if (!share::schema::is_fts_index_aux(index_info.index_type_)) { + // skip + } else if (OB_FAIL(schema_guard->get_table_schema(index_info.table_id_, inv_idx_schema))) { + LOG_WARN("failed to get index schema", K(ret)); + } else if (OB_ISNULL(inv_idx_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index schema", K(ret), KPC(inv_idx_schema)); + } else if (OB_FAIL(ObTransformUtils::check_fulltext_index_match_column(column_set, + table_schema, + inv_idx_schema, + found_matched_index))) { + LOG_WARN("failed to check fulltext index match column", K(ret)); + } else if (found_matched_index) { + inv_idx_tid = index_info.table_id_; + } + } + } + return ret; +} diff --git a/src/sql/optimizer/ob_join_order.h b/src/sql/optimizer/ob_join_order.h index 6b0d5a928d..744dd501b6 100644 --- a/src/sql/optimizer/ob_join_order.h +++ b/src/sql/optimizer/ob_join_order.h @@ -1394,6 +1394,10 @@ struct NullAwareAntiJoinInfo { bool &is_index_back, bool &is_global_index); + int get_matched_inv_index_tid(ObMatchFunRawExpr *match_expr, + uint64_t ref_table_id, + uint64_t &inv_idx_tid); + inline ObTablePartitionInfo 
*get_table_partition_info() { return table_partition_info_; } int param_funct_table_expr(ObRawExpr* &function_table_expr, @@ -1662,6 +1666,10 @@ struct NullAwareAntiJoinInfo { const ColumnIdInfoMap &column_schema_info, ObQueryRange *&query_range); + int extract_multivalue_preliminary_query_range(const ObIArray &range_columns, + const ObIArray &predicates, + ObQueryRange *&query_range); + int extract_geo_schema_info(const uint64_t table_id, const uint64_t index_id, ObWrapperAllocator &wrap_allocator, @@ -1688,6 +1696,12 @@ struct NullAwareAntiJoinInfo { const ObIArray& keys, bool &match); + int check_exprs_overlap_multivalue_index(const uint64_t table_id, + const uint64_t index_table_id, + const ObIArray& quals, + const ObIArray& keys, + bool &match); + /** * 判断连接条件是否匹配索引前缀 * @keys 索引列 diff --git a/src/sql/optimizer/ob_log_join.cpp b/src/sql/optimizer/ob_log_join.cpp index f390364437..d71f672855 100644 --- a/src/sql/optimizer/ob_log_join.cpp +++ b/src/sql/optimizer/ob_log_join.cpp @@ -1352,7 +1352,9 @@ int ObLogJoin::check_and_set_use_batch() if (OB_SUCC(ret) && can_use_batch_nlj_) { bool contains_invalid_startup = false; bool contains_limit = false; - if (get_child(1)->get_type() == log_op_def::LOG_GRANULE_ITERATOR) { + bool enable_group_rescan_test_mode = false; + enable_group_rescan_test_mode = (OB_SUCCESS != (OB_E(EventTable::EN_DAS_GROUP_RESCAN_TEST_MODE) OB_SUCCESS)); + if (get_child(1)->get_type() == log_op_def::LOG_GRANULE_ITERATOR && !enable_group_rescan_test_mode) { can_use_batch_nlj_ = false; } else if (OB_FAIL(plan->contains_startup_with_exec_param(get_child(1), contains_invalid_startup))) { @@ -1423,12 +1425,6 @@ int ObLogJoin::check_if_disable_batch(ObLogicalOperator* root, bool &can_use_bat } } else if (log_op_def::LOG_SET == root->get_type()) { ObLogSet *log_set = static_cast(root); - if (log_set->get_set_op() != ObSelectStmt::UNION) { - //Disable batch nested loop join that contains set operations other than UNION - //because other set operations 
may involve short-circuit operations. - //Currently, batch NLJ does not support short-circuit execution. - can_use_batch_nlj = false; - } for (int64_t i = 0; OB_SUCC(ret) && can_use_batch_nlj && i < root->get_num_of_child(); ++i) { ObLogicalOperator *child = root->get_child(i); if (OB_ISNULL(child)) { @@ -1439,24 +1435,8 @@ int ObLogJoin::check_if_disable_batch(ObLogicalOperator* root, bool &can_use_bat } } } else if (log_op_def::LOG_JOIN == root->get_type()) { - ObLogJoin *join = static_cast(root); - ObSQLSessionInfo *session_info = NULL; - ObLogPlan *plan = NULL; - if (OB_ISNULL(plan = get_plan()) - || OB_ISNULL(session_info = plan->get_optimizer_context().get_session_info())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null", K(ret), K(plan), K(session_info)); - } else if (!session_info->is_spf_mlj_group_rescan_enabled()) { - //Group rescan optimization for nested joins at multiple levels is disabled by default. - can_use_batch_nlj = false; - } else if (!join->can_use_batch_nlj()) { - can_use_batch_nlj = false; - LOG_TRACE("child join not support batch_nlj", K(root->get_name())); - } else if (OB_FAIL(SMART_CALL(check_if_disable_batch(root->get_child(0), can_use_batch_nlj)))) { - LOG_WARN("failed to check use batch nlj", K(ret)); - } else if (OB_FAIL(SMART_CALL(check_if_disable_batch(root->get_child(1), can_use_batch_nlj)))) { - LOG_WARN("failed to check use batch nlj for right op", K(ret)); - } + // multi level nlj use batch is disabled + can_use_batch_nlj = false; } else { can_use_batch_nlj = false; } diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index 74af151927..93a7cd3acd 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -4329,6 +4329,7 @@ int ObLogPlan::allocate_access_path(AccessPath *ap, scan->set_is_index_global(ap->is_global_index_); scan->set_index_back(ap->est_cost_info_.index_meta_info_.is_index_back_); 
scan->set_is_spatial_index(ap->est_cost_info_.index_meta_info_.is_geo_index_); + scan->set_is_multivalue_index(ap->est_cost_info_.index_meta_info_.is_multivalue_index_); scan->set_use_das(ap->use_das_); scan->set_table_partition_info(ap->table_partition_info_); scan->set_table_opt_info(ap->table_opt_info_); @@ -4361,10 +4362,29 @@ int ObLogPlan::allocate_access_path(AccessPath *ap, } if (OB_SUCC(ret)) { - if (OB_FAIL(scan->set_table_scan_filters(ap->filter_))) { + ObSEArray non_match_filters; + ObSEArray match_filters; + if (OB_FAIL(ObRawExprUtils::extract_match_against_filters(ap->filter_, + non_match_filters, + match_filters))) { + LOG_WARN("failed to extract ir fitler from filters", K(ret), K(ap->filter_)); + } else if (match_filters.count() > 0) { + if (OB_FAIL(prepare_text_retrieval_scan(match_filters, scan))) { + LOG_WARN("failed to allocate text ir scan", K(ret)); + } else if (OB_FAIL(scan->set_table_scan_filters(non_match_filters))) { + LOG_WARN("failed to set filters", K(ret)); + } else if (OB_FAIL(append(scan->get_pushdown_filter_exprs(), ap->pushdown_filters_))) { + LOG_WARN("failed to append pushdown filters", K(ret)); + } else { + LOG_DEBUG("handle text ir expr in plan", K(ret), K(non_match_filters), K(match_filters)); + } + } else if (OB_FAIL(scan->set_table_scan_filters(ap->filter_))) { LOG_WARN("failed to set filters", K(ret)); } else if (OB_FAIL(append(scan->get_pushdown_filter_exprs(), ap->pushdown_filters_))) { LOG_WARN("failed to append pushdown filters", K(ret)); + } else if (ap->est_cost_info_.index_meta_info_.is_multivalue_index_ && + OB_FAIL(prepare_multivalue_retrieval_scan(scan))) { + LOG_WARN("failed to prepare multivalue doc_rowkey ", K(ret)); } } @@ -8443,7 +8463,7 @@ int ObLogPlan::allocate_sort_and_exchange_as_top(ObLogicalOperator *&top, bool has_select_into = false; bool is_single = true; bool has_order_by = false; - if (OB_ISNULL(top)) { + if (OB_ISNULL(top) || OB_ISNULL(get_stmt())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get 
unexpected null", K(ret)); } else if (OB_FAIL(check_select_into(has_select_into, is_single, has_order_by))) { @@ -8481,8 +8501,22 @@ int ObLogPlan::allocate_sort_and_exchange_as_top(ObLogicalOperator *&top, } // allocate push down sort if necessary - if (OB_SUCC(ret) && - (exch_info.is_pq_local() || !exch_info.need_exchange()) && !sort_keys.empty() && + bool need_further_sort = true; + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_SUCC(ret) && NULL != topn_expr && need_sort && + OB_FAIL(try_push_topn_into_text_retrieval_scan(top, + topn_expr, + get_stmt()->get_limit_expr(), + get_stmt()->get_offset_expr(), + is_fetch_with_ties, + exch_info.need_exchange(), + sort_keys, + need_further_sort))) { + LOG_WARN("failed to push topn into text retrieval scan", K(ret)); + } else if (!need_further_sort) { + // do nothing + } else if ((exch_info.is_pq_local() || !exch_info.need_exchange()) && !sort_keys.empty() && (need_sort || is_local_order)) { int64_t real_prefix_pos = need_sort && !is_local_order ? prefix_pos : 0; bool real_local_order = need_sort ? 
false : is_local_order; @@ -8903,7 +8937,8 @@ int ObLogPlan::try_push_limit_into_table_scan(ObLogicalOperator *top, !get_stmt()->is_calc_found_rows() && !table_scan->is_sample_scan() && !(table_scan->get_is_index_global() && table_scan->get_index_back() && table_scan->has_index_lookup_filter()) && (NULL == table_scan->get_limit_expr() || - ObOptimizerUtil::is_point_based_sub_expr(limit_expr, table_scan->get_limit_expr()))) { + ObOptimizerUtil::is_point_based_sub_expr(limit_expr, table_scan->get_limit_expr())) && + table_scan->get_text_retrieval_info().topk_limit_expr_ == NULL) { if (!top->is_distributed()) { new_limit_expr = limit_expr; new_offset_expr = offset_expr; @@ -8918,6 +8953,8 @@ int ObLogPlan::try_push_limit_into_table_scan(ObLogicalOperator *top, } else { is_pushed = true; } + } else if (OB_NOT_NULL(table_scan->get_text_retrieval_info().topk_limit_expr_)) { + is_pushed = true; } } else { /*do nothing*/ } return ret; @@ -12619,8 +12656,15 @@ int ObLogPlan::collect_location_related_info(ObLogicalOperator &op) } else if (tsc_op.get_index_back()) { if (OB_FAIL(rel_info.related_ids_.push_back(tsc_op.get_real_ref_table_id()))) { LOG_WARN("store the related table id failed", K(ret)); + } else if (tsc_op.need_doc_id_index_back() && + OB_FAIL(rel_info.related_ids_.push_back(tsc_op.get_doc_id_index_table_id()))) { + LOG_WARN("store doc id index back aux tid failed", K(ret)); + } else if (tsc_op.is_text_retrieval_scan() && + OB_FAIL(rel_info.related_ids_.push_back(tsc_op.get_text_retrieval_info().fwd_idx_tid_))) { + LOG_WARN("store forward index id for text retrieval failed", K(ret)); } } + if (OB_SUCC(ret) && OB_FAIL(optimizer_context_.get_loc_rel_infos().push_back(rel_info))) { LOG_WARN("store location related info failed", K(ret)); } @@ -13004,7 +13048,6 @@ bool ObLogPlan::has_depend_table(const ObRelIds& table_ids) return b_ret; } - int ObLogPlan::allocate_output_expr_for_values_op(ObLogicalOperator &values_op) { int ret = OB_SUCCESS; @@ -15049,6 +15092,197 @@ 
int ObLogPlan::compute_duplicate_table_replicas(ObLogicalOperator *op) return ret; } +int ObLogPlan::prepare_text_retrieval_scan(const ObIArray &exprs, ObLogicalOperator *scan) +{ + // TODO: only support one match against expr as filter for now + int ret = OB_SUCCESS; + ObLogTableScan *table_scan = static_cast(scan); + ObRawExpr *match_pred = NULL; + ObMatchFunRawExpr *match_against = NULL; + ObSchemaGetterGuard *schema_guard = NULL; + ObSQLSessionInfo *session = NULL; + const ObTableSchema *table_schema = NULL; + const ObTableSchema *inv_idx_schema = NULL; + const ObTableSchema *fwd_idx_schema = NULL; + uint64_t doc_id_rowkey_tid = OB_INVALID_ID; + uint64_t fwd_idx_tid = OB_INVALID_ID; + uint64_t inv_idx_tid = OB_INVALID_ID; + ObSEArray index_infos; + + if (OB_UNLIKELY(1 != exprs.count()) || OB_ISNULL(match_pred = exprs.at(0)) || OB_ISNULL(scan) || + OB_ISNULL(get_stmt())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argumsnts", K(ret), KPC(match_pred), KP(scan)); + } else if (OB_ISNULL(get_stmt()) + || OB_ISNULL(schema_guard = get_optimizer_context().get_schema_guard()) + || OB_ISNULL(session = get_optimizer_context().get_session_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointers", K(ret), KP(get_stmt()), KP(schema_guard), KP(session)); + } else if (OB_UNLIKELY(!match_pred->has_flag(CNT_MATCH_EXPR) + || LOG_TABLE_SCAN != scan->get_type() + || 0 == match_pred->get_param_count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected node or expr passed in", KPC(match_pred), K(scan->get_type()), K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < match_pred->get_param_count(); ++i) { + ObRawExpr *curr_expr = match_pred->get_param_expr(i); + if (OB_ISNULL(curr_expr)) { + ret = OB_ERR_UNEXPECTED; + } else if (curr_expr->get_expr_type() == T_FUN_MATCH_AGAINST) { + if (OB_NOT_NULL(match_against)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported match filter with more than one match against expr", + K(ret), KPC(match_pred), 
KPC(match_against)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "more than one distinct match against expr"); + } else { + match_against = static_cast(curr_expr); + } + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(match_against)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null match against expr", K(ret), KPC(match_pred), KPC(match_against)); + } else if (OB_FAIL(schema_guard->get_table_schema(session->get_effective_tenant_id(), + table_scan->get_real_ref_table_id(), + table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null table schema", K(ret)); + } else if (OB_FAIL(table_schema->get_simple_index_infos(index_infos))) { + LOG_WARN("failed to get index infos", K(ret)); + } else if (OB_FAIL(table_schema->get_doc_id_rowkey_tid(doc_id_rowkey_tid))) { + LOG_WARN("failed to get doc_id_rowkey table id", K(ret)); + } else if (OB_FALSE_IT(inv_idx_tid = table_scan->get_index_table_id())) { + } else if (OB_FAIL(schema_guard->get_table_schema(session->get_effective_tenant_id(), + inv_idx_tid, + inv_idx_schema))) { + LOG_WARN("failed to get inverted index id", K(ret)); + } else if (OB_ISNULL(inv_idx_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null index schema", K(ret)); + } else { + bool found_fwd_idx = false; + const ObString &inv_idx_name = inv_idx_schema->get_table_name_str(); + for (int64_t i = 0; OB_SUCC(ret) && i < index_infos.count(); ++i) { + const ObAuxTableMetaInfo &index_info = index_infos.at(i); + if (!share::schema::is_fts_doc_word_aux(index_info.index_type_)) { + // skip + } else if (OB_FAIL(schema_guard->get_table_schema( + session->get_effective_tenant_id(), index_info.table_id_, fwd_idx_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_ISNULL(fwd_idx_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpecter nullptr to fwd idx schema", K(ret)); + } else { + const ObString &fwd_idx_name = 
fwd_idx_schema->get_table_name_str(); + // 依赖正排索引表名的后缀长度 + int64_t fwd_idx_suffix_len = strlen("_fts_doc_word"); + ObString fwd_idx_prefix_name; + if (OB_UNLIKELY(fwd_idx_name.length() <= fwd_idx_suffix_len)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(fwd_idx_name), K(fwd_idx_suffix_len)); + } else if (OB_FALSE_IT(fwd_idx_prefix_name.assign_ptr(fwd_idx_name.ptr(), + fwd_idx_name.length() - fwd_idx_suffix_len))) { + } else if (fwd_idx_prefix_name.compare(inv_idx_name) == 0) { + found_fwd_idx = true; + fwd_idx_tid = fwd_idx_schema->get_table_id(); + } + } + } + } + if (OB_SUCC(ret)) { + ObTextRetrievalInfo &tr_info = table_scan->get_text_retrieval_info(); + tr_info.match_expr_ = match_against; + tr_info.inv_idx_tid_ = inv_idx_tid; + tr_info.fwd_idx_tid_ = fwd_idx_tid; + tr_info.doc_id_idx_tid_ = doc_id_rowkey_tid; + tr_info.pushdown_match_filter_ = match_pred; + table_scan->set_doc_id_index_table_id(doc_id_rowkey_tid); + table_scan->set_index_back(true); + } + return ret; +} + +int ObLogPlan::prepare_multivalue_retrieval_scan(ObLogicalOperator *scan) +{ + int ret = OB_SUCCESS; + ObLogTableScan *table_scan = static_cast(scan); + ObSchemaGetterGuard *schema_guard = nullptr; + ObSQLSessionInfo *session = nullptr; + const ObTableSchema *table_schema = nullptr; + uint64_t doc_id_rowkey_tid = OB_INVALID_ID; + + if (OB_ISNULL(schema_guard = get_optimizer_context().get_schema_guard()) + || OB_ISNULL(session = get_optimizer_context().get_session_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointers", K(ret), KP(get_stmt()), KP(schema_guard), KP(session)); + } else if (OB_FAIL(schema_guard->get_table_schema( + session->get_effective_tenant_id(), table_scan->get_real_ref_table_id(), table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null table schema", K(ret)); + } else if 
(OB_FAIL(table_schema->get_doc_id_rowkey_tid(doc_id_rowkey_tid))) { + LOG_WARN("failed to get doc_id_rowkey table id", K(ret)); + } else { + table_scan->set_doc_id_index_table_id(doc_id_rowkey_tid); + table_scan->set_index_back(true); + } + return ret; +} + +int ObLogPlan::try_push_topn_into_text_retrieval_scan(ObLogicalOperator *&top, + ObRawExpr *topn_expr, + ObRawExpr *limit_expr, + ObRawExpr *offset_expr, + bool is_fetch_with_ties, + bool need_exchange, + const ObIArray &sort_keys, + bool &need_further_sort) +{ + int ret = OB_SUCCESS; + need_further_sort = true; + ObLogTableScan *table_scan = NULL; + bool has_multi_sort_keys = false; + ObRawExpr *pushed_limit_expr = NULL; + ObRawExpr *pushed_offset_expr = NULL; + if (OB_ISNULL(top) || OB_ISNULL(get_stmt())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(top), K(limit_expr), K(get_stmt()), K(ret)); + } else if (log_op_def::LOG_TABLE_SCAN != top->get_type()) { + // do nothing + } else if (OB_FALSE_IT(table_scan = static_cast(top))) { + } else if (!table_scan->is_text_retrieval_scan()) { + // do nothing + } else if (table_scan->get_filter_exprs().count() != 0 || + table_scan->get_pushdown_filter_exprs().count() != 0) { + // do nothing, topn pushdown requires that only match filter exists on the base table. + } else if (sort_keys.count() >= 1 && OB_NOT_NULL(sort_keys.at(0).expr_) && + sort_keys.at(0).expr_ == table_scan->get_text_retrieval_info().match_expr_) { + // only accept match expr as prefix sort key. + has_multi_sort_keys = sort_keys.count() == 1 ? false : true; + need_further_sort = has_multi_sort_keys || table_scan->use_das() || need_exchange; + pushed_limit_expr = need_further_sort ? topn_expr : limit_expr; + pushed_offset_expr = need_further_sort ? 
NULL : offset_expr; + ObSEArray tmp_sort_keys; + table_scan->get_text_retrieval_info().topk_limit_expr_ = pushed_limit_expr; + table_scan->get_text_retrieval_info().topk_offset_expr_ = pushed_offset_expr; + table_scan->get_text_retrieval_info().sort_key_.expr_ = sort_keys.at(0).expr_; + table_scan->get_text_retrieval_info().sort_key_.order_type_ = sort_keys.at(0).order_type_; + table_scan->get_text_retrieval_info().with_ties_ = (has_multi_sort_keys || is_fetch_with_ties); + if (OB_FAIL(tmp_sort_keys.push_back(sort_keys.at(0)))) { + LOG_WARN("failed to push back order item", K(ret)); + } else if (OB_FAIL(table_scan->set_op_ordering(tmp_sort_keys))) { + LOG_WARN("failed to set op ordering", K(ret)); + } + } + return ret; +} + int ObLogPlan::init_lateral_table_depend_info(const ObIArray &table_items) { int ret = OB_SUCCESS; @@ -15088,4 +15322,4 @@ int ObLogPlan::init_lateral_table_depend_info(const ObIArray &table_ LOG_TRACE("succeed to init function table depend info", K(table_depend_infos_)); } return ret; -} +} \ No newline at end of file diff --git a/src/sql/optimizer/ob_log_plan.h b/src/sql/optimizer/ob_log_plan.h index ca23a674bd..7a2c51add3 100644 --- a/src/sql/optimizer/ob_log_plan.h +++ b/src/sql/optimizer/ob_log_plan.h @@ -1422,6 +1422,16 @@ public: int construct_startup_filter_for_limit(ObRawExpr *limit_expr, ObLogicalOperator *log_op); + int prepare_text_retrieval_scan(const ObIArray &exprs, ObLogicalOperator *scan); + int prepare_multivalue_retrieval_scan(ObLogicalOperator *scan); + int try_push_topn_into_text_retrieval_scan(ObLogicalOperator *&top, + ObRawExpr *topn_expr, + ObRawExpr *limit_expr, + ObRawExpr *offset_expr, + bool is_fetch_with_ties, + bool need_exchange, + const ObIArray &sort_keys, + bool &need_further_sort); protected: virtual int generate_normal_raw_plan() = 0; virtual int generate_dblink_raw_plan(); diff --git a/src/sql/optimizer/ob_log_subplan_filter.cpp b/src/sql/optimizer/ob_log_subplan_filter.cpp index 460b9d7565..ede8ccd846 
100644 --- a/src/sql/optimizer/ob_log_subplan_filter.cpp +++ b/src/sql/optimizer/ob_log_subplan_filter.cpp @@ -544,6 +544,7 @@ int ObLogSubPlanFilter::check_and_set_das_group_rescan() for (int64_t i = 1; OB_SUCC(ret) && enable_das_group_rescan_ && i < get_num_of_child(); i++) { ObLogicalOperator *child = get_child(i); bool contains_invalid_startup = false; + bool contains_limit = false; if (OB_ISNULL(child)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null", K(ret)); diff --git a/src/sql/optimizer/ob_log_table_scan.cpp b/src/sql/optimizer/ob_log_table_scan.cpp index 0746ba38c4..4e2c5b092a 100644 --- a/src/sql/optimizer/ob_log_table_scan.cpp +++ b/src/sql/optimizer/ob_log_table_scan.cpp @@ -51,6 +51,8 @@ const char *ObLogTableScan::get_name() const } if (sample_method != SampleInfo::NO_SAMPLE) { name = (sample_method == SampleInfo::ROW_SAMPLE) ? "TABLE ROW SAMPLE SCAN" : "TABLE BLOCK SAMPLE SCAN"; + } else if (is_text_retrieval_scan()) { + name = use_das() ? "DISTRIBUTED TEXT RETRIEVAL SCAN" : "TEXT RETRIEVAL SCAN"; } else if (is_skip_scan()) { name = use_das() ? 
"DISTRIBUTED TABLE SKIP SCAN" : "TABLE SKIP SCAN"; } else if (EXTERNAL_TABLE == get_table_type()) { @@ -178,6 +180,8 @@ int ObLogTableScan::get_op_exprs(ObIArray &all_exprs) LOG_WARN("failed to add lookup trans expr", K(ret)); } else if (NULL != trans_info_expr_ && OB_FAIL(all_exprs.push_back(trans_info_expr_))) { LOG_WARN("failed to push back expr", K(ret)); + } else if (is_text_retrieval_scan() && OB_FAIL(get_text_retrieval_calc_exprs(all_exprs))) { + LOG_WARN("failed to get text retrieval exprs", K(ret)); } else if (OB_FAIL(append(all_exprs, access_exprs_))) { LOG_WARN("failed to append exprs", K(ret)); } else if (OB_FAIL(append(all_exprs, pushdown_aggr_exprs_))) { @@ -209,6 +213,30 @@ int ObLogTableScan::allocate_expr_post(ObAllocExprContext &ctx) LOG_WARN("failed to mark expr as produced", K(*expr), K(branch_id_), K(id_), K(ret)); } else { /*do nothing*/ } } + if (OB_SUCC(ret) && is_text_retrieval_scan()) { + // match against relevance expr will be calculated in storage + ObSEArray tmp_exprs; + if (OB_FAIL(ObRawExprUtils::extract_column_exprs(get_text_retrieval_info().relevance_expr_, tmp_exprs))) { + LOG_WARN("failed to extract column exprs", K(ret)); + } else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().doc_token_cnt_))) { + LOG_WARN("failed to append tmp exprs", K(ret)); + } else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().total_doc_cnt_))) { + LOG_WARN("failed to append tmp exprs", K(ret)); + } else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().related_doc_cnt_))) { + LOG_WARN("failed to append tmp exprs", K(ret)); + } else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().match_expr_))) { + LOG_WARN("failed to append tmp exprs", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < tmp_exprs.count(); ++i) { + ObRawExpr *expr = tmp_exprs.at(i); + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null expr", K(ret)); + } else if (OB_FAIL(mark_expr_produced(expr, branch_id_, id_, ctx))) { + 
LOG_WARN("failed to mark expr as produced", K(*expr), K(branch_id_), K(id_), K(ret)); + } else { /*do nothing*/ } + } + } // check if we can produce some more exprs, such as 1 + 'c1' after we have produced 'c1' if (OB_SUCC(ret)) { @@ -383,6 +411,8 @@ int ObLogTableScan::generate_access_exprs() LOG_WARN("get unexpected null", K(get_plan()), K(get_stmt()), K(ret)); } else if (OB_FAIL(copy_filter_before_index_back())) { LOG_WARN("failed to copy filter before index back", K(ret)); + } else if (is_text_retrieval_scan() && OB_FAIL(prepare_text_retrieval_dep_exprs())) { + LOG_WARN("failed to copy text retrieval aggr exprs", K(ret)); } else if (OB_FAIL(generate_necessary_rowkey_and_partkey_exprs())) { LOG_WARN("failed to generate rowkey and part exprs", K(ret)); } else if (use_batch() @@ -396,6 +426,8 @@ int ObLogTableScan::generate_access_exprs() LOG_WARN("failed to push back exprs", K(ret)); } else if (is_spatial_index_ && OB_FAIL(append_array_no_dup(access_exprs_, spatial_exprs_))) { LOG_WARN("failed to push back exprs", K(ret)); + } else if (OB_FAIL(append_array_no_dup(access_exprs_, domain_exprs_))) { + LOG_WARN("failed to append domain exprs", K(ret)); } else if (is_index_global_ && index_back_) { if (OB_FAIL(ObRawExprUtils::extract_column_exprs(filter_exprs_, temp_exprs))) { LOG_WARN("failed to extract column exprs", K(ret)); @@ -594,7 +626,7 @@ int ObLogTableScan::has_nonpushdown_filter(bool &has_npd_filter) int ObLogTableScan::extract_pushdown_filters(ObIArray &nonpushdown_filters, ObIArray &scan_pushdown_filters, ObIArray &lookup_pushdown_filters, - bool ignore_pd_filter /*= false */) + bool ignore_pd_filter /*= false */) const { int ret = OB_SUCCESS; const ObIArray &filters = get_filter_exprs(); @@ -824,6 +856,7 @@ int ObLogTableScan::get_mbr_column_exprs(const uint64_t table_id, ObRawExpr *expr = NULL; const ObDMLStmt *stmt = NULL; ObSEArray temp_exprs; + if (OB_ISNULL(stmt = get_stmt())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("stmt is null", K(ret)); @@ -901,6 
+934,10 @@ int ObLogTableScan::generate_necessary_rowkey_and_partkey_exprs() LOG_WARN("failed to check whether stmt has lob column", K(ret)); } else if (OB_FAIL(get_mbr_column_exprs(table_id_, spatial_exprs_))) { LOG_WARN("failed to check whether stmt has mbr column", K(ret)); + } else if (need_doc_id_index_back() && OB_FAIL(extract_doc_id_index_back_expr(domain_exprs_))) { + LOG_WARN("failed to extract doc id index back exprs", K(ret)); + } else if (is_text_retrieval_scan() && OB_FAIL(extract_text_retrieval_access_expr(domain_exprs_))) { + LOG_WARN("failed to extract text retrieval access exprs", K(ret)); } else if (is_heap_table && is_index_global_ && index_back_ && OB_FAIL(get_part_column_exprs(table_id_, ref_table_id_, part_exprs_))) { LOG_WARN("failed to get part column exprs", K(ret)); @@ -1337,7 +1374,7 @@ int ObLogTableScan::get_plan_item_info(PlanText &plan_text, } //Print ranges - if (OB_FAIL(ret)) { + if (OB_FAIL(ret) || is_text_retrieval_scan()) { } else if (OB_FAIL(BUF_PRINTF(", "))) { LOG_WARN("BUF_PRINTF fails", K(ret)); } else if (OB_FAIL(BUF_PRINTF("\n "))) { @@ -1365,6 +1402,12 @@ int ObLogTableScan::get_plan_item_info(PlanText &plan_text, } } + if (OB_SUCC(ret) && is_text_retrieval_scan()) { + // print match against related exprs + if (OB_FAIL(print_text_retrieval_annotation(buf, buf_len, pos, type))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } + } END_BUF_PRINT(plan_item.special_predicates_, plan_item.special_predicates_len_); } @@ -2189,6 +2232,317 @@ ObRawExpr * ObLogTableScan::get_real_expr(const ObRawExpr *col) const return ret; } +int ObLogTableScan::extract_doc_id_index_back_expr(ObIArray &exprs) +{ + int ret = OB_SUCCESS; + uint64_t doc_id_rowkey_tid = OB_INVALID_ID; + ObColumnRefRawExpr *doc_id_col_expr = nullptr; + ObSqlSchemaGuard *schema_guard = nullptr; + const ObTableSchema *table_schema = nullptr; + const ObColumnSchemaV2 *doc_id_col_schema = nullptr; + ObSEArray col_items; + if (!need_doc_id_index_back()) { + //skip + } else if 
(OB_ISNULL(get_stmt()) || OB_ISNULL(get_plan()) || + OB_ISNULL(schema_guard = get_plan()->get_optimizer_context().get_sql_schema_guard())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), KP(get_stmt()), KP(get_plan()), KP(schema_guard)); + } else if (OB_FAIL(schema_guard->get_table_schema(ref_table_id_, table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < table_schema->get_column_count(); ++i) { + const ObColumnSchemaV2 *col_schema = nullptr; + if (OB_ISNULL(col_schema = table_schema->get_column_schema_by_idx(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get column schema by index", K(ret)); + } else if (col_schema->is_doc_id_column()) { + doc_id_col_schema = col_schema; + break; + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(doc_id_col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected doc id column schema not found", K(ret), KPC(table_schema)); + } else if (OB_FAIL(ObRawExprUtils::build_column_expr( + get_plan()->get_optimizer_context().get_expr_factory(), *doc_id_col_schema, doc_id_col_expr))) { + LOG_WARN("failed to create doc id column expr", K(ret), KPC(doc_id_col_schema)); + } else if (OB_FAIL(exprs.push_back(doc_id_col_expr))) { + LOG_WARN("failed to append doc id col expr", K(ret)); + } else if (OB_FAIL(get_stmt()->get_column_items(table_id_, col_items))) { + LOG_WARN("failed to get column items", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < col_items.count(); ++i) { + const ColumnItem &col_item = col_items.at(i); + bool is_rowkey = false; + if (OB_FAIL(table_schema->get_rowkey_info().is_rowkey_column(col_item.column_id_, is_rowkey))) { + LOG_WARN("failed to check if column item is rowkey", K(ret)); + } else if (is_rowkey) { + exprs.push_back(col_item.expr_); + } + } + } + + return ret; +} + +int ObLogTableScan::extract_text_retrieval_access_expr(ObIArray &exprs) +{ + int ret = OB_SUCCESS; + ObTextRetrievalInfo 
&tr_info = get_text_retrieval_info(); + if (OB_ISNULL(tr_info.match_expr_) || OB_ISNULL(tr_info.total_doc_cnt_) || + OB_ISNULL(tr_info.doc_token_cnt_) || OB_ISNULL(tr_info.related_doc_cnt_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null match against expr", K(ret)); + } else if (OB_FAIL(exprs.push_back(tr_info.token_column_))) { + LOG_WARN("failed to append token column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(tr_info.token_cnt_column_))) { + LOG_WARN("failed to append token count column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(tr_info.doc_id_column_))) { + LOG_WARN("failed to append doc id column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(tr_info.doc_length_column_))) { + LOG_WARN("failed to append doc length column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(tr_info.total_doc_cnt_->get_param_expr(0)))) { + LOG_WARN("failed to append total doc cnt access col to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(tr_info.doc_token_cnt_->get_param_expr(0)))) { + LOG_WARN("failed to append doc token cnt access col to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(tr_info.related_doc_cnt_->get_param_expr(0)))) { + LOG_WARN("failed to append relater doc cnt access col to access exprs", K(ret)); + } + return ret; +} + +int ObLogTableScan::get_text_retrieval_calc_exprs(ObIArray &all_exprs) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(get_text_retrieval_info().match_expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null match against expr", K(ret)); + } else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().related_doc_cnt_))) { + LOG_WARN("failed to append relevanced doc cnt expr", K(ret)); + } else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().doc_token_cnt_))) { + LOG_WARN("failed to append doc token cnt expr", K(ret)); + } else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().total_doc_cnt_))) { + 
LOG_WARN("failed to append total doc cnt expr", K(ret)); + } else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().relevance_expr_))) { + LOG_WARN("failed to append relevance expr", K(ret)); + } else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().match_expr_))) { + LOG_WARN("failed to append text retrieval expr", K(ret)); + } else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().pushdown_match_filter_))) { + LOG_WARN("failed to append match filter", K(ret)); + } else if (OB_NOT_NULL(get_text_retrieval_info().topk_limit_expr_) && + OB_FAIL(all_exprs.push_back(get_text_retrieval_info().topk_limit_expr_))) { + LOG_WARN("failed to append limit expr", K(ret)); + } else if (OB_NOT_NULL(get_text_retrieval_info().topk_offset_expr_) && + OB_FAIL(all_exprs.push_back(get_text_retrieval_info().topk_offset_expr_))) { + LOG_WARN("failed to append offset expr", K(ret)); + } + return ret; +} + +int ObLogTableScan::print_text_retrieval_annotation(char *buf, int64_t buf_len, int64_t &pos, ExplainType type) +{ + int ret = OB_SUCCESS; + ObTextRetrievalInfo &tr_info = get_text_retrieval_info(); + ObMatchFunRawExpr *match_expr = tr_info.match_expr_; + ObRawExpr *pushdown_match_filter = tr_info.pushdown_match_filter_; + ObRawExpr *limit = tr_info.topk_limit_expr_; + ObRawExpr *offset = tr_info.topk_offset_expr_; + ObSEArray sort_keys; + if (OB_FAIL(BUF_PRINTF(", "))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF("\n "))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (FALSE_IT(EXPLAIN_PRINT_EXPR(match_expr, type))) { + } + if (OB_SUCC(ret) && OB_NOT_NULL(pushdown_match_filter)) { + if (OB_FAIL(BUF_PRINTF(", "))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF("\n "))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (FALSE_IT(EXPLAIN_PRINT_EXPR(pushdown_match_filter, type))) { + } + } + if (OB_SUCC(ret) && OB_NOT_NULL(tr_info.sort_key_.expr_)) { + if (OB_FAIL(BUF_PRINTF(", "))) { + 
LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF("\n "))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(sort_keys.push_back(tr_info.sort_key_))) { + LOG_WARN("failed to push back order item", K(ret)); + } else if (FALSE_IT(EXPLAIN_PRINT_SORT_ITEMS(sort_keys, type))) { + } else if (OB_FAIL(BUF_PRINTF(", "))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (FALSE_IT(EXPLAIN_PRINT_EXPR(limit, type))) { + } else if (OB_FAIL(BUF_PRINTF(", "))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (FALSE_IT(EXPLAIN_PRINT_EXPR(offset, type))) { + } else if (OB_FAIL(BUF_PRINTF(", "))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF("with_ties("))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (tr_info.with_ties_ && OB_FAIL(BUF_PRINTF("true"))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (!tr_info.with_ties_ && OB_FAIL(BUF_PRINTF("false"))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF(")"))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } + } + return ret; +} + +int ObLogTableScan::prepare_text_retrieval_dep_exprs() +{ + int ret = OB_SUCCESS; + const ObTableSchema *table_schema; + const ObTableSchema *inv_index_schema; + ObSqlSchemaGuard *schema_guard = NULL; + TableItem *table_item = nullptr; + ObRawExprFactory *expr_factory = nullptr; + ObSQLSessionInfo *session_info = nullptr; + uint64_t token_col_id = OB_INVALID_ID; + ObColumnRefRawExpr *token_column = nullptr; + uint64_t token_cnt_col_id = OB_INVALID_ID; + ObColumnRefRawExpr *token_cnt_column = nullptr; + uint64_t doc_length_col_id = OB_INVALID_ID; + ObColumnRefRawExpr *doc_length_column = nullptr; + ObColumnRefRawExpr *doc_id_column = nullptr; + ObAggFunRawExpr *related_doc_cnt = nullptr; + ObAggFunRawExpr *total_doc_cnt = nullptr; + ObAggFunRawExpr *doc_token_cnt = nullptr; + ObOpRawExpr *relevance_expr = nullptr; + ObTextRetrievalInfo &tr_info = get_text_retrieval_info(); + if 
(OB_NOT_NULL(tr_info.doc_id_column_) && OB_NOT_NULL(tr_info.doc_length_column_) && + OB_NOT_NULL(tr_info.token_column_) && OB_NOT_NULL(tr_info.token_cnt_column_) && + OB_NOT_NULL(tr_info.doc_token_cnt_) && OB_NOT_NULL(tr_info.total_doc_cnt_) && + OB_NOT_NULL(tr_info.related_doc_cnt_) && OB_NOT_NULL(tr_info.relevance_expr_)) { + // do nothing, exprs already generated + } else if (OB_ISNULL(get_stmt()) || OB_ISNULL(get_plan()) || + OB_ISNULL(expr_factory = &get_plan()->get_optimizer_context().get_expr_factory()) || + OB_ISNULL(session_info = get_plan()->get_optimizer_context().get_session_info()) || + OB_ISNULL(schema_guard = get_plan()->get_optimizer_context().get_sql_schema_guard())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(get_real_ref_table_id(), table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(tr_info.inv_idx_tid_, inv_index_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_ISNULL(table_item = get_stmt()->get_table_item_by_id(get_table_id()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer", K(ret)); + } else { + ObRawExprCopier copier(get_plan()->get_optimizer_context().get_expr_factory()); + for (int64_t i = 0; OB_SUCC(ret) && i < inv_index_schema->get_column_count(); ++i) { + const ObColumnSchemaV2 *col_schema = inv_index_schema->get_column_schema_by_idx(i); + if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else { + const ObColumnSchemaV2 *col_schema_in_data_table = table_schema->get_column_schema(col_schema->get_column_id()); + if (OB_ISNULL(col_schema_in_data_table)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr in data table", K(ret), KPC(col_schema), KPC(table_schema)); + } else if (col_schema_in_data_table->is_doc_id_column()) { + // create doc id 
expr later + // Since currently, doc id column on main table schema is a special "virtual generated" column, + // which can not be calculated by its expr record on schema + // So we use its column ref expr on index table for index back / projection instead + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *col_schema, doc_id_column))) { + LOG_WARN("failed to build doc id column expr", K(ret)); + } + } else if (col_schema_in_data_table->is_word_count_column()) { + token_cnt_col_id = col_schema->get_column_id(); + } else if (col_schema_in_data_table->is_word_segment_column()) { + token_col_id = col_schema->get_column_id(); + } else if (col_schema_in_data_table->is_doc_length_column()) { + doc_length_col_id = col_schema->get_column_id(); + } else {} + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < table_schema->get_column_count(); ++i) { + const ObColumnSchemaV2 *col_schema = table_schema->get_column_schema_by_idx(i); + if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (col_schema->get_column_id() == token_cnt_col_id) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *col_schema, token_cnt_column))) { + LOG_WARN("failed to build doc id column expr", K(ret)); + } else if (OB_NOT_NULL(token_cnt_column)) { + token_cnt_column->set_ref_id(get_table_id(), col_schema->get_column_id()); + token_cnt_column->set_column_attr(get_table_name(), col_schema->get_column_name_str()); + token_cnt_column->set_database_name(table_item->database_name_); + } + } else if (col_schema->get_column_id() == token_col_id) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *col_schema, token_column))) { + LOG_WARN("failed to build doc id column expr", K(ret)); + } else if (OB_NOT_NULL(token_column)) { + token_column->set_ref_id(get_table_id(), col_schema->get_column_id()); + token_column->set_column_attr(get_table_name(), col_schema->get_column_name_str()); + 
token_column->set_database_name(table_item->database_name_); + } + } else if (col_schema->get_column_id() == doc_length_col_id) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *col_schema, doc_length_column))) { + LOG_WARN("failed to build doc id column expr", K(ret)); + } else if (OB_NOT_NULL(doc_length_column)) { + doc_length_column->set_ref_id(get_table_id(), col_schema->get_column_id()); + doc_length_column->set_column_attr(get_table_name(), col_schema->get_column_name_str()); + doc_length_column->set_database_name(table_item->database_name_); + } + } else {} + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(token_cnt_column) || OB_ISNULL(token_column) || OB_ISNULL(doc_id_column) || + OB_ISNULL(doc_length_column)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null fulltext generated column", K(ret), + KP(token_cnt_column), KP(token_column), KP(doc_id_column)); + } else if (OB_FAIL(expr_factory->create_raw_expr(T_FUN_COUNT, related_doc_cnt))) { + LOG_WARN("failed to create related doc cnt agg expr", K(ret)); + } else if (OB_FAIL(related_doc_cnt->add_real_param_expr(token_cnt_column))) { + LOG_WARN("failed to set agg param", K(ret)); + } else if (OB_FAIL(related_doc_cnt->formalize(session_info))) { + LOG_WARN("failed to formalize related doc cnt expr", K(ret)); + } else if (OB_FAIL(expr_factory->create_raw_expr(T_FUN_COUNT, total_doc_cnt))) { + LOG_WARN("failed to create related doc cnt agg expr", K(ret)); + } else if (OB_FAIL(total_doc_cnt->add_real_param_expr(doc_id_column))) { + LOG_WARN("failed to set agg param", K(ret)); + } else if (OB_FAIL(total_doc_cnt->formalize(session_info))) { + LOG_WARN("failed to formalize total doc cnt expr", K(ret)); + } else if (OB_FAIL(expr_factory->create_raw_expr(T_FUN_SUM, doc_token_cnt))) { + LOG_WARN("failed to create document token count sum agg expr", K(ret)); + } else if (OB_FAIL(doc_token_cnt->add_real_param_expr(token_cnt_column))) { + LOG_WARN("failed to set agg param", K(ret)); + } else if 
(OB_FAIL(doc_token_cnt->formalize(session_info))) { + LOG_WARN("failed to formalize document token count expr", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::build_bm25_expr(*expr_factory, related_doc_cnt, + token_cnt_column, total_doc_cnt, + doc_token_cnt, relevance_expr, + session_info))) { + LOG_WARN("failed to build bm25 expr", K(ret)); + } else if (OB_FAIL(relevance_expr->formalize(session_info))) { + LOG_WARN("failed to formalize bm25 expr", K(ret)); + // Copy column ref expr referenced by aggregation in different index table scan + // to avoid share expression + } else if (OB_FAIL(copier.copy(related_doc_cnt->get_param_expr(0)))) { + } else if (OB_FAIL(copier.copy(total_doc_cnt->get_param_expr(0)))) { + } else if (OB_FAIL(copier.copy(doc_token_cnt->get_param_expr(0)))) { + } else { + tr_info.token_column_ = token_column; + tr_info.token_cnt_column_ = token_cnt_column; + tr_info.doc_id_column_ = doc_id_column; + tr_info.doc_length_column_ = doc_length_column; + tr_info.related_doc_cnt_ = related_doc_cnt; + tr_info.doc_token_cnt_ = doc_token_cnt; + tr_info.total_doc_cnt_ = total_doc_cnt; + tr_info.relevance_expr_ = relevance_expr; + } + } + return ret; +} + int ObLogTableScan::get_card_without_filter(double &card) { int ret = OB_SUCCESS; diff --git a/src/sql/optimizer/ob_log_table_scan.h b/src/sql/optimizer/ob_log_table_scan.h index 62d0394298..c61a841c0d 100644 --- a/src/sql/optimizer/ob_log_table_scan.h +++ b/src/sql/optimizer/ob_log_table_scan.h @@ -26,6 +26,54 @@ namespace sql { class Path; +struct ObTextRetrievalInfo +{ + ObTextRetrievalInfo() + : match_expr_(NULL), + pushdown_match_filter_(NULL), + sort_key_(), + topk_limit_expr_(NULL), + topk_offset_expr_(NULL), + with_ties_(false), + inv_idx_tid_(OB_INVALID_ID), + fwd_idx_tid_(OB_INVALID_ID), + doc_id_idx_tid_(OB_INVALID_ID), + token_column_(NULL), + token_cnt_column_(NULL), + doc_id_column_(NULL), + doc_length_column_(NULL), + related_doc_cnt_(NULL), + total_doc_cnt_(NULL), + doc_token_cnt_(NULL), + 
relevance_expr_(NULL) + { } + ~ObTextRetrievalInfo() {} + + TO_STRING_KV(K_(match_expr), K_(pushdown_match_filter), K_(sort_key), K_(topk_limit_expr), + K_(topk_offset_expr), K_(with_ties), K_(inv_idx_tid), K_(fwd_idx_tid), K_(doc_id_idx_tid)); + + bool need_sort() const { return sort_key_.expr_ != nullptr; } + + ObMatchFunRawExpr *match_expr_; + ObRawExpr *pushdown_match_filter_; + OrderItem sort_key_; // for pushdown topk, only support match expr as sort expr + ObRawExpr *topk_limit_expr_; + ObRawExpr *topk_offset_expr_; + bool with_ties_; + uint64_t inv_idx_tid_; // choosed aux inverted index table id (word-doc) + uint64_t fwd_idx_tid_; // choosed aux forward index table id (doc-word) + uint64_t doc_id_idx_tid_; // choosed aux doc_id index table id (doc-rowkey) + // the following exprs are used for intermediate calculation of relevance score + ObColumnRefRawExpr *token_column_; + ObColumnRefRawExpr *token_cnt_column_; + ObColumnRefRawExpr *doc_id_column_; + ObColumnRefRawExpr *doc_length_column_; + ObAggFunRawExpr *related_doc_cnt_; // count(token_cnt_column) + ObAggFunRawExpr *total_doc_cnt_; // count(doc_id_column) + ObAggFunRawExpr *doc_token_cnt_; // sum(token_cnt_column) + ObRawExpr *relevance_expr_; // BM25 +}; + class ObLogTableScan : public ObLogicalOperator { public: @@ -38,6 +86,7 @@ public: advisor_table_id_(OB_INVALID_ID), is_index_global_(false), is_spatial_index_(false), + is_multivalue_index_(false), use_das_(false), index_back_(false), is_multi_part_table_scan_(false), @@ -82,7 +131,9 @@ public: has_index_scan_filter_(false), has_index_lookup_filter_(false), table_type_(share::schema::MAX_TABLE_TYPE), - use_column_store_(false) + use_column_store_(false), + doc_id_table_id_(common::OB_INVALID_ID), + text_retrieval_info_() { } @@ -204,6 +255,15 @@ public: inline bool get_is_spatial_index() const { return is_spatial_index_; } + /* + * set is multivalue index + */ + inline void set_is_multivalue_index(bool is_multivalue_index) + { 
is_multivalue_index_ = is_multivalue_index; } + + inline bool get_is_multivalue_index() const + { return is_multivalue_index_; } + /** * Set scan direction */ @@ -292,6 +352,7 @@ public: inline common::ObIArray &get_pushdown_groupby_columns() { return pushdown_groupby_columns_; } inline const common::ObIArray &get_pushdown_groupby_columns() const { return pushdown_groupby_columns_; } + inline const common::ObIArray &get_domain_exprs() const { return domain_exprs_; } /** * Generate the filtering expressions @@ -474,7 +535,7 @@ public: int extract_pushdown_filters(ObIArray &nonpushdown_filters, ObIArray &scan_pushdown_filters, ObIArray &lookup_pushdown_filters, - bool ignore_pd_filter = false); + bool ignore_pd_filter = false) const; int has_nonpushdown_filter(bool &has_npd_filter); int replace_index_back_pushdown_filters(ObRawExprReplacer &replacer); int extract_virtual_gen_access_exprs(ObIArray &access_exprs, @@ -482,6 +543,16 @@ public: int adjust_print_access_info(ObIArray &access_exprs); static int replace_gen_column(ObLogPlan *plan, ObRawExpr *part_expr, ObRawExpr *&new_part_expr); int extract_file_column_exprs_recursively(ObRawExpr *expr); + inline bool is_text_retrieval_scan() const { return is_index_scan() && NULL != text_retrieval_info_.match_expr_; } + inline bool is_multivalue_index_scan() const { return is_multivalue_index_; } + inline ObTextRetrievalInfo &get_text_retrieval_info() { return text_retrieval_info_; } + inline const ObTextRetrievalInfo &get_text_retrieval_info() const { return text_retrieval_info_; } + int prepare_text_retrieval_dep_exprs(); + // jinmao TODO: 之后要判断这个标,一期统一设置为 true + inline bool need_text_retrieval_calc_relevance() const { return true; } + inline bool need_doc_id_index_back() const { return is_text_retrieval_scan() || is_multivalue_index_scan() ; } + inline void set_doc_id_index_table_id(const uint64_t doc_id_index_table_id) { doc_id_table_id_ = doc_id_index_table_id; } + inline uint64_t get_doc_id_index_table_id() const { 
return doc_id_table_id_; } virtual int get_card_without_filter(double &card) override; inline ObLogSet *get_rcte_op() { return rcte_op_; } inline ObRawExpr *get_identify_seq_expr() { return identify_seq_expr_; } @@ -501,6 +572,10 @@ private: // member functions int add_mapping_columns_for_vt(ObIArray &access_exprs); int get_mbr_column_exprs(const uint64_t table_id, ObIArray &mbr_exprs); int allocate_lookup_trans_info_expr(); + int extract_doc_id_index_back_expr(ObIArray &exprs); + int extract_text_retrieval_access_expr(ObIArray &exprs); + int get_text_retrieval_calc_exprs(ObIArray &all_exprs); + int print_text_retrieval_annotation(char *buf, int64_t buf_len, int64_t &pos, ExplainType type); int find_nearest_rcte_op(ObLogSet *&rcte_op); protected: // memeber variables // basic info @@ -511,6 +586,7 @@ protected: // memeber variables uint64_t advisor_table_id_; // used for duplicate table replica selection in the plan cache bool is_index_global_; bool is_spatial_index_; + bool is_multivalue_index_; // TODO yuming: tells whether the table scan uses shared data access or not // mainly designed for code generator bool use_das_; @@ -538,6 +614,8 @@ protected: // memeber variables common::ObSEArray rowkey_exprs_; common::ObSEArray part_exprs_; common::ObSEArray spatial_exprs_; + // columns required for accessing a domain index (fulltext and JSON multi-value index) + common::ObSEArray domain_exprs_; //for external table common::ObSEArray ext_file_column_exprs_; common::ObSEArray ext_column_convert_exprs_; @@ -611,6 +689,8 @@ protected: // memeber variables share::schema::ObTableType table_type_; bool use_column_store_; + uint64_t doc_id_table_id_; // used for rowkey lookup of fulltext and JSON multi-value index + ObTextRetrievalInfo text_retrieval_info_; ObPxRFStaticInfo px_rf_info_; // disallow copy and assign diff --git a/src/sql/optimizer/ob_logical_operator.cpp b/src/sql/optimizer/ob_logical_operator.cpp index 0208b323ce..373749a144 100644 --- 
a/src/sql/optimizer/ob_logical_operator.cpp +++ b/src/sql/optimizer/ob_logical_operator.cpp @@ -2145,7 +2145,8 @@ int ObLogicalOperator::extract_shared_exprs(ObRawExpr *raw_expr, LOG_WARN("failed to add var to array", K(ret)); } - if (!ObOptimizerUtil::find_item(ctx.inseparable_exprs_, raw_expr)) { + if (!ObOptimizerUtil::find_item(ctx.inseparable_exprs_, raw_expr) && + !raw_expr->is_match_against_expr()) { for (int64_t i = 0; OB_SUCC(ret) && i < raw_expr->get_param_count(); ++i) { ret = SMART_CALL(extract_shared_exprs(raw_expr->get_param_expr(i), ctx, @@ -4308,18 +4309,22 @@ int ObLogicalOperator::allocate_granule_nodes_above(AllocGIContext &ctx) gi_op->add_flag(GI_AFFINITIZE); gi_op->add_flag(GI_PARTITION_WISE); } - if (LOG_TABLE_SCAN == get_type() && - static_cast(this)->get_join_filter_info().is_inited_) { - ObLogTableScan *table_scan = static_cast(this); - ObOpPseudoColumnRawExpr *tablet_id_expr = NULL; - if (OB_FAIL(generate_pseudo_partition_id_expr(tablet_id_expr))) { - LOG_WARN("fail alloc partition id expr", K(ret)); - } else { - gi_op->set_tablet_id_expr(tablet_id_expr); - gi_op->set_join_filter_info(table_scan->get_join_filter_info()); - ObLogJoinFilter *jf_create_op = gi_op->get_join_filter_info().log_join_filter_create_op_; - jf_create_op->set_paired_join_filter(gi_op); - gi_op->add_flag(GI_USE_PARTITION_FILTER); + if (LOG_TABLE_SCAN == get_type()) { + if (static_cast(this)->is_text_retrieval_scan()) { + gi_op->add_flag(GI_FORCE_PARTITION_GRANULE); + } + if (static_cast(this)->get_join_filter_info().is_inited_) { + ObLogTableScan *table_scan = static_cast(this); + ObOpPseudoColumnRawExpr *tablet_id_expr = NULL; + if (OB_FAIL(generate_pseudo_partition_id_expr(tablet_id_expr))) { + LOG_WARN("fail alloc partition id expr", K(ret)); + } else { + gi_op->set_tablet_id_expr(tablet_id_expr); + gi_op->set_join_filter_info(table_scan->get_join_filter_info()); + ObLogJoinFilter *jf_create_op = gi_op->get_join_filter_info().log_join_filter_create_op_; + 
jf_create_op->set_paired_join_filter(gi_op); + gi_op->add_flag(GI_USE_PARTITION_FILTER); + } } } else if (LOG_GROUP_BY == get_type()) { if (static_cast(this)->force_partition_gi()) { diff --git a/src/sql/optimizer/ob_opt_est_cost_model.cpp b/src/sql/optimizer/ob_opt_est_cost_model.cpp index ac13e39eeb..bed551b3e1 100644 --- a/src/sql/optimizer/ob_opt_est_cost_model.cpp +++ b/src/sql/optimizer/ob_opt_est_cost_model.cpp @@ -1564,6 +1564,37 @@ int ObOptEstCostModel::cost_row_store_index_scan(const ObCostTableScanInfo &est_ double spatial_cost = row_count * cost_params_.get_spatial_per_row_cost(sys_stat_); index_scan_cost += spatial_cost; LOG_TRACE("OPT::[COST SPATIAL INDEX SCAN]", K(spatial_cost), K(ret)); + } else if (est_cost_info.index_meta_info_.is_fulltext_index_) { + // 全文索引一期:对于每一个 token,都需要: + // 1. 以 [token, token] 为 range 扫描 inv_index 两次,计算一个聚合函数; + // 2. 全表扫描 doc_id_rowkey_index, 计算一个聚合函数; + // 3. 用过滤后的 doc_id 对 doc_id_rowkey_index 做回表 + int token_count = 1; // 此处先假设 search query 只有一个 token,后续要调整 + double token_sel = DEFAULT_SEL; + double inv_index_range_scan_cost = 0; + double doc_id_full_scan_cost = 0; + double doc_id_index_back_cost = 0; + if (OB_FAIL(cost_range_scan(est_cost_info, + true, + row_count * token_sel, + inv_index_range_scan_cost))) { + LOG_WARN("Failed to estimate scan cost", K(ret)); + } else if (OB_FAIL(cost_range_scan(est_cost_info, + true, + row_count, + doc_id_full_scan_cost))) { + LOG_WARN("Failed to estimate scan cost", K(ret)); + } else if (OB_FAIL(cost_range_get(est_cost_info, + true, + row_count * token_sel, + doc_id_index_back_cost))) { + LOG_WARN("Failed to estimate get cost", K(ret)); + } + double aggregation_cost = (row_count * token_sel + row_count) * cost_params_.get_per_aggr_func_cost(sys_stat_); + double fulltext_scan_cost = 2 * inv_index_range_scan_cost + doc_id_full_scan_cost + + aggregation_cost + doc_id_index_back_cost; + index_scan_cost = token_count * fulltext_scan_cost; + LOG_TRACE("OPT::[COST FULLTEXT INDEX SCAN]", 
K(fulltext_scan_cost), K(ret)); } //add index skip scan cost if (OB_FAIL(ret)) { diff --git a/src/sql/optimizer/ob_opt_est_cost_model.h b/src/sql/optimizer/ob_opt_est_cost_model.h index 3d04734d68..ccd74a4142 100644 --- a/src/sql/optimizer/ob_opt_est_cost_model.h +++ b/src/sql/optimizer/ob_opt_est_cost_model.h @@ -108,6 +108,8 @@ struct ObIndexMetaInfo is_unique_index_(false), is_global_index_(false), is_geo_index_(false), + is_fulltext_index_(false), + is_multivalue_index_(false), index_micro_block_count_(-1) { } virtual ~ObIndexMetaInfo() @@ -117,7 +119,7 @@ struct ObIndexMetaInfo TO_STRING_KV(K_(ref_table_id), K_(index_id), K_(index_micro_block_size), K_(index_part_count), K_(index_part_size), K_(index_column_count), K_(is_index_back), - K_(is_unique_index), K_(index_micro_block_count)); + K_(is_unique_index), K_(is_fulltext_index), K_(index_micro_block_count)); uint64_t ref_table_id_; // ref table id uint64_t index_id_; // index id int64_t index_micro_block_size_; //index micro block size, same as main table when path is primary @@ -128,6 +130,8 @@ struct ObIndexMetaInfo bool is_unique_index_; // is unique index bool is_global_index_; // whether is global index bool is_geo_index_; // whether is spatial index + bool is_fulltext_index_; // is fulltext index + bool is_multivalue_index_; // is multivalue index int64_t index_micro_block_count_; // micro block count from table static info private: DISALLOW_COPY_AND_ASSIGN(ObIndexMetaInfo); diff --git a/src/sql/optimizer/ob_optimizer_util.cpp b/src/sql/optimizer/ob_optimizer_util.cpp index 4b98096804..ad251dcd07 100644 --- a/src/sql/optimizer/ob_optimizer_util.cpp +++ b/src/sql/optimizer/ob_optimizer_util.cpp @@ -8316,6 +8316,10 @@ int ObOptimizerUtil::check_filter_before_indexback(const ObIArray &f if (OB_FAIL(filter_before_index_back.push_back(false))) { LOG_WARN("failed to push back expr", K(ret)); } else { /*do nothing*/ } + } else if (expr->has_flag(CNT_MATCH_EXPR)) { + if 
(OB_FAIL(filter_before_index_back.push_back(false))) { + LOG_WARN("failed to push back expr", K(ret)); + } } else if (OB_FAIL(ObRawExprUtils::extract_column_ids(expr, filter_ids))) { LOG_WARN("failed to extract column ids", K(ret)); } else { diff --git a/src/sql/optimizer/ob_raw_expr_add_to_context.cpp b/src/sql/optimizer/ob_raw_expr_add_to_context.cpp index 5c89479e06..c6836e01ef 100644 --- a/src/sql/optimizer/ob_raw_expr_add_to_context.cpp +++ b/src/sql/optimizer/ob_raw_expr_add_to_context.cpp @@ -116,6 +116,11 @@ int ObRawExprAddToContext::visit(ObPseudoColumnRawExpr &expr) return add_expr(expr); } +int ObRawExprAddToContext::visit(ObMatchFunRawExpr &expr) +{ + return add_expr(expr); +} + /** * TODO(jiuman): the complexity of the algorithm used in add_expr is quite high. * We may need to revisit it later if it turns out to be an optimization we have * to do. diff --git a/src/sql/optimizer/ob_raw_expr_add_to_context.h b/src/sql/optimizer/ob_raw_expr_add_to_context.h index 8e847dbf2d..6c7b0bf6e4 100644 --- a/src/sql/optimizer/ob_raw_expr_add_to_context.h +++ b/src/sql/optimizer/ob_raw_expr_add_to_context.h @@ -49,6 +49,7 @@ namespace sql { virtual int visit(ObWinFunRawExpr &expr); virtual int visit(ObPseudoColumnRawExpr &expr); virtual int visit(ObPlQueryRefRawExpr &expr); + virtual int visit(ObMatchFunRawExpr &expr); private: int add_expr(ObRawExpr &expr); // types and constants diff --git a/src/sql/optimizer/ob_raw_expr_check_dep.cpp b/src/sql/optimizer/ob_raw_expr_check_dep.cpp index 64d9111561..90cf6aeb62 100644 --- a/src/sql/optimizer/ob_raw_expr_check_dep.cpp +++ b/src/sql/optimizer/ob_raw_expr_check_dep.cpp @@ -55,7 +55,8 @@ int ObRawExprCheckDep::check(const ObRawExpr &expr) case ObRawExpr::EXPR_CONST: case ObRawExpr::EXPR_EXEC_PARAM: case ObRawExpr::EXPR_PSEUDO_COLUMN: - case ObRawExpr::EXPR_OP_PSEUDO_COLUMN: { + case ObRawExpr::EXPR_OP_PSEUDO_COLUMN: + case ObRawExpr::EXPR_MATCH_AGAINST: { if (OB_FAIL(check_expr(expr, found))) { LOG_WARN("failed to check 
expr", K(expr), K(ret)); } diff --git a/src/sql/optimizer/ob_raw_expr_get_hash_value.cpp b/src/sql/optimizer/ob_raw_expr_get_hash_value.cpp index c948912a02..2eb638ecbe 100644 --- a/src/sql/optimizer/ob_raw_expr_get_hash_value.cpp +++ b/src/sql/optimizer/ob_raw_expr_get_hash_value.cpp @@ -83,3 +83,9 @@ int ObRawExprGetHashValue::visit(ObSetOpRawExpr &expr) seed_ = expr.hash(seed_); return OB_SUCCESS; } + +int ObRawExprGetHashValue::visit(ObMatchFunRawExpr &expr) +{ + seed_ = expr.hash(seed_); + return OB_SUCCESS; +} diff --git a/src/sql/optimizer/ob_raw_expr_get_hash_value.h b/src/sql/optimizer/ob_raw_expr_get_hash_value.h index 3f67d847c0..ab92bb2075 100644 --- a/src/sql/optimizer/ob_raw_expr_get_hash_value.h +++ b/src/sql/optimizer/ob_raw_expr_get_hash_value.h @@ -45,6 +45,7 @@ namespace sql { virtual int visit(ObSysFunRawExpr &expr); virtual int visit(ObSetOpRawExpr &expr); virtual int visit(ObPlQueryRefRawExpr &expr); + virtual int visit(ObMatchFunRawExpr &expr); private: int add_expr(ObRawExpr &expr); diff --git a/src/sql/parser/non_reserved_keywords_mysql_mode.c b/src/sql/parser/non_reserved_keywords_mysql_mode.c index d5398e20b7..38beb9f8f9 100644 --- a/src/sql/parser/non_reserved_keywords_mysql_mode.c +++ b/src/sql/parser/non_reserved_keywords_mysql_mode.c @@ -38,6 +38,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"aggregate", AGGREGATE}, {"algorithm", ALGORITHM}, {"all", ALL}, + {"allow", ALLOW}, {"all_meta", ALL_META}, {"all_user", ALL_USER}, {"alter", ALTER}, @@ -51,10 +52,12 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"approx_count_distinct_synopsis_merge", APPROX_COUNT_DISTINCT_SYNOPSIS_MERGE}, {"arbitration", ARBITRATION}, {"archivelog", ARCHIVELOG}, + {"array", ARRAY}, {"as", AS}, {"asc", ASC}, {"asensitive", ASENSITIVE}, {"ascii", ASCII}, + {"asis", ASIS}, {"asynchronous", ASYNCHRONOUS}, {"at", AT}, {"authors", AUTHORS}, @@ -147,6 +150,7 @@ static const NonReservedKeyword 
Mysql_none_reserved_keywords[] = {"concurrent", CONCURRENT}, {"condensed", CONDENSED}, {"condition", CONDITION}, + {"conditional", CONDITIONAL}, {"connection", CONNECTION}, {"consistent", CONSISTENT}, {"constraint", CONSTRAINT}, @@ -218,6 +222,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"deterministic", DETERMINISTIC}, {"dense_rank", DENSE_RANK}, {"diagnostics", DIAGNOSTICS}, + {"disallow", DISALLOW}, {"disconnect", DISCONNECT}, {"directory", DIRECTORY}, {"disable", DISABLE}, @@ -228,6 +233,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"distinctrow", DISTINCT}, {"div", DIV}, {"do", DO}, + {"dot", DOT}, {"double", DOUBLE}, {"drop", DROP}, {"dual", DUAL}, @@ -400,6 +406,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"json", JSON}, {"json_arrayagg", JSON_ARRAYAGG}, {"json_objectagg", JSON_OBJECTAGG}, + {"json_query", JSON_QUERY}, {"json_value", JSON_VALUE}, {"json_table", JSON_TABLE}, {"key", KEY}, @@ -491,6 +498,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"maximize", MAXIMIZE}, {"max_connections_per_hour", MAX_CONNECTIONS_PER_HOUR}, {"max_cpu", MAX_CPU}, + {"mismatch", MISMATCH}, {"max_file_size", MAX_FILE_SIZE}, {"log_disk_size", LOG_DISK_SIZE}, {"max_iops", MAX_IOPS}, @@ -536,6 +544,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"multilinestring", MULTILINESTRING}, {"multipoint", MULTIPOINT}, {"multipolygon", MULTIPOLYGON}, + {"multivalue", MULTIVALUE}, {"mutex", MUTEX}, {"mysql_errno", MYSQL_ERRNO}, {"my_name", MY_NAME}, @@ -574,6 +583,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"numeric", DECIMAL}, {"number", NUMBER}, {"nvarchar", NVARCHAR}, + {"object", OBJECT}, {"occur", OCCUR}, {"ntile", NTILE}, {"nth_value", NTH_VALUE}, @@ -773,6 +783,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"statements", STATEMENTS}, {"statistics", STATISTICS}, {"binding", BINDING}, + {"scalars", SCALARS}, 
{"sharding", SHARDING}, {"schema", SCHEMA}, {"schemas", SCHEMAS}, @@ -925,6 +936,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"to", TO}, {"top_k_fre_hist", TOP_K_FRE_HIST}, {"uncommitted", UNCOMMITTED}, + {"unconditional", UNCONDITIONAL}, {"undefined", UNDEFINED}, {"undo", UNDO}, {"undo_buffer_size", UNDO_BUFFER_SIZE}, @@ -982,6 +994,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"weak", WEAK}, {"week", WEEK}, {"weight_string", WEIGHT_STRING}, + {"without", WITHOUT}, {"where", WHERE}, {"when", WHEN}, {"whenever", WHENEVER}, diff --git a/src/sql/parser/sql_parser_mysql_mode.y b/src/sql/parser/sql_parser_mysql_mode.y index b6a0b1cdb1..7e67d52def 100644 --- a/src/sql/parser/sql_parser_mysql_mode.y +++ b/src/sql/parser/sql_parser_mysql_mode.y @@ -121,7 +121,7 @@ extern void obsql_oracle_parse_fatal_error(int32_t errcode, yyscan_t yyscanner, %left '|' %left '&' %left SHIFT_LEFT SHIFT_RIGHT -%left JSON_EXTRACT JSON_EXTRACT_UNQUOTED MEMBER +%left JSON_EXTRACT JSON_EXTRACT_UNQUOTED MEMBER %left '+' '-' %left '*' '/' '%' MOD DIV POW %left '^' @@ -260,9 +260,9 @@ END_P SET_VAR DELIMITER //-----------------------------reserved keyword end------------------------------------------------- %token //-----------------------------non_reserved keyword begin------------------------------------------- - ACCESS ACCOUNT ACTION ACTIVE ADDDATE AFTER AGAINST AGGREGATE ALGORITHM ALL_META ALL_USER ALWAYS ANALYSE ANY + ACCESS ACCOUNT ACTION ACTIVE ADDDATE AFTER AGAINST AGGREGATE ALGORITHM ALL_META ALL_USER ALWAYS ALLOW ANALYSE ANY APPROX_COUNT_DISTINCT APPROX_COUNT_DISTINCT_SYNOPSIS APPROX_COUNT_DISTINCT_SYNOPSIS_MERGE - ARBITRATION ASCII AT AUTHORS AUTO AUTOEXTEND_SIZE AUTO_INCREMENT AUTO_INCREMENT_MODE AVG AVG_ROW_LENGTH + ARBITRATION ARRAY ASCII ASIS AT AUTHORS AUTO AUTOEXTEND_SIZE AUTO_INCREMENT AUTO_INCREMENT_MODE AVG AVG_ROW_LENGTH ACTIVATE AVAILABILITY ARCHIVELOG ASYNCHRONOUS AUDIT ADMIN BACKUP BACKUP_COPIES BALANCE BANDWIDTH BASE 
BASELINE BASELINE_ID BASIC BEGI BINDING SHARDING BINLOG BIT BIT_AND @@ -273,13 +273,13 @@ END_P SET_VAR DELIMITER CACHE CALIBRATION CALIBRATION_INFO CANCEL CASCADED CAST CATALOG_NAME CHAIN CHANGED CHARSET CHECKSUM CHECKPOINT CHUNK CIPHER CLASS_ORIGIN CLEAN CLEAR CLIENT CLONE CLOG CLOSE CLUSTER CLUSTER_ID CLUSTER_NAME COALESCE COLUMN_STAT CODE COLLATION COLUMN_FORMAT COLUMN_NAME COLUMNS COMMENT COMMIT COMMITTED COMPACT COMPLETION COMPLETE - COMPRESSED COMPRESSION COMPUTATION COMPUTE CONCURRENT CONDENSED CONNECTION CONSISTENT CONSISTENT_MODE CONSTRAINT_CATALOG + COMPRESSED COMPRESSION COMPUTATION COMPUTE CONCURRENT CONDENSED CONDITIONAL CONNECTION CONSISTENT CONSISTENT_MODE CONSTRAINT_CATALOG CONSTRAINT_NAME CONSTRAINT_SCHEMA CONTAINS CONTEXT CONTRIBUTORS COPY COUNT CPU CREATE_TIMESTAMP CTXCAT CTX_ID CUBE CURDATE CURRENT STACKED CURTIME CURSOR_NAME CUME_DIST CYCLE CALC_PARTITION_ID CONNECT DAG DATA DATAFILE DATA_TABLE_ID DATE DATE_ADD DATE_SUB DATETIME DAY DEALLOCATE DECRYPTION DEFAULT_AUTH DEFAULT_LOB_INROW_THRESHOLD DEFINER DELAY DELAY_KEY_WRITE DEPTH DES_KEY_FILE DENSE_RANK DESCRIPTION DESTINATION DIAGNOSTICS - DIRECTORY DISABLE DISCARD DISK DISKGROUP DO DUMP DUMPFILE DUPLICATE DUPLICATE_SCOPE DYNAMIC + DIRECTORY DISABLE DISALLOW DISCARD DISK DISKGROUP DO DOT DUMP DUMPFILE DUPLICATE DUPLICATE_SCOPE DYNAMIC DATABASE_ID DEFAULT_TABLEGROUP DISCONNECT DEMAND EFFECTIVE EMPTY ENABLE ENABLE_ARBITRATION_SERVICE ENABLE_EXTENDED_ROWID ENCRYPTED ENCRYPTION END ENDS ENFORCED ENGINE_ ENGINES ENUM ENTITY ERROR_CODE ERROR_P ERRORS ESTIMATE @@ -299,7 +299,7 @@ END_P SET_VAR DELIMITER INNODB INSERT_METHOD INSTALL INSTANCE INVOKER IO IOPS_WEIGHT IO_THREAD IPC ISOLATE ISOLATION ISSUER INCREMENT IS_TENANT_SYS_POOL INVISIBLE MERGE ISNULL INTERSECT INCREMENTAL INNER_PARSE ILOGCACHE INPUT INDEXED - JOB JSON JSON_ARRAYAGG JSON_OBJECTAGG JSON_VALUE JSON_TABLE + JOB JSON JSON_ARRAYAGG JSON_OBJECTAGG JSON_QUERY JSON_VALUE JSON_TABLE KEY_BLOCK_SIZE KEY_VERSION KVCACHE KV_ATTRIBUTES @@ 
-315,21 +315,21 @@ END_P SET_VAR DELIMITER MASTER_SSL_CRL MASTER_SSL_CRLPATH MASTER_SSL_KEY MASTER_USER MAX MAX_CONNECTIONS_PER_HOUR MAX_CPU MAX_FILE_SIZE LOG_DISK_SIZE MAX_IOPS MEMORY_SIZE MAX_QUERIES_PER_HOUR MAX_ROWS MAX_SIZE MAX_UPDATES_PER_HOUR MAX_USER_CONNECTIONS MEDIUM MEMORY MEMTABLE MESSAGE_TEXT META MICROSECOND - MIGRATE MIN MIN_CPU MIN_IOPS MIN_MAX MINOR MIN_ROWS MINUS MINUTE MODE MODIFY MONTH MOVE - MULTILINESTRING MULTIPOINT MULTIPOLYGON MUTEX MYSQL_ERRNO MIGRATION MAX_USED_PART_ID MAXIMIZE + MIGRATE MIN MIN_CPU MIN_IOPS MIN_MAX MINOR MIN_ROWS MINUS MINUTE MISMATCH MODE MODIFY MONTH MOVE + MULTILINESTRING MULTIPOINT MULTIPOLYGON MULTIVALUE MUTEX MYSQL_ERRNO MIGRATION MAX_USED_PART_ID MAXIMIZE MATERIALIZED MEMBER MEMSTORE_PERCENT MINVALUE MY_NAME NAME NAMES NAMESPACE NATIONAL NCHAR NDB NDBCLUSTER NESTED NEW NEXT NO NOAUDIT NODEGROUP NONE NORMAL NOW NOWAIT NEVER NOMINVALUE NOMAXVALUE NOORDER NOCYCLE NOCACHE NO_WAIT NULLS NUMBER NVARCHAR NTILE NTH_VALUE NOARCHIVELOG NETWORK NOPARALLEL NULL_IF_EXETERNAL - OBSOLETE OCCUR OF OFF OFFSET OLD OLD_PASSWORD ONE ONE_SHOT ONLY OPEN OPTIONS ORDINALITY ORIG_DEFAULT OWNER OLD_KEY OVER + OBSOLETE OBJECT OCCUR OF OFF OFFSET OLD OLD_PASSWORD ONE ONE_SHOT ONLY OPEN OPTIONS ORDINALITY ORIG_DEFAULT OWNER OLD_KEY OVER OBCONFIG_URL OJ OBJECT_ID PACK_KEYS PAGE PARALLEL PARAMETERS PARSER PARTIAL PARTITION_ID PARTITIONING PARTITIONS PASSWORD PATH PAUSE PERCENTAGE PERCENT_RANK PHASE PLAN PHYSICAL PLANREGRESS PLUGIN PLUGIN_DIR PLUGINS POINT POLYGON PERFORMANCE - PROTECTION OBJECT PRIORITY PL POLICY POOL PORT POSITION PREPARE PRESERVE PRETTY PRETTY_COLOR PREV PRIMARY_ZONE PRIVILEGES PROCESS + PROTECTION PRIORITY PL POLICY POOL PORT POSITION PREPARE PRESERVE PRETTY PRETTY_COLOR PREV PRIMARY_ZONE PRIVILEGES PROCESS PROCESSLIST PROFILE PROFILES PROXY PRECEDING PCTFREE P_ENTITY P_CHUNK PUBLIC PROGRESSIVE_MERGE_NUM PREVIEW PS PLUS PATTERN PARTITION_TYPE @@ -343,7 +343,7 @@ END_P SET_VAR DELIMITER RECYCLEBIN ROTATE ROW_NUMBER RUDUNDANT 
RECURSIVE RANDOM REDO_TRANSPORT_OPTIONS REMOTE_OSS RT RANK READ_ONLY RECOVERY REJECT ROLE - SAMPLE SAVEPOINT SCHEDULE SCHEMA_NAME SCN SCOPE SECOND SECURITY SEED SEQUENCES SERIAL SERIALIZABLE SERVER + SAMPLE SAVEPOINT SCALARS SCHEDULE SCHEMA_NAME SCN SCOPE SECOND SECURITY SEED SEQUENCES SERIAL SERIALIZABLE SERVER SERVER_IP SERVER_PORT SERVER_TYPE SERVICE SESSION SESSION_USER SET_MASTER_CLUSTER SET_SLAVE_CLUSTER SET_TP SHARE SHUTDOWN SIGNED SIMPLE SINGLE SKIP_INDEX SLAVE SLOW SLOT_IDX SNAPSHOT SOCKET SOME SONAME SOUNDS SOURCE SPFILE SPLIT SQL_AFTER_GTIDS SQL_AFTER_MTS_GAPS SQL_BEFORE_GTIDS SQL_BUFFER_RESULT @@ -361,7 +361,7 @@ END_P SET_VAR DELIMITER TABLEGROUP_ID TENANT_ID THROTTLE TIME_ZONE_INFO TOP_K_FRE_HIST TIMES TRIM_SPACE TTL TRANSFER - UNCOMMITTED UNDEFINED UNDO_BUFFER_SIZE UNDOFILE UNICODE UNINSTALL UNIT UNIT_GROUP UNIT_NUM UNLOCKED UNTIL + UNCOMMITTED UNCONDITIONAL UNDEFINED UNDO_BUFFER_SIZE UNDOFILE UNICODE UNINSTALL UNIT UNIT_GROUP UNIT_NUM UNLOCKED UNTIL UNUSUAL UPGRADE USE_BLOOM_FILTER UNKNOWN USE_FRM USER USER_RESOURCES UNBOUNDED UP UNLIMITED USER_SPECIFIED VALID VALUE VARIANCE VARIABLES VERBOSE VERIFY VIEW VISIBLE VIRTUAL_COLUMN_ID VALIDATE VAR_POP @@ -453,7 +453,7 @@ END_P SET_VAR DELIMITER %type revoke_stmt opt_with_admin_option opt_ignore_unknown_user set_role_stmt default_set_role_clause set_role_clause %type opt_limit opt_for_grant_user opt_using_role %type parameterized_trim -%type opt_with_consistent_snapshot opt_config_scope opt_index_keyname opt_full opt_extended opt_extended_or_full +%type opt_with_consistent_snapshot opt_config_scope opt_index_keyname opt_full opt_mode_flag opt_extended opt_extended_or_full %type opt_work begin_stmt commit_stmt rollback_stmt opt_ignore xa_begin_stmt xa_end_stmt xa_prepare_stmt xa_commit_stmt xa_rollback_stmt %type alter_table_stmt alter_table_actions alter_table_action_list alter_table_action alter_column_option alter_index_option alter_constraint_option standalone_alter_action alter_partition_option opt_to 
alter_tablegroup_option opt_table opt_tablegroup_option_list alter_tg_partition_option alter_column_group_option %type tablegroup_option_list tablegroup_option alter_tablegroup_actions alter_tablegroup_action tablegroup_option_list_space_seperated @@ -524,6 +524,7 @@ END_P SET_VAR DELIMITER %type opt_recover_tenant recover_table_list recover_table_relation_name restore_remap_list remap_relation_name table_relation_name opt_recover_remap_item_list restore_remap_item_list restore_remap_item remap_item remap_table_val opt_tenant %type new_or_old new_or_old_column_ref diagnostics_info_ref %type on_empty on_error json_on_response opt_returning_type opt_on_empty_or_error json_value_expr opt_ascii opt_truncate_clause +%type json_extract_unquote_expr json_extract_expr json_query_expr opt_multivalue opt_asis opt_array opt_pretty opt_wrapper opt_scalars opt_query_on_error_or_empty_or_mismatch on_empty_query on_error_query on_mismatch_query opt_response_query %type ws_nweights opt_ws_as_char opt_ws_levels ws_level_flag_desc ws_level_flag_reverse ws_level_flags ws_level_list ws_level_list_item ws_level_number ws_level_range ws_level_list_or_range %type get_diagnostics_stmt get_statement_diagnostics_stmt get_condition_diagnostics_stmt statement_information_item_list condition_information_item_list statement_information_item condition_information_item statement_information_item_name condition_information_item_name condition_arg %type method_opt method_list method extension mvt_param @@ -1668,6 +1669,13 @@ simple_expr collation %prec NEG } malloc_non_terminal_node($$, result->malloc_pool_, T_OP_EXISTS, 1, $2); } +| MATCH '(' column_list ')' AGAINST '(' expr_const opt_mode_flag ')' +{ + ParseNode *column_list_node = NULL; + merge_nodes(column_list_node, result, T_MATCH_COLUMN_LIST, $3); + malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_MATCH_AGAINST, 2, column_list_node, $7); + $$->value_ = $8[0]; +} | case_expr { $$ = $1; @@ -1690,32 +1698,7 @@ simple_expr collation 
%prec NEG malloc_non_terminal_node($$, result->malloc_pool_, T_OP_GET_USER_VAR, 1, $1); } ; -| column_definition_ref JSON_EXTRACT complex_string_literal -{ - ParseNode *json_extract_node = NULL; - make_name_node(json_extract_node, result->malloc_pool_, "JSON_EXTRACT"); - ParseNode *link_params = NULL; - malloc_non_terminal_node(link_params, result->malloc_pool_, T_LINK_NODE, 2, $1, $3); - ParseNode *params = NULL; - merge_nodes(params, result, T_EXPR_LIST, link_params); - malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, json_extract_node, params); - store_pl_ref_object_symbol($$, result, REF_FUNC); -} -| column_definition_ref JSON_EXTRACT_UNQUOTED complex_string_literal -{ - ParseNode *json_extract_node = NULL; - make_name_node(json_extract_node, result->malloc_pool_, "JSON_EXTRACT"); - ParseNode *link_params = NULL; - malloc_non_terminal_node(link_params, result->malloc_pool_, T_LINK_NODE, 2, $1, $3); - ParseNode *params = NULL; - merge_nodes(params, result, T_EXPR_LIST, link_params); - malloc_non_terminal_node(json_extract_node, result->malloc_pool_, T_FUN_SYS, 2, json_extract_node, params); - ParseNode *json_unquoted_node = NULL; - make_name_node(json_unquoted_node, result->malloc_pool_, "JSON_UNQUOTE"); - merge_nodes(params, result, T_EXPR_LIST, json_extract_node); - malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, json_unquoted_node, params); - store_pl_ref_object_symbol($$, result, REF_FUNC); -} + | relation_name '.' 
relation_name USER_VARIABLE { ParseNode *dblink_node = $4; @@ -1755,6 +1738,29 @@ simple_expr collation %prec NEG malloc_non_terminal_node($$, result->malloc_pool_, T_REMOTE_SEQUENCE, 4, db_node, tb_node, col_node, dblink_node); } ; + +opt_mode_flag: +IN NATURAL LANGUAGE MODE +{ + $$[0] = 0; +} +| IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION +{ + $$[0] = 1; +} +| IN BOOLEAN MODE +{ + $$[0] = 2; +} +| WITH QUERY EXPANSION +{ + $$[0] = 3; +} +| /*empty*/ +{ + $$[0] = 0; +}; + expr: expr AND expr %prec AND { @@ -2554,13 +2560,104 @@ MOD '(' expr ',' expr ')' { $$ = $1; } -| CAST '(' expr AS cast_data_type ')' +| CAST '(' expr AS cast_data_type opt_array ')' { - //cast_data_type is a T_CAST_ARGUMENT rather than a T_INT to avoid being parameterized automatically - ParseNode *params = NULL; - malloc_non_terminal_node(params, result->malloc_pool_, T_EXPR_LIST, 2, $3, $5); - make_name_node($$, result->malloc_pool_, "cast"); - malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, $$, params); + // opt_array add for multivalue index, CAST(... AS UNSIGNED ARRAY) syntax support + + if (OB_ISNULL($6) || $6->value_ == 0) { + //cast_data_type is a T_CAST_ARGUMENT rather than a T_INT to avoid being parameterized automatically + + ParseNode *params = NULL; + malloc_non_terminal_node(params, result->malloc_pool_, T_EXPR_LIST, 2, $3, $5); + make_name_node($$, result->malloc_pool_, "cast"); + malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, $$, params); + } else { + // for multivalue index, CAST(... 
AS UNSIGNED ARRAY) + ParseNode *truncate = NULL; + malloc_terminal_node(truncate, result->malloc_pool_, T_INT); + truncate->value_ = 0; + truncate->is_hidden_const_ = 1; + + ParseNode *scalar = NULL; + malloc_terminal_node(scalar, result->malloc_pool_, T_INT); + scalar->value_ = 2; + scalar->is_hidden_const_ = 1; + + ParseNode *pretty = NULL; + malloc_terminal_node(pretty, result->malloc_pool_, T_INT); + pretty->value_ = 0; + pretty->is_hidden_const_ = 1; + + + ParseNode *ascii = NULL; + malloc_terminal_node(ascii, result->malloc_pool_, T_INT); + ascii->value_ = 0; + ascii->is_hidden_const_ = 1; + + ParseNode *wrapper = NULL; + malloc_terminal_node(wrapper, result->malloc_pool_, T_INT); + wrapper->value_ = 1; + wrapper->is_hidden_const_ = 1; + + ParseNode *asis = NULL; + malloc_terminal_node(asis, result->malloc_pool_, T_INT); + asis->value_ = 1; + asis->is_hidden_const_ = 1; + + ParseNode *empty = NULL; + malloc_terminal_node(empty, result->malloc_pool_, T_INT); + empty->value_ = 1; + empty->is_hidden_const_ = 1; + + ParseNode *error = NULL; + malloc_terminal_node(error, result->malloc_pool_, T_INT); + error->value_ = 0; + error->is_hidden_const_ = 1; + + ParseNode *mismatch = NULL; + malloc_terminal_node(mismatch, result->malloc_pool_, T_INT); + mismatch->value_ = 1; + mismatch->is_hidden_const_ = 1; + + ParseNode *multivalue = NULL; + malloc_terminal_node(multivalue, result->malloc_pool_, T_INT); + multivalue->value_ = 0; + multivalue->is_hidden_const_ = 1; + + ParseNode *path = NULL; + ParseNode *data = NULL; + + if (OB_NOT_NULL($3) && $3->num_child_ == 2 && $3->type_ == T_FUN_SYS) { + ParseNode* expr_param = $3->children_[1]; + ParseNode* expr_name = $3->children_[0]; + if ((OB_NOT_NULL(expr_name->str_value_) && strcasecmp(expr_name->str_value_, "JSON_EXTRACT") == 0) + && expr_param->num_child_ == 2) { + path = expr_param->children_[1]; + data = expr_param->children_[0]; + } else if ((OB_NOT_NULL(expr_name->str_value_) && strcasecmp(expr_name->str_value_, 
"JSON_UNQUOTE") == 0) + && expr_param->num_child_ == 1 + && OB_NOT_NULL(expr_param->children_[0]) + && expr_param->children_[0]->num_child_ == 2) { + expr_name = expr_param->children_[0]->children_[0]; + expr_param = expr_param->children_[0]->children_[1]; + if ((OB_NOT_NULL(expr_name->str_value_) && strcasecmp(expr_name->str_value_, "JSON_EXTRACT") == 0) + && expr_param->num_child_ == 2) { + path = expr_param->children_[1]; + data = expr_param->children_[0]; + } + } + } + + if (OB_ISNULL(path) || OB_ISNULL(data)) { + yyerror(NULL, result, "Incorrect arguments to CAST (... AS ... ARRAY)\n"); + YYABORT_PARSE_SQL_ERROR; + } else { + malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS_JSON_QUERY, 13, + data, path, $5, truncate, scalar, + pretty, ascii, wrapper, asis, error, + empty, mismatch, multivalue); + } + } } | INSERT '(' expr ',' expr ',' expr ',' expr ')' { @@ -3002,10 +3099,22 @@ MOD '(' expr ',' expr ')' make_name_node($$, result->malloc_pool_, "weight_string"); malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, $$, params); } +| json_extract_unquote_expr +{ + $$ = $1; +} +| json_extract_expr +{ + $$ = $1; +} | json_value_expr { $$ = $1; } +| json_query_expr +{ + $$ = $1; +} | POINT '(' expr ',' expr ')' { malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS_POINT, 2, $3, $5); @@ -5149,6 +5258,16 @@ column_definition malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 6, $5 ? 
$5 : $2, col_list, index_option, $6, NULL, $11); $$->value_ = 1; } +| FULLTEXT opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list +{ + (void)($2); + ParseNode *col_list = NULL; + ParseNode *index_option = NULL; + merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $6); + merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $8); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 5, $3, col_list, index_option, $4, NULL); + $$->value_ = 3; +} | CONSTRAINT opt_constraint_name FOREIGN KEY opt_index_name '(' column_name_list ')' REFERENCES relation_factor '(' column_name_list ')' opt_match_option opt_reference_option_list { ParseNode *child_col_list= NULL; @@ -8459,7 +8578,8 @@ ALTER {$$ = NULL;} ; opt_index_keyname: -SPATIAL { $$[0] = 2; } +FULLTEXT { $$[0] = 3; } +| SPATIAL { $$[0] = 2; } | UNIQUE { $$[0] = 1; } | /*EMPTY*/ { $$[0] = 0; } ; @@ -8651,7 +8771,7 @@ GLOBAL { malloc_terminal_node($$, result->malloc_pool_, T_WITH_ROWID); } -| WITH PARSER STRING_VALUE +| WITH PARSER relation_name { malloc_non_terminal_node($$, result->malloc_pool_, T_PARSER_NAME, 1, $3); } @@ -16408,6 +16528,16 @@ ADD add_key_or_index_opt { $$ = $2; } +| ADD FULLTEXT opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list +{ + (void)($3); + ParseNode *col_list = NULL; + ParseNode *index_option = NULL; + merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $7); + merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $9); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 6, $4, col_list, index_option, $5, NULL, NULL); + $$->value_ = 3; +} | ADD add_constraint_pri_key_opt { $$ = $2; @@ -20266,6 +20396,378 @@ opt_on_mismatch: } ; +/*=========================================================== + * + * json query + * + *===========================================================*/ +json_query_expr: +JSON_QUERY '(' simple_expr ',' complex_string_literal opt_returning_type 
opt_truncate_clause opt_scalars opt_pretty opt_ascii opt_wrapper opt_asis opt_query_on_error_or_empty_or_mismatch opt_multivalue ')' +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS_JSON_QUERY, 13, $3, $5, $6, $7, $8, $9, $10, $11, $12, $13->children_[0], $13->children_[1], $13->children_[2], $14); +} +; + +opt_array: +ARRAY +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 1; + $$->is_hidden_const_ = 1; +} +| +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 0; + $$->is_hidden_const_ = 1; +} + +json_extract_unquote_expr: +column_definition_ref JSON_EXTRACT_UNQUOTED complex_string_literal +{ + ParseNode *json_extract_node = NULL; + make_name_node(json_extract_node, result->malloc_pool_, "JSON_EXTRACT"); + ParseNode *link_params = NULL; + malloc_non_terminal_node(link_params, result->malloc_pool_, T_LINK_NODE, 2, $1, $3); + ParseNode *params = NULL; + merge_nodes(params, result, T_EXPR_LIST, link_params); + malloc_non_terminal_node(json_extract_node, result->malloc_pool_, T_FUN_SYS, 2, json_extract_node, params); + ParseNode *json_unquoted_node = NULL; + make_name_node(json_unquoted_node, result->malloc_pool_, "JSON_UNQUOTE"); + merge_nodes(params, result, T_EXPR_LIST, json_extract_node); + malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, json_unquoted_node, params); + store_pl_ref_object_symbol($$, result, REF_FUNC); +} +; + +json_extract_expr: +column_definition_ref JSON_EXTRACT complex_string_literal +{ + ParseNode *json_extract_node = NULL; + make_name_node(json_extract_node, result->malloc_pool_, "JSON_EXTRACT"); + ParseNode *link_params = NULL; + malloc_non_terminal_node(link_params, result->malloc_pool_, T_LINK_NODE, 2, $1, $3); + ParseNode *params = NULL; + merge_nodes(params, result, T_EXPR_LIST, link_params); + malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, json_extract_node, params); + store_pl_ref_object_symbol($$, result, REF_FUNC); +} +; + 
+opt_multivalue: +MULTIVALUE +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 1; + $$->is_hidden_const_ = 1; +} +| +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 0; + $$->is_hidden_const_ = 1; +}; + + +opt_asis: +ASIS +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 1; + $$->is_hidden_const_ = 1; +} +| +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 0; + $$->is_hidden_const_ = 1; +}; + +opt_scalars: +ALLOW SCALARS +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 0; + $$->is_hidden_const_ = 1; +} +| DISALLOW SCALARS +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 1; + $$->is_hidden_const_ = 1; +} +| +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 2; + $$->is_hidden_const_ = 1; +} +; + +opt_pretty: +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 0; + $$->is_hidden_const_ = 1; +} +| PRETTY +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 1; + $$->is_hidden_const_ = 1; +} +; + +opt_query_on_error_or_empty_or_mismatch: +/* empty */ +{ + ParseNode *empty_type = NULL; + malloc_terminal_node(empty_type, result->malloc_pool_, T_INT); + empty_type->value_ = 5; + empty_type->is_hidden_const_ = 1; + + ParseNode *error_type = NULL; + malloc_terminal_node(error_type, result->malloc_pool_, T_INT); + error_type->value_ = 5; + error_type->is_hidden_const_ = 1; + + ParseNode *mismatch_type = NULL; + malloc_terminal_node(mismatch_type, result->malloc_pool_, T_INT); + mismatch_type->value_ = 2; + mismatch_type->is_hidden_const_ = 1; + + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 3, error_type, empty_type, mismatch_type); +} +| on_empty_query +{ + ParseNode *error_type = NULL; + malloc_terminal_node(error_type, result->malloc_pool_, T_INT); + error_type->value_ = 5; + error_type->is_hidden_const_ = 1; + + ParseNode *mismatch_type 
= NULL; + malloc_terminal_node(mismatch_type, result->malloc_pool_, T_INT); + mismatch_type->value_ = 2; + mismatch_type->is_hidden_const_ = 1; + + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 3, error_type, $1, mismatch_type); +} +| on_error_query +{ + ParseNode *empty_type = NULL; + malloc_terminal_node(empty_type, result->malloc_pool_, T_INT); + empty_type->value_ = 5; + empty_type->is_hidden_const_ = 1; + + ParseNode *mismatch_type = NULL; + malloc_terminal_node(mismatch_type, result->malloc_pool_, T_INT); + mismatch_type->value_ = 2; + mismatch_type->is_hidden_const_ = 1; + + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 3, $1, empty_type, mismatch_type); +} +| on_mismatch_query +{ + ParseNode *error_type = NULL; + malloc_terminal_node(error_type, result->malloc_pool_, T_INT); + error_type->value_ = 5; + error_type->is_hidden_const_ = 1; + + ParseNode *empty_type = NULL; + malloc_terminal_node(empty_type, result->malloc_pool_, T_INT); + empty_type->value_ = 5; + empty_type->is_hidden_const_ = 1; + + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 3, error_type, empty_type, $1); +} +| on_error_query on_empty_query +{ + ParseNode *mismatch_type = NULL; + malloc_terminal_node(mismatch_type, result->malloc_pool_, T_INT); + mismatch_type->value_ = 2; + mismatch_type->is_hidden_const_ = 1; + + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 3, $1, $2, mismatch_type); +} +| on_empty_query on_error_query +{ + ParseNode *mismatch_type = NULL; + malloc_terminal_node(mismatch_type, result->malloc_pool_, T_INT); + mismatch_type->value_ = 2; + mismatch_type->is_hidden_const_ = 1; + + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 3, $2, $1, mismatch_type); +} +| on_error_query on_mismatch_query +{ + ParseNode *empty_type = NULL; + malloc_terminal_node(empty_type, result->malloc_pool_, T_INT); + empty_type->value_ = 5; + empty_type->is_hidden_const_ = 1; + + malloc_non_terminal_node($$, 
result->malloc_pool_, T_LINK_NODE, 3, $1, empty_type, $2); +} +| on_empty_query on_mismatch_query +{ + ParseNode *error_type = NULL; + malloc_terminal_node(error_type, result->malloc_pool_, T_INT); + error_type->value_ = 5; + error_type->is_hidden_const_ = 1; + + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 3, error_type, $1, $2); +} +| on_error_query on_empty_query on_mismatch_query +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 3, $1, $2, $3); +} +| on_empty_query on_error_query on_mismatch_query +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 3, $2, $1, $3); +} +; + +opt_response_query: +ERROR_P +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 0; + $$->is_hidden_const_ = 1; +} +| NULLX +{ + (void)($1) ; /* make bison mute */ + malloc_terminal_node($$, result->malloc_pool_, T_NULLX_CLAUSE); + $$->value_ = 1; + $$->param_num_ = 1; + $$->sql_str_off_ = $1->sql_str_off_; +} +; + +/* mismatch error : 0 NULL : 1 EMPTY : 2 */ +on_mismatch_query: +opt_response_query ON MISMATCH +{ + $$ =$1; +} +| DOT ON MISMATCH +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 3; + $$->is_hidden_const_ = 1; +} +; + +/* error error : 0 NULL : 1 empty : 2 empty ARRAY : 3 empty object : 4 (EMPTY) : 5*/ +on_error_query: +opt_response_query ON ERROR_P +{ + $$ = $1; +} +| EMPTY ON ERROR_P +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 2; + $$->is_hidden_const_ = 1; +} +| EMPTY ARRAY ON ERROR_P +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 3; + $$->is_hidden_const_ = 1; +} +| EMPTY OBJECT ON ERROR_P +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 4; + $$->is_hidden_const_ = 1; +} +; + +/* empty error : 0 NULL : 1 empty : 2 empty ARRAY : 3 empty object : 4 (EMPTY) : 5*/ + +on_empty_query: +opt_response_query ON EMPTY +{ + $$ = $1; +} +| EMPTY ON EMPTY +{ + malloc_terminal_node($$, 
result->malloc_pool_, T_INT); + $$->value_ = 2; + $$->is_hidden_const_ = 1; +} +| EMPTY ARRAY ON EMPTY +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 3; + $$->is_hidden_const_ = 1; +} +| EMPTY OBJECT ON EMPTY +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 4; + $$->is_hidden_const_ = 1; +} +; + + +opt_wrapper: +WITHOUT WRAPPER +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 0; + $$->is_hidden_const_ = 1; +} +| WITHOUT ARRAY WRAPPER +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 1; + $$->is_hidden_const_ = 1; +} +| WITH WRAPPER +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 2; + $$->is_hidden_const_ = 1; +} +| WITH ARRAY WRAPPER +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 3; + $$->is_hidden_const_ = 1; +} +| WITH UNCONDITIONAL WRAPPER +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 4; + $$->is_hidden_const_ = 1; +} +| WITH CONDITIONAL WRAPPER +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 5; + $$->is_hidden_const_ = 1; +} +| WITH UNCONDITIONAL ARRAY WRAPPER +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 6; + $$->is_hidden_const_ = 1; +} +| WITH CONDITIONAL ARRAY WRAPPER +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 7; + $$->is_hidden_const_ = 1; +} +| +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->value_ = 8; + $$->is_hidden_const_ = 1; +} +; + /*=========================================================== * * json value @@ -20539,6 +21041,7 @@ ACCOUNT | AGAINST | AGGREGATE | ALGORITHM +| ALLOW | ALL_META | ALL_USER | ALWAYS @@ -20549,6 +21052,8 @@ ACCOUNT | APPROX_COUNT_DISTINCT_SYNOPSIS_MERGE | ARCHIVELOG | ARBITRATION +| ARRAY +| ASIS | ASCII | ASYNCHRONOUS | AT @@ -20635,6 +21140,7 @@ ACCOUNT | COMPUTE | CONCURRENT | CONDENSED +| CONDITIONAL | CONNECTION %prec 
KILL_EXPR | CONSISTENT | CONSISTENT_MODE @@ -20682,11 +21188,13 @@ ACCOUNT | DIAGNOSTICS | DIRECTORY | DISABLE +| DISALLOW | DISCARD | DISK | DISKGROUP | DISCONNECT | DO +| DOT | DUMP | DUMPFILE | DUPLICATE @@ -20806,6 +21314,7 @@ ACCOUNT | JSON_VALUE | JSON_ARRAYAGG | JSON_OBJECTAGG +| JSON_QUERY | JSON_TABLE | KEY_BLOCK_SIZE | KEY_VERSION @@ -20893,6 +21402,7 @@ ACCOUNT | MIN_ROWS | MINUTE | MINUS +| MISMATCH | MODE | MODIFY | MONTH @@ -20900,6 +21410,7 @@ ACCOUNT | MULTILINESTRING | MULTIPOINT | MULTIPOLYGON +| MULTIVALUE | MUTEX | MYSQL_ERRNO | MAX_USED_PART_ID @@ -20933,6 +21444,7 @@ ACCOUNT | NULL_IF_EXETERNAL | NULLS | NVARCHAR +| OBJECT | OCCUR | OF | OFF @@ -21067,6 +21579,7 @@ ACCOUNT | RUN | SAMPLE | SAVEPOINT +| SCALARS | SCHEDULE | SCHEMA_NAME | SCN @@ -21209,6 +21722,7 @@ ACCOUNT | TABLEGROUP_ID | TOP_K_FRE_HIST | UNCOMMITTED +| UNCONDITIONAL | UNDEFINED | UNDO_BUFFER_SIZE | UNDOFILE @@ -21249,6 +21763,7 @@ ACCOUNT | WEIGHT_STRING | WHENEVER | WINDOW +| WITHOUT | WORK | WRAPPER | X509 @@ -21274,7 +21789,6 @@ ACCOUNT | AVAILABILITY | PERFORMANCE | PROTECTION -| OBJECT | OBSOLETE | HIDDEN | INDEXED @@ -21297,7 +21811,6 @@ ACCOUNT | TRANSFER | SUM_OPNSIZE | VALIDATION -| WITHOUT ; unreserved_keyword_special: diff --git a/src/sql/plan_cache/ob_sql_parameterization.cpp b/src/sql/plan_cache/ob_sql_parameterization.cpp index c813e00309..f1c9272738 100644 --- a/src/sql/plan_cache/ob_sql_parameterization.cpp +++ b/src/sql/plan_cache/ob_sql_parameterization.cpp @@ -1903,13 +1903,13 @@ int ObSqlParameterization::mark_tree(ParseNode *tree ,SqlInfo &sql_info) } } } else if(T_FUN_SYS_JSON_QUERY == tree->type_) { - if (11 != tree->num_child_) { + if (13 != tree->num_child_) { ret = OB_INVALID_ARGUMENT; SQL_PC_LOG(WARN, "invalid json query expr argument", K(ret), K(tree->num_child_)); } else { - const int64_t ARGS_NUMBER_ELEVEN = 11; - bool mark_arr[ARGS_NUMBER_ELEVEN] = {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; // json doc type will affect returning type, - if 
(OB_FAIL(mark_args(tree, mark_arr, ARGS_NUMBER_ELEVEN, sql_info))) { + const int64_t ARGS_NUMBER_THIRTEEN = 13; + bool mark_arr[ARGS_NUMBER_THIRTEEN] = {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; // json doc type will affect returning type, + if (OB_FAIL(mark_args(tree, mark_arr, ARGS_NUMBER_THIRTEEN, sql_info))) { SQL_PC_LOG(WARN, "fail to mark substr arg", K(ret)); } } diff --git a/src/sql/printer/ob_raw_expr_printer.cpp b/src/sql/printer/ob_raw_expr_printer.cpp index 0882ae6a04..0a6ab4102f 100644 --- a/src/sql/printer/ob_raw_expr_printer.cpp +++ b/src/sql/printer/ob_raw_expr_printer.cpp @@ -196,6 +196,11 @@ int ObRawExprPrinter::print(ObRawExpr *expr) PRINT_EXPR(set_op_expr); break; } + case ObRawExpr::EXPR_MATCH_AGAINST: { + ObMatchFunRawExpr *match_against_expr = static_cast(expr); + PRINT_EXPR(match_against_expr); + break; + } default: { ret = OB_ERR_UNEXPECTED; LOG_WARN("unknown expr class", K(ret), K(expr->get_expr_class())); @@ -1884,11 +1889,13 @@ int ObRawExprPrinter::print_json_value(ObSysFunRawExpr *expr) DATA_PRINTF(" error"); break; case JsnValueType::JSN_VALUE_NULL: - case JsnValueType::JSN_VALUE_IMPLICIT: if (lib::is_mysql_mode() || type == 1) { DATA_PRINTF(" null"); } break; + + case JsnValueType::JSN_VALUE_IMPLICIT: + break; case JsnValueType::JSN_VALUE_DEFAULT: DATA_PRINTF(" default "); PRINT_EXPR(expr->get_param_expr(JSN_VAL_EMPTY_DEF)); @@ -1915,9 +1922,10 @@ int ObRawExprPrinter::print_json_value(ObSysFunRawExpr *expr) DATA_PRINTF(" error"); break; case JsnValueType::JSN_VALUE_NULL: - case JsnValueType::JSN_VALUE_IMPLICIT: DATA_PRINTF(" null"); break; + case JsnValueType::JSN_VALUE_IMPLICIT: + break; case JsnValueType::JSN_VALUE_DEFAULT: DATA_PRINTF(" default "); PRINT_EXPR(expr->get_param_expr(JSN_VAL_ERROR_DEF)); @@ -1928,7 +1936,10 @@ int ObRawExprPrinter::print_json_value(ObSysFunRawExpr *expr) break; } if (OB_SUCC(ret)) { - DATA_PRINTF(" on error"); + if (type == JsnValueType::JSN_VALUE_IMPLICIT) { + } else { + DATA_PRINTF(" on error"); + } 
} } } @@ -2161,6 +2172,25 @@ int ObRawExprPrinter::print_json_query(ObSysFunRawExpr *expr) LOG_WARN("type value isn't int value"); } else { int64_t type = static_cast(expr->get_param_expr(8))->get_value().get_int(); + switch (type) { + case 0: + break; + case 1: + DATA_PRINTF(" asis"); + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid type value.", K(type)); + break; + } + } + } + if (OB_SUCC(ret)) { + if (!static_cast(expr->get_param_expr(9))->get_value().is_int()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("type value isn't int value"); + } else { + int64_t type = static_cast(expr->get_param_expr(9))->get_value().get_int(); switch (type) { case 0: DATA_PRINTF(" error on error"); @@ -2187,11 +2217,11 @@ int ObRawExprPrinter::print_json_query(ObSysFunRawExpr *expr) } } if (OB_SUCC(ret)) { - if (!static_cast(expr->get_param_expr(9))->get_value().is_int()) { + if (!static_cast(expr->get_param_expr(10))->get_value().is_int()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("type value isn't int value"); } else { - int64_t type = static_cast(expr->get_param_expr(9))->get_value().get_int(); + int64_t type = static_cast(expr->get_param_expr(10))->get_value().get_int(); switch (type) { case 0: DATA_PRINTF(" error on empty"); @@ -2219,11 +2249,11 @@ int ObRawExprPrinter::print_json_query(ObSysFunRawExpr *expr) } if (OB_SUCC(ret)) { - if (!static_cast(expr->get_param_expr(10))->get_value().is_int()) { + if (!static_cast(expr->get_param_expr(11))->get_value().is_int()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("type value isn't int value"); } else { - int64_t type = static_cast(expr->get_param_expr(10))->get_value().get_int(); + int64_t type = static_cast(expr->get_param_expr(11))->get_value().get_int(); switch (type) { case 0: DATA_PRINTF(" error on mismatch"); @@ -2242,6 +2272,27 @@ int ObRawExprPrinter::print_json_query(ObSysFunRawExpr *expr) } } } + + if (OB_SUCC(ret)) { + if (!static_cast(expr->get_param_expr(12))->get_value().is_int()) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("type value isn't int value"); + } else { + int64_t type = static_cast(expr->get_param_expr(12))->get_value().get_int(); + switch (type) { + case 0: + break; + case 1: + DATA_PRINTF(" multivalue"); + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid type value.", K(type)); + break; + } + } + } + if (OB_SUCC(ret)) { DATA_PRINTF(")"); } @@ -2481,13 +2532,13 @@ int ObRawExprPrinter::print_json_expr(ObSysFunRawExpr *expr) case T_FUN_SYS_JSON_QUERY: { // json query (json doc, json path, (returning cast_type) opt_scalars opt_pretty opt_ascii opt_wrapper on_error on_empty on_mismatch). int64_t type = 0; - if (OB_UNLIKELY(expr->get_param_count() != 11)) { + if (OB_UNLIKELY(expr->get_param_count() != 13)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected param count of expr to type", K(ret), KPC(expr), K(expr->get_param_count())); - } else if (!static_cast(expr->get_param_expr(10))->get_value().is_int()) { + } else if (!static_cast(expr->get_param_expr(11))->get_value().is_int()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("type value isn't int value"); - } else if (static_cast(expr->get_param_expr(10))->get_value().get_int() == 3) { + } else if (static_cast(expr->get_param_expr(11))->get_value().get_int() == 3) { if (OB_FAIL(print_dot_notation(expr))) { LOG_WARN("fail to print dot notation", K(ret)); } @@ -3165,7 +3216,9 @@ int ObRawExprPrinter::print(ObSysFunRawExpr *expr) if (lib::is_mysql_mode() && (expr_type == T_FUN_SYS_JSON_ARRAY || expr_type == T_FUN_SYS_JSON_MERGE_PATCH)) { DATA_PRINTF("%.*s", LEN_AND_PTR(func_name)); OZ(inner_print_fun_params(*expr)); - } else if(lib::is_oracle_mode() || T_FUN_SYS_JSON_VALUE == expr_type) { + } else if (lib::is_oracle_mode() + || T_FUN_SYS_JSON_QUERY == expr_type + || T_FUN_SYS_JSON_VALUE == expr_type) { if (OB_FAIL(print_json_expr(expr))) { LOG_WARN("fail to print json expr", K(ret), K(*expr)); } @@ -3964,6 +4017,64 @@ int ObRawExprPrinter::print(ObPseudoColumnRawExpr *expr) return ret; } +int 
ObRawExprPrinter::print(ObMatchFunRawExpr *expr) +{ /* Prints a fulltext match expr as MATCH(col[,col...]) AGAINST(search_key[ mode suffix]) in MySQL mode; the Oracle-mode branch prints nothing yet. Returns OB_ERR_UNEXPECTED when buf_/pos_/expr is null, when there are no match columns, or when a match column or the search key is null. */ + int ret = OB_SUCCESS; + if (OB_ISNULL(buf_) || OB_ISNULL(pos_) || OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret), K(buf_), K(pos_), K(expr)); + } else if (is_mysql_mode()) { + DATA_PRINTF("MATCH("); + int64_t i = 0; + for (; OB_SUCC(ret) && i < expr->get_match_columns().count() - 1; ++i) { /* every column except the last is followed by ',' */ + if (OB_ISNULL(expr->get_match_columns().at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + PRINT_EXPR(expr->get_match_columns().at(i)); + DATA_PRINTF(","); + } + } + if (OB_SUCC(ret)) { + if (OB_UNLIKELY(i >= expr->get_match_columns().count()) || OB_ISNULL(expr->get_match_columns().at(i))) { /* i reaches count only when there are no match columns; check before at(i) so an empty list is rejected instead of indexed out of range */ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null or empty match columns", K(ret), K(i)); + } else if (OB_ISNULL(expr->get_search_key())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + PRINT_EXPR(expr->get_match_columns().at(i)); + DATA_PRINTF(") AGAINST("); + PRINT_EXPR(expr->get_search_key()); + switch (expr->get_mode_flag()) { + case NATURAL_LANGUAGE_MODE: { + DATA_PRINTF(")"); + break; + } + case NATURAL_LANGUAGE_MODE_WITH_QUERY_EXPANSION: { + DATA_PRINTF(" IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION)"); + break; + } + case BOOLEAN_MODE: { + DATA_PRINTF(" IN BOOLEAN MODE)"); + break; + } + case WITH_QUERY_EXPANSION: { + DATA_PRINTF(" WITH QUERY EXPANSION)"); + break; + } + default: { + DATA_PRINTF(")"); + } + } + } + } + } else if (is_oracle_mode()) { + // jinmao TODO: print oracle contains() + } else {} + return ret; +} + int ObRawExprPrinter::print_partition_exprs(ObWinFunRawExpr *expr) { int ret = OB_SUCCESS; diff --git a/src/sql/printer/ob_raw_expr_printer.h b/src/sql/printer/ob_raw_expr_printer.h index 276dc1fb51..68caa06ac9 100644 --- a/src/sql/printer/ob_raw_expr_printer.h +++ b/src/sql/printer/ob_raw_expr_printer.h @@ -131,6 +131,7 @@ private: int print(ObUDFRawExpr *expr); int print(ObWinFunRawExpr *expr); int print(ObPseudoColumnRawExpr *expr); + int print(ObMatchFunRawExpr *expr); int
print_date_unit(ObRawExpr *expr); int print_get_format_unit(ObRawExpr *expr); diff --git a/src/sql/resolver/cmd/ob_variable_set_resolver.cpp b/src/sql/resolver/cmd/ob_variable_set_resolver.cpp index e3c74dc2f2..05a1226d78 100644 --- a/src/sql/resolver/cmd/ob_variable_set_resolver.cpp +++ b/src/sql/resolver/cmd/ob_variable_set_resolver.cpp @@ -252,6 +252,7 @@ int ObVariableSetResolver::resolve_value_expr(ParseNode &val_node, ObRawExpr *&v ObArray sys_vars; ObArray op_exprs; ObSEArray user_var_exprs; + ObSEArray match_exprs; ObCollationType collation_connection = CS_TYPE_INVALID; ObCharsetType character_set_connection = CHARSET_INVALID; if (OB_ISNULL(params_.expr_factory_) || OB_ISNULL(params_.session_info_)) { @@ -277,11 +278,14 @@ int ObVariableSetResolver::resolve_value_expr(ParseNode &val_node, ObRawExpr *&v LOG_WARN("fail to get name case mode", K(ret)); } else if (OB_FAIL(expr_resolver.resolve(&val_node, value_expr, columns, sys_vars, sub_query_info, aggr_exprs, win_exprs, - udf_info, op_exprs, user_var_exprs))) { + udf_info, op_exprs, user_var_exprs, match_exprs))) { LOG_WARN("resolve expr failed", K(ret)); } else if (udf_info.count() > 0) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("UDFInfo should not found be here!!!", K(ret)); + } else if (OB_UNLIKELY(match_exprs.count() > 0)) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "fulltext search func"); } else if (value_expr->get_expr_type() == T_SP_CPARAM) { ObCallParamRawExpr *call_expr = static_cast(value_expr); if (OB_ISNULL(call_expr->get_expr())) { diff --git a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp index 7945ccadf3..ca06f6650e 100644 --- a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp @@ -24,6 +24,7 @@ #include "sql/resolver/dml/ob_delete_resolver.h" #include "sql/rewrite/ob_transform_utils.h" #include "share/ob_index_builder_util.h" +#include "share/ob_fts_index_builder_util.h" 
#include "sql/engine/expr/ob_expr_sql_udt_utils.h" #include "sql/engine/expr/ob_expr_lob_utils.h" #include "lib/xml/ob_xml_parser.h" @@ -843,6 +844,8 @@ int ObAlterTableResolver::resolve_action_list(const ParseNode &node) // bug: // index_tid_array: 包含index和mv, 这里只需要处理索引即可 // so do-nothing for mv + } else if (index_table_schema->is_built_in_fts_index()) { + // skip built-in fts index } else if (OB_FAIL(index_table_schema->get_index_name(index_name))) { LOG_WARN("failed to get index name", K(ret)); } else { @@ -1502,6 +1505,12 @@ int ObAlterTableResolver::resolve_index_column_list(const ParseNode &node, } sort_item.column_name_.assign_ptr(sort_column_node->children_[0]->str_value_, static_cast(sort_column_node->children_[0]->str_len_)); + bool is_multi_value_index = false; + if (OB_FAIL(ObMulValueIndexBuilderUtil::adjust_index_type(sort_item.column_name_, + is_multi_value_index, + reinterpret_cast(&index_keyname_)))) { + LOG_WARN("failed to resolve index type", K(ret)); + } } if (OB_FAIL(ret)) { //do nothing @@ -1517,10 +1526,20 @@ int ObAlterTableResolver::resolve_index_column_list(const ParseNode &node, sort_item.prefix_len_ = 0; } - // spatial index constraint if (OB_FAIL(ret)) { // do nothing - } else { + } else if (index_keyname_ == FTS_KEY) { + if (!GCONF._enable_add_fulltext_index_to_existing_table) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("experimental feature: build fulltext index afterward is experimental feature", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "build fulltext index afterward"); + } else if (OB_FAIL(resolve_fts_index_constraint(*table_schema_, + sort_item.column_name_, + index_name_value))) { + SQL_RESV_LOG(WARN, "check fts index constraint fail",K(ret), + K(sort_item.column_name_)); + } + } else { // spatial index, NOTE resolve_spatial_index_constraint() will set index_keyname ObSEArray resolved_cols; ObAlterTableStmt *alter_table_stmt = get_alter_table_stmt(); bool is_explicit_order = (NULL != sort_column_node->children_[2] @@ -1634,6 +1653,7 
@@ int ObAlterTableResolver::resolve_add_index(const ParseNode &node) SQL_RESV_LOG(WARN, "invalid parse tree!", K(ret)); } else { bool is_unique_key = 1 == node.value_; + bool is_fulltext_index = 3 == node.value_; ParseNode *index_name_node = nullptr; ParseNode *column_list_node = nullptr; ParseNode *table_option_node = nullptr; @@ -1659,7 +1679,9 @@ int ObAlterTableResolver::resolve_add_index(const ParseNode &node) index_name_node = node.children_[0]; column_list_node = node.children_[1]; table_option_node = node.children_[2]; - index_partition_option = node.children_[4]; + if (!is_fulltext_index) { + index_partition_option = node.children_[4]; + } colulmn_group_node = node.children_[5]; } } @@ -1905,10 +1927,30 @@ int ObAlterTableResolver::resolve_add_index(const ParseNode &node) if (OB_SUCC(ret)) { if (OB_FAIL(create_index_arg->assign(index_arg))) { LOG_WARN("fail to assign create index arg", K(ret)); - } else if (OB_FAIL(resolve_results.push_back(resolve_result))) { - LOG_WARN("fail to push back index_stmt_list", K(ret), K(resolve_result)); - } else if (OB_FAIL(index_arg_list.push_back(create_index_arg))) { - LOG_WARN("fail to push back index_arg", K(ret)); + } else if (share::schema::is_fts_index(index_arg.index_type_)) { + if (OB_FAIL(ObDDLResolver::append_domain_index_args(*table_schema_, + resolve_result, + create_index_arg, + have_generate_fts_arg_, + resolve_results, + index_arg_list, + allocator_))) { + LOG_WARN("failed to append domain index args", K(ret), K(index_arg)); + } else { + // record allocator to free fts arg in desctructor + alter_table_stmt->set_fts_arg_allocator(allocator_); + } + } else if (is_multivalue_index(index_arg.index_type_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("dynamic add multivalue index not supported yet", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "dynamic add multivalue index not supported yet"); + } else { + if (OB_FAIL(resolve_results.push_back(resolve_result))) { + LOG_WARN("fail to push back index_stmt_list", K(ret), 
+ K(resolve_result)); + } else if (OB_FAIL(index_arg_list.push_back(create_index_arg))) { + LOG_WARN("fail to push back index_arg", K(ret)); + } } } } @@ -2385,20 +2427,47 @@ int ObAlterTableResolver::generate_index_arg(obrpc::ObCreateIndexArg &index_arg, } else { type = INDEX_TYPE_UNIQUE_LOCAL; } + + if (index_keyname_ == MULTI_KEY) { + type = INDEX_TYPE_UNIQUE_MULTIVALUE_LOCAL; + if (global_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("global index, multivalue index not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "global multivalue index"); + } + } } else { if (tenant_data_version < DATA_VERSION_4_1_0_0 && index_keyname_ == SPATIAL_KEY) { ret = OB_NOT_SUPPORTED; LOG_WARN("tenant data version is less than 4.1, spatial index is not supported", K(ret), K(tenant_data_version)); LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.1, spatial index"); + } else if (tenant_data_version < DATA_VERSION_4_3_1_0 && index_keyname_ == FTS_KEY) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.1, fulltext index not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, fulltext index"); + } else if (tenant_data_version < DATA_VERSION_4_3_1_0 && index_keyname_ == MULTI_KEY) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.1, multivalue index not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, multivalue index"); } else if (global_) { if (index_keyname_ == SPATIAL_KEY) { type = INDEX_TYPE_SPATIAL_GLOBAL; + } else if (index_keyname_ == FTS_KEY) { + type = INDEX_TYPE_DOC_ID_ROWKEY_GLOBAL; + } else if (index_keyname_ == MULTI_KEY) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("global multivalue index not supported", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, multivalue index"); } else { type = 
INDEX_TYPE_NORMAL_GLOBAL; } } else { if (index_keyname_ == SPATIAL_KEY) { type = INDEX_TYPE_SPATIAL_LOCAL; + } else if (index_keyname_ == FTS_KEY) { + type = INDEX_TYPE_DOC_ID_ROWKEY_LOCAL; + } else if (index_keyname_ == MULTI_KEY) { + type = INDEX_TYPE_NORMAL_MULTIVALUE_LOCAL; } else { type = INDEX_TYPE_NORMAL_LOCAL; } diff --git a/src/sql/resolver/ddl/ob_alter_table_stmt.cpp b/src/sql/resolver/ddl/ob_alter_table_stmt.cpp index 7412071202..ae5e58bf1c 100644 --- a/src/sql/resolver/ddl/ob_alter_table_stmt.cpp +++ b/src/sql/resolver/ddl/ob_alter_table_stmt.cpp @@ -20,7 +20,7 @@ namespace sql ObAlterTableStmt::ObAlterTableStmt(common::ObIAllocator *name_pool) : ObTableStmt(name_pool, stmt::T_ALTER_TABLE), is_comment_table_(false), - is_alter_system_(false), is_alter_triggers_(false), + is_alter_system_(false), fts_arg_allocator_(nullptr), is_alter_triggers_(false), interval_expr_(NULL), transition_expr_(NULL), alter_table_action_count_(0), alter_external_table_type_(0) { @@ -28,13 +28,23 @@ ObAlterTableStmt::ObAlterTableStmt(common::ObIAllocator *name_pool) ObAlterTableStmt::ObAlterTableStmt() : ObTableStmt(stmt::T_ALTER_TABLE), is_comment_table_(false), is_alter_system_(false), - is_alter_triggers_(false), interval_expr_(NULL), transition_expr_(NULL), alter_table_action_count_(0), + fts_arg_allocator_(nullptr), is_alter_triggers_(false), interval_expr_(NULL), transition_expr_(NULL), alter_table_action_count_(0), alter_external_table_type_(0) { } ObAlterTableStmt::~ObAlterTableStmt() { + for (int64_t i = 0; i < index_arg_list_.count(); ++i) { /* destroy and free only the fts/multivalue index args, through fts_arg_allocator_; every other entry is left untouched */ + obrpc::ObCreateIndexArg *index_arg = index_arg_list_.at(i); + if (nullptr != index_arg && nullptr != fts_arg_allocator_ /* a null entry or an allocator that was never set is skipped instead of dereferenced */ + && (is_fts_index(index_arg->index_type_) || is_multivalue_index(index_arg->index_type_))) { + index_arg->~ObCreateIndexArg(); + fts_arg_allocator_->free(index_arg); + } + } + index_arg_list_.reuse(); + fts_arg_allocator_ = nullptr; } int ObAlterTableStmt::add_column(const share::schema::AlterColumnSchema &column_schema) diff --git
a/src/sql/resolver/ddl/ob_alter_table_stmt.h b/src/sql/resolver/ddl/ob_alter_table_stmt.h index 5926d19ee9..8ff1f3c9c2 100644 --- a/src/sql/resolver/ddl/ob_alter_table_stmt.h +++ b/src/sql/resolver/ddl/ob_alter_table_stmt.h @@ -98,6 +98,7 @@ public: { return alter_table_arg_.has_rename_action(); } virtual obrpc::ObDDLArg &get_ddl_arg() { return alter_table_arg_; } common::ObSArray &get_index_arg_list() { return index_arg_list_; } + void set_fts_arg_allocator(common::ObIAllocator *fts_arg_allocator) { fts_arg_allocator_ = fts_arg_allocator; } common::ObSArray &get_foreign_key_arg_list() { return alter_table_arg_.foreign_key_arg_list_; } const common::ObSArray &get_read_only_foreign_key_arg_list() const @@ -118,6 +119,7 @@ private: bool is_comment_table_; bool is_alter_system_; common::ObSArray index_arg_list_; + common::ObIAllocator *fts_arg_allocator_; bool is_alter_triggers_; obrpc::ObAlterTriggerArg tg_arg_; ObRawExpr *interval_expr_; diff --git a/src/sql/resolver/ddl/ob_create_index_resolver.cpp b/src/sql/resolver/ddl/ob_create_index_resolver.cpp index 6fa502198b..a1277d7757 100644 --- a/src/sql/resolver/ddl/ob_create_index_resolver.cpp +++ b/src/sql/resolver/ddl/ob_create_index_resolver.cpp @@ -13,6 +13,7 @@ #define USING_LOG_PREFIX SQL_RESV #include "sql/resolver/ddl/ob_create_index_resolver.h" #include "share/ob_index_builder_util.h" +#include "share/ob_fts_index_builder_util.h" #include "share/schema/ob_table_schema.h" #include "sql/resolver/ddl/ob_create_index_stmt.h" #include "sql/session/ob_sql_session_info.h" @@ -172,6 +173,17 @@ int ObCreateIndexResolver::resolve_index_column_node( } sort_item.column_name_.assign_ptr(const_cast(col_node->children_[0]->str_value_), static_cast(col_node->children_[0]->str_len_)); + bool is_multivalue_index = false; + if (OB_FAIL(ObMulValueIndexBuilderUtil::is_multivalue_index_type(sort_item.column_name_, + is_multivalue_index))) { + LOG_WARN("failed to resolve index type", K(ret)); + } else if (is_multivalue_index) { + // 
not support dynamic create multi-value index + // todo: weiyouchao.wyc + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support dynaimic create multivlaue index", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "not support dynaimic create multivlaue index"); + } } // 前缀索引的前缀长度 if (OB_FAIL(ret)) { @@ -186,10 +198,20 @@ int ObCreateIndexResolver::resolve_index_column_node( sort_item.prefix_len_ = 0; } - // spatial index constraint if (OB_FAIL(ret)) { // do nothing - } else { + } else if (index_keyname_ == FTS_KEY) { + if (!GCONF._enable_add_fulltext_index_to_existing_table) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("build fulltext index afterward is experimental feature", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "experimental feature: build fulltext index afterward"); + } else if (OB_FAIL(resolve_fts_index_constraint(*tbl_schema, + sort_item.column_name_, + index_keyname_value))) { + SQL_RESV_LOG(WARN, "check fts index constraint fail",K(ret), + K(sort_item.column_name_)); + } + } else { // spatial index, NOTE resolve_spatial_index_constraint() will set index_keyname bool is_explicit_order = (NULL != col_node->children_[2] && 1 != col_node->children_[2]->is_empty_); if (OB_FAIL(resolve_spatial_index_constraint(*tbl_schema, sort_item.column_name_, @@ -728,6 +750,9 @@ int ObCreateIndexResolver::set_table_option_to_stmt(bool is_partitioned) } else { index_arg.index_type_ = INDEX_TYPE_SPATIAL_LOCAL; } + } else if (FTS_KEY == index_keyname_) { + // TODO hanxuan + ret = OB_NOT_SUPPORTED; } index_arg.data_table_id_ = data_table_id_; index_arg.index_table_id_ = index_table_id_; @@ -742,7 +767,8 @@ int ObCreateIndexResolver::set_table_option_to_stmt(bool is_partitioned) index_arg.sql_mode_ = session_info_->get_sql_mode(); create_index_stmt->set_comment(comment_); create_index_stmt->set_tablespace_id(tablespace_id_); - if (OB_FAIL(create_index_stmt->set_encryption_str(encryption_))) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(create_index_stmt->set_encryption_str(encryption_))) { 
LOG_WARN("fail to set encryption str", K(ret)); } } diff --git a/src/sql/resolver/ddl/ob_create_table_resolver.cpp b/src/sql/resolver/ddl/ob_create_table_resolver.cpp index c8dbd3f6ba..58cf46a53c 100644 --- a/src/sql/resolver/ddl/ob_create_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_create_table_resolver.cpp @@ -21,6 +21,7 @@ #include "common/ob_store_format.h" #include "share/schema/ob_table_schema.h" #include "share/config/ob_server_config.h" +#include "share/ob_fts_index_builder_util.h" #include "sql/resolver/ddl/ob_create_table_stmt.h" #include "sql/resolver/expr/ob_raw_expr_util.h" #include "sql/resolver/expr/ob_raw_expr_resolver_impl.h" @@ -2287,6 +2288,40 @@ int ObCreateTableResolver::generate_index_arg() } else { type = INDEX_TYPE_SPATIAL_LOCAL; } + } else if (FTS_KEY == index_keyname_) { + if (tenant_data_version < DATA_VERSION_4_3_1_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.1, fulltext index not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, fulltext index"); + } else if (global_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support global fts index now", K(ret)); + } else { + // set type to fts_doc_rowkey first, append other fts arg later + type = INDEX_TYPE_DOC_ID_ROWKEY_LOCAL; + } + } else if (MULTI_KEY == index_keyname_) { + if (tenant_data_version < DATA_VERSION_4_3_1_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.1, multivalue index is not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, multivalue index"); + } else if (global_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support global fts index now", K(ret)); + } else { + type = INDEX_TYPE_NORMAL_MULTIVALUE_LOCAL; + } + } else if (MULTI_UNIQUE_KEY == index_keyname_) { + if (tenant_data_version < DATA_VERSION_4_3_1_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is 
less than 4.3.1, multivalue index is not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, multivalue index"); + } else if (global_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support global multivalue index now", K(ret)); + } else { + type = INDEX_TYPE_UNIQUE_MULTIVALUE_LOCAL; + } } } if(OB_SUCC(ret)) { @@ -2538,6 +2573,7 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) SQL_RESV_LOG(WARN, "add session id key failed", K(ret)); } bool cnt_func_index_mysql = false; + bool is_multi_value_index = false; for (int32_t i = 0; OB_SUCC(ret) && i < index_column_list_node->num_child_; ++i) { ObString &column_name = sort_item.column_name_; if (NULL == index_column_list_node->children_[i] @@ -2565,7 +2601,11 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) column_name.assign_ptr( const_cast(index_column_node->children_[0]->str_value_), static_cast(index_column_node->children_[0]->str_len_)); - if (NULL != index_column_node->children_[1]) { + if (OB_FAIL(ObMulValueIndexBuilderUtil::adjust_index_type(column_name, + is_multi_value_index, + reinterpret_cast(&index_keyname_)))) { + LOG_WARN("failed to resolve index type", K(ret)); + } else if (NULL != index_column_node->children_[1]) { sort_item.prefix_len_ = static_cast(index_column_node->children_[1]->value_); if (0 == sort_item.prefix_len_) { ret = OB_KEY_PART_0; @@ -2578,8 +2618,41 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) } if (OB_SUCC(ret)) { if (sort_item.is_func_index_) { + bool is_mulvalue_index = (index_keyname_ == MULTI_KEY || index_keyname_ == MULTI_UNIQUE_KEY); ObRawExpr *expr = NULL; - if (OB_FAIL(ObRawExprUtils::build_generated_column_expr(NULL, + if (is_mulvalue_index) { + ObColumnSchemaV2 *budy_column_schema = NULL; + if (OB_FAIL(ObMulValueIndexBuilderUtil::build_and_generate_multivalue_column( + sort_item, + *params_.expr_factory_, + *session_info_, + tbl_schema, + 
schema_checker_, + column_schema, + budy_column_schema))) { + LOG_WARN("failed to build index schema failed", K(ret)); + } else if (OB_ISNULL(column_schema) || OB_ISNULL(budy_column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("multivalue index generate column, or budy column is null.", + K(ret), KP(column_schema), KP(budy_column_schema)); + } else { + ObColumnNameHashWrapper column_name_key(column_schema->get_column_name_str()); + if (OB_FAIL(column_name_set_.set_refactored(column_name_key))) { + LOG_WARN("add column name to map failed", K(column_schema->get_column_name_str()), K(ret)); + } else { + ObColumnSortItem budy_sort_item; + budy_sort_item.is_func_index_ = true; + budy_sort_item.column_name_ = budy_column_schema->get_column_name_str(); + + ObColumnNameHashWrapper budy_column_name_key(budy_column_schema->get_column_name_str()); + if (OB_FAIL(column_name_set_.set_refactored(budy_column_name_key))) { + LOG_WARN("add column name to map failed", K(column_schema->get_column_name_str()), K(ret)); + } else if (OB_FAIL(add_sort_column(budy_sort_item))) { + LOG_WARN("failed to add sort item", K(ret)); + } + } + } + } else if (OB_FAIL(ObRawExprUtils::build_generated_column_expr(NULL, column_name, *params_.expr_factory_, *session_info_, @@ -2629,7 +2702,8 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) } else if (sort_item.prefix_len_ > column_schema->get_data_length()) { ret = OB_WRONG_SUB_KEY; SQL_RESV_LOG(WARN, "prefix length is longer than column length", K(sort_item), K(column_schema->get_data_length()), K(ret)); - } else if (ob_is_text_tc(column_schema->get_data_type())) { + } else if (ob_is_text_tc(column_schema->get_data_type()) + && static_cast(INDEX_KEYNAME::FTS_KEY) != node->value_) { if (column_schema->is_hidden()) { //functional index in mysql mode ret = OB_ERR_FUNCTIONAL_INDEX_ON_LOB; @@ -2642,7 +2716,13 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) index_column_list_node->num_child_, node->value_, 
is_oracle_mode, NULL != index_column_node->children_[2] && 1 != index_column_node->children_[2]->is_empty_))) { SQL_RESV_LOG(WARN, "fail to resolve spatial index constraint", K(ret), K(column_name)); + } else if (OB_FAIL(resolve_fts_index_constraint(*column_schema, + node->value_))) { + SQL_RESV_LOG(WARN, "fail to resolve fts index constraint", K(ret), K(column_name)); + } else if (OB_FAIL(resolve_multivalue_index_constraint(*column_schema, index_keyname_))) { + SQL_RESV_LOG(WARN, "fail to resolve multivalue index constraint", K(ret), K(column_name)); } + if (OB_SUCC(ret) && ob_is_string_type(column_schema->get_data_type())) { int64_t length = 0; if (OB_FAIL(column_schema->get_byte_length(length, is_oracle_mode, false))) { @@ -2652,7 +2732,8 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) } else { /*do nothing*/ } if (OB_SUCC(ret)) { - if ((index_data_length += length) > OB_MAX_USER_ROW_KEY_LENGTH) { + if ((index_data_length += length) > OB_MAX_USER_ROW_KEY_LENGTH + && static_cast(INDEX_KEYNAME::FTS_KEY) != node->value_) { ret = OB_ERR_TOO_LONG_KEY_LENGTH; LOG_USER_ERROR(OB_ERR_TOO_LONG_KEY_LENGTH, OB_MAX_USER_ROW_KEY_LENGTH); } else if (index_data_length <= 0) { @@ -2791,7 +2872,7 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) if (OB_FAIL(resolve_index_name( ObItemType::T_INDEX == node->type_ ? node->children_[0] : NULL, first_column_name, - UNIQUE_KEY == index_keyname_ ? true : false, + (UNIQUE_KEY == index_keyname_ || MULTI_UNIQUE_KEY == index_keyname_) ? 
true : false, uk_name))) { SQL_RESV_LOG(WARN, "resolve index name failed", K(ret)); } else if (ObItemType::T_INDEX == node->type_ && OB_FAIL(resolve_table_options(node->children_[2], true))) { @@ -2869,10 +2950,30 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) } } if (OB_SUCC(ret)) { - if (OB_FAIL(resolve_results.push_back(resolve_result))) { - LOG_WARN("fail to push back index_stmt_list", K(ret), K(resolve_result)); - } else if (OB_FAIL(index_arg_list.push_back(create_index_arg))) { - LOG_WARN("fail to push back index_arg", K(ret)); + if (is_fts_index(index_arg_.index_type_)) { + if (OB_FAIL(ObDDLResolver::append_fts_args(resolve_result, + create_index_arg, + have_generate_fts_arg_, + resolve_results, + index_arg_list, + allocator_))) { + LOG_WARN("failed to append fts args", K(ret)); + } + } else if (is_multivalue_index(index_arg_.index_type_)) { + if (OB_FAIL(ObDDLResolver::append_multivalue_args(resolve_result, + create_index_arg, + have_generate_fts_arg_, + resolve_results, + index_arg_list, + allocator_))) { + LOG_WARN("failed to append fts args", K(ret)); + } + } else { + if (OB_FAIL(resolve_results.push_back(resolve_result))) { + LOG_WARN("fail to push back index_stmt_list", K(ret), K(resolve_result)); + } else if (OB_FAIL(index_arg_list.push_back(create_index_arg))) { + LOG_WARN("fail to push back index_arg", K(ret)); + } } } } diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.cpp b/src/sql/resolver/ddl/ob_ddl_resolver.cpp index 4a638ae341..aabe9fe4a0 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.cpp +++ b/src/sql/resolver/ddl/ob_ddl_resolver.cpp @@ -31,6 +31,7 @@ #include "sql/printer/ob_raw_expr_printer.h" #include "sql/resolver/expr/ob_raw_expr_part_func_checker.h" #include "share/ob_index_builder_util.h" +#include "share/ob_fts_index_builder_util.h" #include "share/object/ob_obj_cast.h" #include "observer/omt/ob_tenant_config_mgr.h" #include "ob_sequence_stmt.h" @@ -114,6 +115,7 @@ 
ObDDLResolver::ObDDLResolver(ObResolverParams &params) ttl_definition_(), kv_attributes_(), name_generated_type_(GENERATED_TYPE_UNKNOWN), + have_generate_fts_arg_(false), is_set_lob_inrow_threshold_(false), lob_inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD) { @@ -124,6 +126,241 @@ ObDDLResolver::~ObDDLResolver() { } +int ObDDLResolver::append_fts_args( + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg *index_arg, + bool &fts_common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *arg_allocator) +{ /* Pointer-taking overload: expands *index_arg into the fts index args via the reference overload (collected in fts_args), then copies each one into storage obtained from arg_allocator and appends the copy to index_arg_list. Returns OB_INVALID_ARGUMENT when arg_allocator or index_arg is null. */ + int ret = OB_SUCCESS; + ObSArray fts_args; + if (OB_ISNULL(arg_allocator) || OB_ISNULL(index_arg)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(arg_allocator), KP(index_arg)); + } else if (OB_FAIL(append_fts_args(resolve_result, + *index_arg, + fts_common_aux_table_exist, + resolve_results, + fts_args, + arg_allocator))) { + LOG_WARN("failed to append fts args", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < fts_args.count(); ++i) { + ObCreateIndexArg *index_arg = NULL; + void *tmp_ptr = NULL; + if (NULL == (tmp_ptr = (ObCreateIndexArg *)arg_allocator->alloc( + sizeof(obrpc::ObCreateIndexArg)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory", K(ret)); + } else if (FALSE_IT(index_arg = new (tmp_ptr) ObCreateIndexArg())) { + } else if (OB_FAIL(index_arg->assign(fts_args.at(i))) + || OB_FAIL(index_arg_list.push_back(index_arg))) { + /* on either failure the copy was never handed to index_arg_list, so destroy and free it here */ + index_arg->~ObCreateIndexArg(); + arg_allocator->free(index_arg); + LOG_WARN("failed to assign or push back index arg", K(ret)); + } + } + return ret; +} + +int ObDDLResolver::append_fts_args( + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg &index_arg, + bool &fts_common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *allocator) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(allocator)) { + ret =
OB_ERR_UNEXPECTED; + LOG_WARN("allocator is null", K(ret)); + } else if (!fts_common_aux_table_exist) { + const int64_t num_fts_args = 4; + if (OB_FAIL(ObFtsIndexBuilderUtil::append_fts_rowkey_doc_arg(index_arg, + allocator, + index_arg_list))) { + LOG_WARN("failed to append fts_rowkey_doc arg", K(ret)); + } else if (OB_FAIL(ObFtsIndexBuilderUtil::append_fts_doc_rowkey_arg(index_arg, + allocator, + index_arg_list))) { + LOG_WARN("failed to append fts_doc_rowkey arg", K(ret)); + } else if (OB_FAIL(ObFtsIndexBuilderUtil::append_fts_index_arg(index_arg, + allocator, + index_arg_list))) { + LOG_WARN("failed to append fts_index arg", K(ret)); + } else if (OB_FAIL(ObFtsIndexBuilderUtil::append_fts_doc_word_arg(index_arg, + allocator, + index_arg_list))) { + LOG_WARN("failed to append fts_doc_word arg", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < num_fts_args; ++i) { + if (OB_FAIL(resolve_results.push_back(resolve_result))) { + LOG_WARN("fail to push back index_stmt_list", K(ret), K(resolve_result)); + } + } + if (OB_SUCC(ret)) { + fts_common_aux_table_exist = true; + } + } else { + const int64_t num_fts_args = 2; + if (OB_FAIL(ObFtsIndexBuilderUtil::append_fts_index_arg(index_arg, + allocator, + index_arg_list))) { + LOG_WARN("failed to append fts_index arg", K(ret)); + } else if (OB_FAIL(ObFtsIndexBuilderUtil::append_fts_doc_word_arg(index_arg, + allocator, + index_arg_list))) { + LOG_WARN("failed to append fts_doc_word arg", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < num_fts_args; ++i) { + if (OB_FAIL(resolve_results.push_back(resolve_result))) { + LOG_WARN("fail to push back index_stmt_list", K(ret), K(resolve_result)); + } + } + } + return ret; +} + +int ObDDLResolver::append_multivalue_args( + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg *index_arg, + bool &common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *arg_allocator) +{ + int ret = OB_SUCCESS; + ObSArray 
multivalue_args; + if (OB_ISNULL(arg_allocator) || OB_ISNULL(index_arg)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(arg_allocator), KP(index_arg)); + } else if (OB_FAIL(append_multivalue_args(resolve_result, + *index_arg, + common_aux_table_exist, + resolve_results, + multivalue_args, + arg_allocator))) { + LOG_WARN("failed to append multivalue args", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < multivalue_args.count(); ++i) { + ObCreateIndexArg *index_arg = NULL; + void *tmp_ptr = NULL; + if (NULL == (tmp_ptr = (ObCreateIndexArg *)arg_allocator->alloc( + sizeof(obrpc::ObCreateIndexArg)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory", K(ret)); + } else if (FALSE_IT(index_arg = new (tmp_ptr) ObCreateIndexArg())) { + } else if (OB_FAIL(index_arg->assign(multivalue_args.at(i)))) { + LOG_WARN("failed to assign", K(ret)); + } else if (OB_FAIL(index_arg_list.push_back(index_arg))) { + index_arg->~ObCreateIndexArg(); + arg_allocator->free(index_arg); + LOG_WARN("failed to push back", K(ret)); + } + } + return ret; +} + +int ObDDLResolver::append_multivalue_args( + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg &index_arg, + bool &common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *allocator) +{ + int ret = OB_SUCCESS; + int64_t num_mulvalue_args = 3; + + if (OB_ISNULL(allocator)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is null", K(ret)); + } else if (!common_aux_table_exist) { + ObCreateIndexArg tmp_index_arg; + if (OB_FAIL(tmp_index_arg.assign(index_arg))) { + LOG_WARN("failed to assign arg", K(ret)); + } else if (FALSE_IT(tmp_index_arg.index_columns_.reuse())) { + } else if (OB_FAIL(ObFtsIndexBuilderUtil::append_fts_rowkey_doc_arg(tmp_index_arg, + allocator, + index_arg_list))) { + LOG_WARN("failed to append fts_rowkey_doc arg", K(ret)); + } else if 
(OB_FAIL(ObFtsIndexBuilderUtil::append_fts_doc_rowkey_arg(tmp_index_arg, + allocator, + index_arg_list))) { + LOG_WARN("failed to append fts_doc_rowkey arg", K(ret)); + } else if (OB_FAIL(ObMulValueIndexBuilderUtil::append_mulvalue_arg(index_arg, + allocator, + index_arg_list))) { + LOG_WARN("failed to append fts_index arg", K(ret)); + } + } else { + num_mulvalue_args = 1; + if (OB_FAIL(ObMulValueIndexBuilderUtil::append_mulvalue_arg(index_arg, + allocator, + index_arg_list))) { + LOG_WARN("failed to append mulvalue index arg", K(ret)); + } + } + + + for (int64_t i = 0; OB_SUCC(ret) && i < num_mulvalue_args; ++i) { + if (OB_FAIL(resolve_results.push_back(resolve_result))) { + LOG_WARN("fail to push back index_stmt_list", K(ret), K(resolve_result)); + } + } + if (OB_SUCC(ret)) { + common_aux_table_exist = true; + } + return ret; +} + +int ObDDLResolver::append_domain_index_args( + const ObTableSchema &table_schema, + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg *index_arg, + bool &common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *arg_allocator) +{ + int ret = OB_SUCCESS; + + const ObColumnSchemaV2 *doc_id_col = nullptr; + if (OB_FAIL(ObFtsIndexBuilderUtil::get_doc_id_col(table_schema, + doc_id_col))) { + LOG_WARN("failed to get doc id col", K(ret)); + } else if (OB_NOT_NULL(doc_id_col)) { + common_aux_table_exist = true; + } + + if (OB_FAIL(ret)) { + } else if (is_multivalue_index_aux(index_arg->index_type_)) { + if (OB_FAIL(ObDDLResolver::append_multivalue_args(resolve_result, + index_arg, + common_aux_table_exist, + resolve_results, + index_arg_list, + arg_allocator))) { + LOG_WARN("failed to append multivalue args", K(ret), K(index_arg->index_type_)); + } + } else if (is_fts_index(index_arg->index_type_)) { + if (OB_FAIL(ObDDLResolver::append_fts_args(resolve_result, + index_arg, + common_aux_table_exist, + resolve_results, + index_arg_list, + arg_allocator))) { + LOG_WARN("failed 
to append fts args", K(ret)); + } + } + + return ret; +} + int ObDDLResolver::get_part_str_with_type( const bool is_oracle_mode, ObPartitionFuncType part_func_type, @@ -6753,6 +6990,125 @@ int ObDDLResolver::resolve_spatial_index_constraint( return ret; } +int ObDDLResolver::resolve_fts_index_constraint( + const share::schema::ObTableSchema &table_schema, + const common::ObString &column_name, + const int64_t index_keyname_value) +{ + int ret = OB_SUCCESS; + const ObColumnSchemaV2 *column_schema = NULL; + if (!table_schema.is_valid() || column_name.empty()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argumnet", K(ret), K(table_schema), K(column_name)); + } else if (OB_ISNULL(session_info_) || OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret), K(session_info_), K(allocator_)); + } else if (OB_ISNULL(column_schema = table_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, + column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(resolve_fts_index_constraint(*column_schema, + index_keyname_value))) { + LOG_WARN("resolve fts index constraint fail", K(ret), K(index_keyname_value)); + } + return ret; +} + +// Fts index can only be built on text columns. 
+// CREATE TABLE fts_index_constraint (id int, +// title varchar(100), +// content text, +// FULLTEXT(title, content)); +int ObDDLResolver::resolve_fts_index_constraint( + const share::schema::ObColumnSchemaV2 &column_schema, + const int64_t index_keyname_value) +{ + int ret = OB_SUCCESS; + if (!column_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argumnet", K(ret), K(column_schema)); + } else { + bool is_fts_index = + (index_keyname_value == static_cast(INDEX_KEYNAME::FTS_KEY)); + uint64_t tenant_id = column_schema.get_tenant_id(); + bool is_text_column = ob_is_string_tc(column_schema.get_data_type()) || + ob_is_text_tc(column_schema.get_data_type()); + uint64_t tenant_data_version = 0; + if (!is_fts_index) { + // do nothing + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get tenant data version failed", K(ret)); + } else if (tenant_data_version < DATA_VERSION_4_3_1_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.1, fulltext index not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, fulltext index"); + } else if (!is_text_column) { + ret = OB_ERR_FTS_MUST_HAVE_TEXT_COL; + LOG_USER_ERROR(OB_ERR_FTS_MUST_HAVE_TEXT_COL); + LOG_WARN("fts index can only be built on text column", K(ret), K(column_schema)); + } else { + index_keyname_ = FTS_KEY; + } + } + return ret; +} + +int ObDDLResolver::resolve_multivalue_index_constraint( + const share::schema::ObTableSchema &table_schema, + const common::ObString &column_name, + const int64_t index_keyname_value) +{ + int ret = OB_SUCCESS; + const ObColumnSchemaV2 *column_schema = NULL; + if (!table_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argumnet", K(ret), K(table_schema)); + } else if (OB_ISNULL(session_info_) || OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret), K(session_info_), 
K(allocator_)); + } else if (OB_ISNULL(column_schema = table_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), column_name.ptr()); + } else if (OB_FAIL(resolve_multivalue_index_constraint(*column_schema, + index_keyname_value))) { + LOG_WARN("resolve multivalue index constraint fail", K(ret), K(index_keyname_value)); + } + return ret; +} + +// multi value index can only be built on json columns. +// CREATE TABLE multivalue_index_constraint (id int, +// title varchar(100), +// content json, +// index mvi ((cast(content as unsigned array)))); +int ObDDLResolver::resolve_multivalue_index_constraint( + const share::schema::ObColumnSchemaV2 &column_schema, + const int64_t index_keyname_value) +{ + int ret = OB_SUCCESS; + if (!column_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argumnet", K(ret), K(column_schema)); + } else { + bool is_multival_index = (index_keyname_value == static_cast(INDEX_KEYNAME::MULTI_KEY) + || index_keyname_value == static_cast(INDEX_KEYNAME::MULTI_UNIQUE_KEY)); + uint64_t tenant_id = column_schema.get_tenant_id(); + uint64_t tenant_data_version = 0; + if (!is_multival_index) { + // do nothing + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get tenant data version failed", K(ret)); + } else if (tenant_data_version < DATA_VERSION_4_3_1_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.1, multivalue index not supported", K(ret), K(tenant_data_version)); + } else { + index_keyname_ = static_cast(index_keyname_value); + } + } + return ret; +} + int ObDDLResolver::resolve_list_partition_elements(ParseNode *node, const bool is_subpartition, const ObPartitionFuncType part_type, diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.h b/src/sql/resolver/ddl/ob_ddl_resolver.h index 2cf6fa3772..1dd554d383 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.h +++ 
b/src/sql/resolver/ddl/ob_ddl_resolver.h @@ -164,6 +164,42 @@ public: static const int64_t DEFAULT_TABLE_DOP = 1; explicit ObDDLResolver(ObResolverParams ¶ms); virtual ~ObDDLResolver(); + static int append_fts_args( + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg *index_arg, + bool &fts_common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *arg_allocator); + static int append_fts_args( + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg &index_arg, + bool &fts_common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *allocator); + static int append_multivalue_args( + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg *index_arg, + bool &fts_common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *arg_allocator); + static int append_multivalue_args( + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg &index_arg, + bool &fts_common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *allocator); + static int append_domain_index_args( + const ObTableSchema &table_schema, + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg *index_arg, + bool &common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *arg_allocator); static int check_text_length(ObCharsetType cs_type, ObCollationType co_type, const char *name, ObObjType &type, int32_t &length, @@ -467,6 +503,20 @@ public: const int64_t index_keyname_value, bool is_oracle_mode, bool is_explicit_order); + int resolve_fts_index_constraint( + const share::schema::ObTableSchema &table_schema, + const common::ObString &column_name, + const int64_t index_keyname_value); + int resolve_fts_index_constraint( + const share::schema::ObColumnSchemaV2 &column_schema, + const int64_t 
index_keyname_value); + int resolve_multivalue_index_constraint( + const share::schema::ObTableSchema &table_schema, + const common::ObString &column_name, + const int64_t index_keyname_value); + int resolve_multivalue_index_constraint( + const share::schema::ObColumnSchemaV2 &column_schema, + const int64_t index_keyname_value); protected: static int get_part_str_with_type( const bool is_oracle_mode, @@ -974,6 +1024,7 @@ protected: common::ObString ttl_definition_; common::ObString kv_attributes_; ObNameGeneratedType name_generated_type_; + bool have_generate_fts_arg_; bool is_set_lob_inrow_threshold_; int64_t lob_inrow_threshold_; private: diff --git a/src/sql/resolver/dml/ob_any_value_checker.cpp b/src/sql/resolver/dml/ob_any_value_checker.cpp index 7f88343715..336c8fb21f 100644 --- a/src/sql/resolver/dml/ob_any_value_checker.cpp +++ b/src/sql/resolver/dml/ob_any_value_checker.cpp @@ -118,6 +118,12 @@ int ObAnyValueChecker::visit(ObPseudoColumnRawExpr &expr) return OB_SUCCESS; } +int ObAnyValueChecker::visit(ObMatchFunRawExpr &expr) +{ + UNUSED(expr); + return OB_SUCCESS; +} + int ObAnyValueChecker::check_any_value(const ObRawExpr *expr, const ObColumnRefRawExpr * undefined_column) { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/dml/ob_any_value_checker.h b/src/sql/resolver/dml/ob_any_value_checker.h index caeee806fd..b064ddb187 100644 --- a/src/sql/resolver/dml/ob_any_value_checker.h +++ b/src/sql/resolver/dml/ob_any_value_checker.h @@ -43,6 +43,7 @@ public: virtual int visit(ObWinFunRawExpr &expr); virtual int visit(ObPseudoColumnRawExpr &expr); virtual int visit(ObPlQueryRefRawExpr &expr); + virtual int visit(ObMatchFunRawExpr &expr); // set expr skip virtual bool skip_child(ObRawExpr &expr) { return skip_expr_ == &expr; } diff --git a/src/sql/resolver/dml/ob_default_value_utils.cpp b/src/sql/resolver/dml/ob_default_value_utils.cpp index 64c7b1daee..64b5c82224 100644 --- a/src/sql/resolver/dml/ob_default_value_utils.cpp +++ 
b/src/sql/resolver/dml/ob_default_value_utils.cpp @@ -377,6 +377,9 @@ int ObDefaultValueUtils::build_default_expr_strict(const ColumnItem *column, ObR if (OB_FAIL(resolver_->build_heap_table_hidden_pk_expr(expr, column->get_expr()))) { LOG_WARN("failed to build next_val expr", K(ret), KPC(column->get_expr())); } + } else if (OB_ISNULL(column->get_expr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column expr is nullptr", K(ret), KPC(column)); } else if (column->is_auto_increment()) { if (OB_FAIL(resolver_->build_autoinc_nextval_expr(expr, column->base_tid_, diff --git a/src/sql/resolver/dml/ob_del_upd_resolver.cpp b/src/sql/resolver/dml/ob_del_upd_resolver.cpp index cfb17c9b96..6fe60c46f1 100644 --- a/src/sql/resolver/dml/ob_del_upd_resolver.cpp +++ b/src/sql/resolver/dml/ob_del_upd_resolver.cpp @@ -3110,47 +3110,70 @@ int ObDelUpdResolver::generate_autoinc_params(ObInsertTableInfo &table_info) if (OB_ISNULL(column_schema)) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("invalid column schema", K(column_schema)); - } else { - uint64_t column_id = column_schema->get_column_id(); - if (column_schema->is_autoincrement()) { - del_upd_stmt->set_affected_last_insert_id(true); - AutoincParam param; - param.tenant_id_ = params_.session_info_->get_effective_tenant_id(); - param.autoinc_table_id_ = table_info.ref_table_id_; - param.autoinc_first_part_num_ = table_schema->get_first_part_num(); - param.autoinc_table_part_num_ = table_schema->get_all_part_num(); - param.autoinc_col_id_ = column_id; - param.auto_increment_cache_size_ = auto_increment_cache_size; - param.part_level_ = table_schema->get_part_level(); - ObObjType column_type = table_schema->get_column_schema(column_id)->get_data_type(); - param.autoinc_col_type_ = column_type; - param.autoinc_desired_count_ = 0; - param.autoinc_mode_is_order_ = table_schema->is_order_auto_increment_mode(); - param.autoinc_version_ = table_schema->get_truncate_version(); - param.autoinc_auto_increment_ = 
table_schema->get_auto_increment(); - - // hidden pk auto-increment variables' default value is 1 - // auto-increment variables for other columns are set in ob_sql.cpp - // because physical plan may come from plan cache; it need be reset every time - if (OB_HIDDEN_PK_INCREMENT_COLUMN_ID == column_id) { - param.autoinc_increment_ = 1; - param.autoinc_offset_ = 1; - param.part_value_no_order_ = true; - } else if (column_schema->is_tbl_part_key_column()) { - // don't keep intra-partition value asc order when partkey column is auto inc - param.part_value_no_order_ = true; - } - - if (OB_FAIL(get_value_row_size(param.total_value_count_))) { - LOG_WARN("fail to get value row size", K(ret)); - } else if (OB_FAIL(del_upd_stmt->get_autoinc_params().push_back(param))) { - LOG_WARN("failed to push autoinc_param", K(param), K(ret)); - } + } else if (column_schema->is_autoincrement()) { + const ObTableSchema *t_schema = table_schema; + uint64_t table_id = table_info.ref_table_id_; + AutoincParam param; + del_upd_stmt->set_affected_last_insert_id(true); + if (FAILEDx(build_autoinc_param(table_id, t_schema, column_schema, auto_increment_cache_size, param))) { + LOG_WARN("fail to build auto param", K(ret), K(table_id), K(table_info), KPC(column_schema)); + } else if (OB_FAIL(del_upd_stmt->get_autoinc_params().push_back(param))) { + LOG_WARN("failed to push autoinc_param", K(param), K(ret)); } } }//end for } - LOG_DEBUG("generate autoinc_params", "autoin_params", del_upd_stmt->get_autoinc_params()); + LOG_DEBUG("generate autoinc_params", "autoinc_params", del_upd_stmt->get_autoinc_params()); + return ret; +} + +int ObDelUpdResolver::build_autoinc_param( + const uint64_t table_id, + const ObTableSchema *table_schema, + const ObColumnSchemaV2 *column_schema, + const int64_t auto_increment_cache_size, + AutoincParam ¶m) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(OB_INVALID_ID == table_id || auto_increment_cache_size < 0) + || OB_ISNULL(table_schema) + || OB_ISNULL(column_schema) + 
|| OB_ISNULL(params_.session_info_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(table_id), KP(table_schema), KP(column_schema), + K(auto_increment_cache_size), KP(params_.session_info_)); + } else { + const uint64_t column_id = column_schema->get_column_id(); + const ObObjType column_type = column_schema->get_data_type(); + param.tenant_id_ = params_.session_info_->get_effective_tenant_id(); + param.autoinc_table_id_ = table_id; + param.autoinc_first_part_num_ = table_schema->get_first_part_num(); + param.autoinc_table_part_num_ = table_schema->get_all_part_num(); + param.autoinc_col_id_ = column_id; + param.auto_increment_cache_size_ = auto_increment_cache_size; + param.part_level_ = table_schema->get_part_level(); + param.autoinc_col_type_ = column_type; + param.autoinc_desired_count_ = 0; + param.autoinc_mode_is_order_ = table_schema->is_order_auto_increment_mode(); + param.autoinc_version_ = table_schema->get_truncate_version(); + param.autoinc_auto_increment_ = table_schema->get_auto_increment(); + + // hidden pk auto-increment variables' default value is 1 + // auto-increment variables for other columns are set in ob_sql.cpp + // because physical plan may come from plan cache; it need be reset every time + if (OB_HIDDEN_PK_INCREMENT_COLUMN_ID == column_id) { + param.autoinc_increment_ = 1; + param.autoinc_offset_ = 1; + param.part_value_no_order_ = true; + } else if (column_schema->is_tbl_part_key_column()) { + // don't keep intra-partition value asc order when partkey column is auto inc + param.part_value_no_order_ = true; + } + + if (OB_FAIL(get_value_row_size(param.total_value_count_))) { + LOG_WARN("fail to get value row size", K(ret)); + } + } return ret; } diff --git a/src/sql/resolver/dml/ob_del_upd_resolver.h b/src/sql/resolver/dml/ob_del_upd_resolver.h index 90f4a3703b..10cc0638a3 100644 --- a/src/sql/resolver/dml/ob_del_upd_resolver.h +++ b/src/sql/resolver/dml/ob_del_upd_resolver.h @@ -271,6 +271,12 @@ protected: int 
add_default_sequence_id_to_stmt(const uint64_t table_id); int recursive_search_sequence_expr(const ObRawExpr *default_expr); int check_need_match_all_params(const common::ObIArray &value_desc, bool &need_match); + int build_autoinc_param( + const uint64_t table_id, + const ObTableSchema *table_schema, + const ObColumnSchemaV2 *column_schema, + const int64_t auto_increment_cache_size, + AutoincParam ¶m); int resolve_json_partial_update_flag(ObIArray &table_assigns, ObStmtScope scope); int mark_json_partial_update_flag(const ObColumnRefRawExpr *ref_expr, ObRawExpr *expr, int depth, bool &allow_json_partial_update); int add_select_item_func(ObSelectStmt &select_stmt, ColumnItem &col); diff --git a/src/sql/resolver/dml/ob_del_upd_stmt.cpp b/src/sql/resolver/dml/ob_del_upd_stmt.cpp index c34c86a2d8..ae9d77467b 100644 --- a/src/sql/resolver/dml/ob_del_upd_stmt.cpp +++ b/src/sql/resolver/dml/ob_del_upd_stmt.cpp @@ -547,10 +547,11 @@ int ObDelUpdStmt::update_base_tid_cid() ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(col), K(ret)); } else { + const bool is_rowkey_doc = col->get_table_name().suffix_match("rowkey_doc"); col_item->base_tid_ = col->get_table_id(); col_item->base_cid_ = col->get_column_id(); if (OB_UNLIKELY(col_item->base_tid_ == OB_INVALID_ID) || - OB_UNLIKELY(j != 0 && col_item->base_tid_ != base_tid)) { + OB_UNLIKELY(j != 0 && col_item->base_tid_ != base_tid && !is_rowkey_doc)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("base table id is invalid", K(ret), K(col_item->base_tid_), K(base_tid)); } else if (j == 0) { diff --git a/src/sql/resolver/dml/ob_delete_resolver.cpp b/src/sql/resolver/dml/ob_delete_resolver.cpp index aafedc6467..c700d77517 100644 --- a/src/sql/resolver/dml/ob_delete_resolver.cpp +++ b/src/sql/resolver/dml/ob_delete_resolver.cpp @@ -214,6 +214,7 @@ int ObDeleteResolver::check_multi_delete_table_conflict() int ObDeleteResolver::resolve_table_list(const ParseNode &table_list, bool &is_multi_table_delete) { int ret = OB_SUCCESS; + 
JoinedTable *joined_table = nullptr; TableItem *table_item = NULL; is_multi_table_delete = false; const ParseNode *delete_list = NULL; @@ -244,6 +245,9 @@ int ObDeleteResolver::resolve_table_list(const ParseNode &table_list, bool &is_m LOG_WARN("invalid table name", K(ret)); } else if (OB_FAIL(column_namespace_checker_.add_reference_table(table_item))) { LOG_WARN("add reference table to namespace checker failed", K(ret)); + } else if (OB_FAIL(try_add_join_table_for_fts(table_item, joined_table))) { + LOG_WARN("fail to try add join table for fts", K(ret), KPC(table_item)); + } else if (nullptr != joined_table && FALSE_IT(table_item = static_cast(joined_table))) { } else if (OB_FAIL(delete_stmt->add_from_item(table_item->table_id_, table_item->is_joined_table()))) { LOG_WARN("failed to add from item", K(ret)); @@ -261,7 +265,9 @@ int ObDeleteResolver::resolve_table_list(const ParseNode &table_list, bool &is_m if (NULL == delete_list) { bool has_tg = false; //single table delete, delete list is same with from list - CK(delete_stmt->get_table_size() == 1); + if (nullptr == joined_table) { // no fulltext index + CK(delete_stmt->get_table_size() == 1); + } OZ(delete_tables_.push_back(delete_stmt->get_table_item(0))); OZ (check_need_fired_trigger(table_item)); } else { @@ -447,7 +453,9 @@ int ObDeleteResolver::generate_delete_table_info(const TableItem &table_item) } } if (OB_SUCC(ret)) { - if (OB_FAIL(delete_stmt->get_delete_table_info().push_back(table_info))) { + if (OB_FAIL(try_update_column_expr_for_fts(table_item, table_info->column_exprs_))) { + LOG_WARN("fail to try update column expr for fts", K(ret), K(table_item)); + } else if (OB_FAIL(delete_stmt->get_delete_table_info().push_back(table_info))) { LOG_WARN("failed to push back table info", K(ret)); } else if (gindex_cnt > 0) { delete_stmt->set_has_global_index(true); diff --git a/src/sql/resolver/dml/ob_dml_resolver.cpp b/src/sql/resolver/dml/ob_dml_resolver.cpp index 2688039d4e..86fa9ef628 100755 --- 
a/src/sql/resolver/dml/ob_dml_resolver.cpp +++ b/src/sql/resolver/dml/ob_dml_resolver.cpp @@ -908,20 +908,26 @@ int ObDMLResolver::transform_geo_dot_notation_attr(ParseNode &node, const ObStri return ret; } -/* -JSON_QUERY '(' js_doc_expr ',' js_literal opt_js_query_returning_type opt_scalars opt_pretty opt_ascii opt_wrapper opt_query_on_error_or_empty_or_mismatch ')' -*/ +/********************************************************* +* JSON_QUERY '(' +* js_doc_expr ',' js_literal opt_js_query_returning_type +* opt_scalars opt_pretty opt_ascii +* opt_wrapper opt_asis opt_query_on_error_or_empty_or_mismatch +* opt_multivalue ')' +**********************************************************/ int ObDMLResolver::transform_dot_notation2_json_query(ParseNode &node, const ObString &sql_str) { INIT_SUCC(ret); - int64_t alloc_vec_size = sizeof(ParseNode *) * 11; - ParseNode **param_vec = NULL; // children_ - ParseNode *opt_node = NULL; // clause node - ParseNode *ret_node = NULL; // returning node - ParseNode *truncate_node = NULL; // truncate node - ParseNode *path_node = NULL; // path node - ParseNode *table_node = NULL; // table node - ParseNode *tmp_node = NULL; // json doc node + const int64_t alloc_vec_size = sizeof(ParseNode *) * 13; + ParseNode **param_vec = NULL; // children_ + ParseNode *opt_node = NULL; // clause node + ParseNode *ret_node = NULL; // returning node + ParseNode *truncate_node = NULL; // truncate node + ParseNode *path_node = NULL; // path node + ParseNode *table_node = NULL; // table node + ParseNode *tmp_node = NULL; // json doc node + ParseNode *asis_node = NULL; // asis flag + ParseNode *multivalue_node = NULL; // multivalue flag ObColumnRefRawExpr *col_expr = NULL; bool is_json_cst = false; bool is_json_type = false; @@ -1019,7 +1025,7 @@ int ObDMLResolver::transform_dot_notation2_json_query(ParseNode &node, const ObS param_vec[2] = ret_node; // return type pos is 2 in json value clause } // opt_scalars opt_pretty opt_ascii opt_wrapper 
opt_query_on_error_or_empty_or_mismatch 7 - for (int8_t i = 3; OB_SUCC(ret) && i < 11; i++) { + for (int8_t i = 3; OB_SUCC(ret) && i < 13; i++) { opt_node = NULL; if (OB_ISNULL(opt_node = static_cast(allocator_->alloc(sizeof(ParseNode))))) { ret = OB_ALLOCATE_MEMORY_FAILED; @@ -1031,15 +1037,15 @@ int ObDMLResolver::transform_dot_notation2_json_query(ParseNode &node, const ObS LOG_WARN("create path node failed", K(ret)); } else { int8_t val = 0; - if (i == 3) { + if (i == 3 || i == 12 || i == 8) { val = 0; } else if (i == 4) { val = 2; - } else if (i == 8) { + } else if (i == 9) { val = 1; - } else if (i == 9 || i == 7) { + } else if (i == 10 || i == 7) { val = 5; - } else if (i == 10) { + } else if (i == 11) { val = 3; // mismatch default is 3 from dot notation } opt_node->value_ = val; @@ -1051,7 +1057,7 @@ int ObDMLResolver::transform_dot_notation2_json_query(ParseNode &node, const ObS } // create json query node if (OB_SUCC(ret)) { - node.num_child_ = 11; + node.num_child_ = 13; node.type_ = T_FUN_SYS_JSON_QUERY; node.children_ = param_vec; } @@ -1664,6 +1670,7 @@ int ObDMLResolver::resolve_sql_expr(const ParseNode &node, ObRawExpr *&expr, ObArray op_exprs; ObCollationType collation_connection = CS_TYPE_INVALID; ObCharsetType character_set_connection = CHARSET_INVALID; + ObSEArray match_exprs; CK( OB_NOT_NULL(params_.expr_factory_), OB_NOT_NULL(stmt_), OB_NOT_NULL(get_stmt()), @@ -1719,7 +1726,8 @@ int ObDMLResolver::resolve_sql_expr(const ParseNode &node, ObRawExpr *&expr, win_exprs, udf_info, op_exprs, - user_var_exprs)); + user_var_exprs, + match_exprs)); } if (OB_SUCC(ret)) { @@ -1791,6 +1799,12 @@ int ObDMLResolver::resolve_sql_expr(const ParseNode &node, ObRawExpr *&expr, } } + if (OB_SUCC(ret) && match_exprs.count() > 0) { + if (OB_FAIL(resolve_match_against_exprs(expr, match_exprs, current_scope_))) { + LOG_WARN("failed to resolve match against expr", K(ret)); + } + } + //process oracle compatible implimental cast LOG_DEBUG("is oracle mode", 
K(lib::is_oracle_mode()), K(lib::is_oracle_mode()), K(op_exprs)); if (OB_SUCC(ret) && op_exprs.count() > 0) { @@ -5489,8 +5503,22 @@ int ObDMLResolver::resolve_base_or_alias_table_item_normal(uint64_t tenant_id, true /* for index table */, cte_table_fisrt, is_hidden, - tschema))) { - LOG_WARN("table or index doesn't exist", K(tenant_id), K(database_id), K(tbl_name), K(ret)); + tschema, + false/*is_built_in_index*/))) { + if (OB_TABLE_NOT_EXIST == ret && stmt->is_select_stmt() && select_index_enabled) { + if (OB_FAIL(schema_checker_->get_table_schema(tenant_id, + database_id, + tbl_name, + true /* for index table */, + cte_table_fisrt, + is_hidden, + tschema, + true/*is_built_in_index*/))) { + LOG_WARN("table or index doesn't exist", K(tenant_id), K(database_id), K(tbl_name), K(ret)); + } + } else { + LOG_WARN("table or index doesn't exist", K(tenant_id), K(database_id), K(tbl_name), K(ret)); + } } } else { LOG_WARN("table or index get schema failed", K(ret)); @@ -6377,7 +6405,8 @@ int ObDMLResolver::resolve_partition_expr( } else if (OB_FAIL(resolve_columns_for_fk_partition_expr(expr, columns, table_item, table_schema, fk_info))) { LOG_WARN("resolve columns for parent table partition expr failed", K(ret)); } - } else if (OB_FAIL(resolve_columns_for_partition_expr(expr, columns, table_item, table_schema.is_oracle_tmp_table()))) { + } else if (OB_FAIL(resolve_columns_for_partition_expr(expr, columns, table_item, + table_schema.is_oracle_tmp_table() || table_schema.is_fts_index()))) { LOG_WARN("resolve columns for partition expr failed", K(ret)); } } @@ -6399,6 +6428,7 @@ int ObDMLResolver::resolve_partition_expr(const ParseNode &part_expr_node, ObRaw ObArray udf_info; ObArray op_exprs; ObSEArray user_var_exprs; + ObSEArray match_exprs; ObCollationType collation_connection = CS_TYPE_INVALID; ObCharsetType character_set_connection = CHARSET_INVALID; if (OB_ISNULL(params_.expr_factory_) || OB_ISNULL(params_.session_info_)) { @@ -6422,7 +6452,7 @@ int 
ObDMLResolver::resolve_partition_expr(const ParseNode &part_expr_node, ObRaw LOG_WARN("fail to get name case mode", K(ret)); } else if (OB_FAIL(expr_resolver.resolve(&part_expr_node, expr, columns, sys_vars, sub_query_info, aggr_exprs, win_exprs, udf_info, - op_exprs, user_var_exprs))) { + op_exprs, user_var_exprs, match_exprs))) { LOG_WARN("resolve expr failed", K(ret)); } else if (sub_query_info.count() > 0 || sys_vars.count() > 0 || aggr_exprs.count() > 0 || columns.count() <= 0 || udf_info.count() > 0 || op_exprs.count() > 0) { @@ -8132,6 +8162,7 @@ int ObDMLResolver::resolve_generated_column_expr(const ObString &expr_str, ObSQLSessionInfo *session_info = NULL; const ObTableSchema *table_schema = NULL; const bool allow_sequence = !used_for_generated_column; + bool include_hidden = false; ObSQLMode sql_mode = 0; ObCollationType cs_type = CS_TYPE_INVALID; if (OB_ISNULL(expr_factory = params_.expr_factory_) @@ -8168,6 +8199,9 @@ int ObDMLResolver::resolve_generated_column_expr(const ObString &expr_str, this, schema_checker_))) { LOG_WARN("build generated column expr failed", K(ret)); + } else if (OB_NOT_NULL(column_schema) && column_schema->is_doc_id_column() + && OB_FAIL(fill_doc_id_expr_param(table_item.table_id_, table_item.ref_id_, table_schema, ref_expr))) { + LOG_WARN("fail to fill doc id expr param", K(ret), K(table_item), KP(table_schema), KP(ref_expr)); } bool is_default_udt_constructor = false; @@ -8263,7 +8297,7 @@ int ObDMLResolver::resolve_generated_column_expr(const ObString &expr_str, LOG_TRACE("add external file column", KPC(real_ref_expr), K(columns.at(i).col_name_), K(table_item)); } else { if (OB_FAIL(resolve_basic_column_item(table_item, columns.at(i).col_name_, - false, col_item, stmt))) { + include_hidden, col_item, stmt))) { LOG_WARN("resolve basic column item failed", K(ret)); } else if (OB_ISNULL(col_item) || OB_ISNULL(col_item->expr_)) { ret = OB_ERR_UNEXPECTED; @@ -9289,9 +9323,14 @@ int ObDMLResolver::check_table_exist_or_not(uint64_t 
tenant_id, LOG_WARN("fail to get select_index_enabled", K(ret)); } else if ((select_index_enabled && is_select_resolver()) || session_info_->get_ddl_info().is_ddl()) { if (OB_FAIL(schema_checker_->check_table_or_index_exists( - tenant_id, database_id, table_name, is_hidden, is_exist))) { + tenant_id, database_id, table_name, is_hidden, false/*is_built_in_index*/, is_exist))) { LOG_WARN("fail to check table or index exist", K(tenant_id), K(database_id), K(table_name), K(ret)); + } else if (select_index_enabled && is_select_resolver() && !is_exist) { + if (OB_FAIL(schema_checker_->check_table_or_index_exists(tenant_id, database_id, table_name, + is_hidden, true/*is_built_in_index*/, is_exist))) { + LOG_WARN("fail to check table or hidden index exist", K(ret), K(tenant_id), K(database_id), K(table_name)); + } } } else { const bool is_index = false; @@ -17242,6 +17281,556 @@ int ObDMLResolver::adjust_values_desc_position(ObInsertTableInfo& table_info, return ret; } +int ObDMLResolver::fill_doc_id_expr_param( + const uint64_t table_id, + const uint64_t index_tid, + const ObTableSchema *table_schema, + ObRawExpr *&doc_id_expr) +{ + int ret = OB_SUCCESS; + ObDMLStmt *stmt = get_stmt(); + if (OB_ISNULL(table_schema) || OB_ISNULL(doc_id_expr)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(table_schema), KP(doc_id_expr)); + } else if (OB_UNLIKELY(index_tid != table_schema->get_table_id())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid index table id", K(ret), K(index_tid), K(table_schema->get_table_id())); + } else if (OB_UNLIKELY(T_FUN_SYS_DOC_ID != doc_id_expr->get_expr_type())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("not doc id expr", K(ret), "expr type", doc_id_expr->get_expr_type()); + } else if (OB_ISNULL(session_info_) || OB_ISNULL(params_.expr_factory_) || OB_ISNULL(stmt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("session info is NULL", KP_(session_info), KP_(params_.expr_factory), KP(stmt)); + } else { + ObSysFunRawExpr *expr = 
static_cast(doc_id_expr); + ObRawExpr *part_expr = stmt->get_part_expr(table_id, index_tid); + ObRawExpr *subpart_expr = stmt->get_subpart_expr(table_id, index_tid); + schema::ObPartitionLevel part_level = table_schema->get_part_level(); + ObRawExpr *calc_tablet_id_expr = nullptr; + if (OB_FAIL(ObRawExprUtils::build_calc_tablet_id_expr(*params_.expr_factory_, *session_info_, index_tid, + part_level, part_expr, subpart_expr, calc_tablet_id_expr))) { + LOG_WARN("fail to build calculate tablet id expr", K(ret), K(index_tid), KPC(table_schema)); + } else if (OB_ISNULL(calc_tablet_id_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null expr", K(ret), KP(calc_tablet_id_expr)); + } else if (OB_FAIL(expr->add_param_expr(calc_tablet_id_expr))) { + LOG_WARN("fail to add param expr", K(ret), KP(calc_tablet_id_expr)); + } else if (OB_FAIL(expr->formalize(session_info_))) { + LOG_WARN("fail to formalize", K(ret), KP(session_info_)); + } + } + STORAGE_FTS_LOG(DEBUG, "The dml resolver fills doc id expr parameter", K(ret), K(table_id), K(index_tid), + KPC(doc_id_expr), KPC(table_schema)); + return ret; +} + +int ObDMLResolver::try_add_join_table_for_fts(const TableItem *left_table, JoinedTable *&joined_table) +{ + int ret = OB_SUCCESS; + uint64_t tenant_id = OB_INVALID_TENANT_ID; + const ObTableSchema *table_schema = nullptr; + TableItem *right_table = nullptr; + uint64_t rowkey_doc_tid = OB_INVALID_ID; + uint64_t table_id = OB_INVALID_ID; + ObArray left_column_exprs; + ObArray right_column_exprs; + bool has_table_with_fulltext_index = false; + if (OB_ISNULL(left_table) || OB_ISNULL(schema_checker_) || OB_ISNULL(session_info_) || + OB_ISNULL(params_.query_ctx_) || OB_ISNULL(get_stmt())) { + ret = OB_ERR_UNEXPECTED; + STORAGE_FTS_LOG(WARN, "unexpected null", K(ret)); + } else if (!left_table->is_basic_table()) { + if (OB_FAIL(ObTransformUtils::check_table_with_fulltext_recursively(const_cast(left_table), + schema_checker_, + session_info_, + 
has_table_with_fulltext_index))) { + STORAGE_FTS_LOG(WARN, "fail to check table with fulltext recursively", K(ret)); + } else if (has_table_with_fulltext_index) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "complex dml operations on table with fulltext index"); + STORAGE_FTS_LOG(WARN, "not support complex dml operations on table with fulltext index", K(ret)); + } else { + // no fulltext index, nothing to do + } + } else if (FALSE_IT(tenant_id = session_info_->get_effective_tenant_id())) { + } else if (OB_UNLIKELY(OB_INVALID_ID == (table_id = left_table->ref_id_))) { + ret = OB_INVALID_ARGUMENT; + STORAGE_FTS_LOG(WARN, "invalid argument", K(ret)); + } else if (OB_FAIL(schema_checker_->get_table_schema(tenant_id, table_id, table_schema))) { + STORAGE_FTS_LOG(WARN, "fail to get table schema", K(ret), K(tenant_id), K(table_id)); + } else if (OB_ISNULL(table_schema) || OB_ISNULL(get_stmt()) || OB_ISNULL(params_.expr_factory_) + || OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_FTS_LOG(WARN, "unexpected error, rowkey doc table schema, stmt or expr factory is nullptr", K(ret), + K(tenant_id), K(rowkey_doc_tid), K(table_id), KP(get_stmt()), KP(params_.expr_factory_), KP(allocator_)); + } else if (OB_FAIL(table_schema->get_rowkey_doc_tid(rowkey_doc_tid)) && OB_ERR_INDEX_KEY_NOT_FOUND != ret) { + STORAGE_FTS_LOG(WARN, "fail to get rowkey doc table id", K(ret), KPC(table_schema)); + } else if (OB_ERR_INDEX_KEY_NOT_FOUND == ret) { + // no fulltext index, nothing to do + ret = OB_SUCCESS; + } else if (OB_FAIL(schema_checker_->get_table_schema(tenant_id, rowkey_doc_tid, table_schema))) { + STORAGE_FTS_LOG(WARN, "fail to get index table schema", K(ret), K(tenant_id), K(rowkey_doc_tid)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_FTS_LOG(WARN, "unexpected error, rowkey doc table schema is nullptr", K(ret), K(tenant_id), + K(rowkey_doc_tid), K(table_id)); + } else if (OB_ISNULL(right_table = 
get_stmt()->create_table_item(*allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + STORAGE_FTS_LOG(WARN, "fail to allocate right table item", K(ret)); + } else { + right_table->type_ = TableItem::BASE_TABLE; + right_table->ref_id_ = table_schema->get_table_id(); + right_table->table_id_ = table_schema->get_table_id(); + right_table->is_system_table_ = table_schema->is_sys_table(); + right_table->is_view_table_ = table_schema->is_view_table(); + right_table->table_name_ = table_schema->get_table_name_str(); + right_table->alias_name_ = table_schema->get_table_name_str(); + right_table->table_type_ = table_schema->get_table_type(); + if (OB_FAIL(get_stmt()->add_table_item(session_info_, right_table))) { + STORAGE_FTS_LOG(WARN, "fail to add right table item", K(ret), K(right_table)); + } else if (OB_FAIL(resolve_table_partition_expr(*right_table, *table_schema))) { + STORAGE_FTS_LOG(WARN, "fail to resolve table partition expr", K(ret), KPC(right_table), KPC(table_schema)); + } else if (OB_FAIL(create_joined_table_item(ObJoinType::INNER_JOIN, left_table, right_table, joined_table))) { + STORAGE_FTS_LOG(WARN, "fail to create joined table item", K(ret), KPC(left_table), KPC(right_table)); + } else if (OB_FAIL(add_all_rowkey_columns_to_stmt(*left_table, left_column_exprs))) { + STORAGE_FTS_LOG(WARN, "fail to add all rowkey columns to stmt", K(ret)); + } else if (OB_FAIL(add_all_rowkey_columns_to_stmt(*right_table, right_column_exprs))) { + STORAGE_FTS_LOG(WARN, "fail to add all rowkey columns to stmt", K(ret)); + } else if (OB_UNLIKELY(left_column_exprs.count() != right_column_exprs.count())) { + ret = OB_ERR_UNEXPECTED; + STORAGE_FTS_LOG(WARN, "unexpected error, left column isn't equal to right", K(ret), K(left_column_exprs), K(right_column_exprs)); + } else { + for (int64_t i = 0; i < left_column_exprs.count() && OB_SUCC(ret); ++i) { + ObOpRawExpr *b_expr = nullptr; + if (OB_FAIL(params_.expr_factory_->create_raw_expr(T_OP_EQ, b_expr))) { // make equal expr: 
t_left_N.ck = t_right.ck + STORAGE_FTS_LOG(WARN, "fail to create join condition raw expr", K(ret)); + } else if (OB_ISNULL(b_expr)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_FTS_LOG(WARN, "unexpected error, b_expr is null", K(ret)); + } else if (OB_FAIL(b_expr->set_param_exprs(left_column_exprs.at(i), right_column_exprs.at(i)))) { + STORAGE_FTS_LOG(WARN, "fail to set b_expr param exprs", K(ret)); + } else if (OB_FAIL(b_expr->formalize(session_info_))) { + STORAGE_FTS_LOG(WARN, "fail to resolve formalize expression", K(ret)); + } else if (OB_FAIL(joined_table->join_conditions_.push_back(b_expr))) { + STORAGE_FTS_LOG(WARN, "fail to add expression", K(ret)); + } + } + } + OZ((get_stmt()->add_joined_table)(joined_table)); + OZ(join_infos_.push_back(ResolverJoinInfo(joined_table->table_id_))); + // add hint to forcibly use nested loop join + ObQueryHint &query_hint = params_.query_ctx_->get_query_hint_for_update(); + bool filter_embedded_hint = query_hint.has_outline_data() || query_hint.has_user_def_outline(); + if (OB_SUCC(ret) && !filter_embedded_hint) { + ObSEArray hints; + ObJoinHint *main_join_hint = NULL; + ObJoinHint *rowkey_join_hint = NULL; + ObJoinOrderHint *ordered_hint = NULL; + ObSEArray main_hint_tables; + ObSEArray rowkey_hint_tables; + ObTableInHint main_table; + ObTableInHint rowkey_doc; + main_table.qb_name_ = left_table->qb_name_; + main_table.db_name_ = left_table->database_name_; + main_table.table_name_ = left_table->table_name_; + rowkey_doc.qb_name_ = right_table->qb_name_; + rowkey_doc.db_name_ = right_table->database_name_; + rowkey_doc.table_name_ = right_table->table_name_; + if (OB_FAIL(main_hint_tables.push_back(main_table))) { + STORAGE_FTS_LOG(WARN, "fail to push back main table into hint tables", K(ret)); + } else if (OB_FAIL(rowkey_hint_tables.push_back(rowkey_doc))) { + STORAGE_FTS_LOG(WARN, "fail to push back rowkey doc table into hint tables", K(ret)); + } else if (OB_FAIL(ObQueryHint::create_hint(allocator_, T_USE_NL, 
main_join_hint))) { + STORAGE_FTS_LOG(WARN, "failed to create hint", K(ret)); + } else if (OB_FAIL(ObQueryHint::create_hint(allocator_, T_USE_NL, rowkey_join_hint))) { + STORAGE_FTS_LOG(WARN, "failed to create hint", K(ret)); + } else if (OB_FAIL(ObQueryHint::create_hint(allocator_, T_ORDERED, ordered_hint))) { + LOG_WARN("failed to create hint", K(ret)); + } else if (OB_FAIL(main_join_hint->get_tables().assign(main_hint_tables))) { + STORAGE_FTS_LOG(WARN, "fail to assign hint tables", K(ret)); + } else if (OB_FAIL(rowkey_join_hint->get_tables().assign(rowkey_hint_tables))) { + STORAGE_FTS_LOG(WARN, "fail to assign hint tables", K(ret)); + } else if (OB_FAIL(hints.push_back(main_join_hint))) { + STORAGE_FTS_LOG(WARN, "fail to push back hint", K(ret)); + } else if (OB_FAIL(hints.push_back(rowkey_join_hint))) { + STORAGE_FTS_LOG(WARN, "fail to push back hint", K(ret)); + } else if (OB_FAIL(hints.push_back(ordered_hint))) { + STORAGE_FTS_LOG(WARN, "fail to push back hint", K(ret)); + } else if (OB_FAIL(query_hint.append_hints(get_stmt()->get_stmt_id(), hints))) { + STORAGE_FTS_LOG(WARN, "fail to append hints", K(ret)); + } + } + } + return ret; +} + +int ObDMLResolver::try_update_column_expr_for_fts( + const TableItem &table_item, + common::ObIArray &column_exprs) +{ + int ret = OB_SUCCESS; + uint64_t tenant_id = OB_INVALID_TENANT_ID; + const ObTableSchema *table_schema = nullptr; + const uint64_t table_id = table_item.ref_id_; + uint64_t rowkey_doc_tid = OB_INVALID_ID; + if (OB_UNLIKELY(TableItem::BASE_TABLE != table_item.type_)) { + // There is a fulltext index in only base table. So, not base table, just skip. 
+ } else if (OB_UNLIKELY(OB_INVALID_ID == table_id)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_FTS_LOG(WARN, "invalid arguments", K(ret), K(table_id)); + } else if (OB_ISNULL(session_info_) || OB_ISNULL(schema_checker_) || OB_ISNULL(get_stmt())) { + ret = OB_ERR_UNEXPECTED; + STORAGE_FTS_LOG(WARN, "unexpected error, session info, schema checker or get_stmt() is nullptr", KP(session_info_), + KP(schema_checker_), KP(get_stmt())); + } else if (FALSE_IT(tenant_id = session_info_->get_effective_tenant_id())) { + } else if (OB_FAIL(schema_checker_->get_table_schema(tenant_id, table_id, table_schema))) { + STORAGE_FTS_LOG(WARN, "fail to get table schema", K(ret), K(tenant_id), K(table_id)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_FTS_LOG(WARN, "unexpected error, table schema is nullptr", K(ret), K(tenant_id), K(table_id)); + } else if (OB_FAIL(table_schema->get_rowkey_doc_tid(rowkey_doc_tid)) && OB_ERR_INDEX_KEY_NOT_FOUND != ret) { + STORAGE_FTS_LOG(WARN, "fail to get rowkey doc table id", K(ret), KPC(table_schema)); + } else if (OB_ERR_INDEX_KEY_NOT_FOUND == ret) { + // no fulltext index, nothing to do + ret = OB_SUCCESS; + } else { + ObString doc_id_col_name; + uint64_t col_id = OB_INVALID_ID; + for (ObTableSchema::const_column_iterator iter = table_schema->column_begin(); + OB_SUCC(ret) && iter != table_schema->column_end(); + ++iter) { + const ObColumnSchemaV2 *column = *iter; + if (OB_ISNULL(column)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_FTS_LOG(WARN, "invalid column schema", K(ret), KP(column)); + } else if (!column->is_doc_id_column()) { + continue; // nothing to do, just skip this column and continue. 
+ } else { + col_id = column->get_column_id(); + doc_id_col_name = column->get_column_name_str(); + break; + } + } + if (OB_FAIL(ret)) { + } else if (OB_UNLIKELY(OB_INVALID_ID == col_id)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_FTS_LOG(WARN, "Don't found doc id column id", K(ret), K(col_id)); + } else { + const TableItem *index_item = get_stmt()->get_table_item_by_id(rowkey_doc_tid); + ColumnItem *column_item = get_stmt()->get_column_item_by_id(rowkey_doc_tid, col_id); + if (OB_ISNULL(index_item)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, index item is nullptr", K(ret), KP(index_item)); + } else if (NULL == column_item) { + if (OB_FAIL(resolve_basic_column_item(*index_item, doc_id_col_name, true, column_item, get_stmt()))) { + STORAGE_FTS_LOG(WARN, "fail to add column doc id item to array", K(ret)); + } else if (OB_ISNULL(column_item) || OB_ISNULL(column_item->expr_)) { + ret = OB_ERR_BAD_FIELD_ERROR; + STORAGE_FTS_LOG(WARN, "failed to add column item", K(ret), KPC(column_item)); + } + STORAGE_FTS_LOG(DEBUG, "fts resovle", K(ret), K(rowkey_doc_tid), K(col_id), KPC(column_item), K(column_exprs)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < column_exprs.count(); ++i) { + if (OB_ISNULL(column_exprs.at(i))) { + ret = OB_ERR_UNEXPECTED; + STORAGE_FTS_LOG(WARN, "unexpected error, column_expr is nullptr", K(ret), K(i)); + } else if (column_exprs.at(i)->get_column_id() == col_id) { + column_exprs.at(i) = column_item->expr_; + STORAGE_FTS_LOG(DEBUG, "fts resovle", K(ret), K(rowkey_doc_tid), K(col_id), KPC(column_item)); + break; + } + } + } + } + return ret; +} + +int ObDMLResolver::resolve_match_against_exprs(ObRawExpr *&expr, + ObIArray &match_exprs, + const ObStmtScope scope) +{ + int ret = OB_SUCCESS; + ObDMLStmt *stmt = get_stmt(); + ObQuestionmarkEqualCtx check_ctx; + ObRawExprReplacer replacer; + if (OB_ISNULL(stmt) || OB_ISNULL(expr) || OB_ISNULL(params_.query_ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret), K(stmt), 
K(expr)); + } else if (match_exprs.count() > 1) { + // jinmao TODO: 之后存储层支持返回未匹配行,并且 SQL 层支持计算之后可以删掉这里的一系列限制 + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "match expr can only be used in simple filter for now"); + LOG_WARN("match expr can only be used in simple filter for now", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < match_exprs.count(); i++) { + uint64_t table_id = OB_INVALID_ID; + ObMatchFunRawExpr *cur_match_expr = NULL; + ObMatchFunRawExpr *match_expr_on_table = NULL; + bool table_on_null_side = false; + bool is_simple_filter = false; + ObSEArray constraints; + if (OB_ISNULL(cur_match_expr = match_exprs.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(cur_match_expr->get_table_id(table_id))) { + LOG_WARN("failed to get table id", K(ret)); + } else if (OB_FAIL(stmt->get_match_expr_on_table(table_id, match_expr_on_table))) { + LOG_WARN("failed to get fulltext search expr on table", K(ret), K(table_id)); + } else if (OB_FAIL(resolve_match_against_expr(*cur_match_expr))) { + LOG_WARN("failed to resolve match index", K(ret)); + } else if (OB_ISNULL(match_expr_on_table)) { + if (scope != T_WHERE_SCOPE) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "fulltext search expr defined beyond where clause"); + LOG_WARN("fulltext search expr not found in condition expr", K(ret)); + } else if (OB_FAIL(ObOptimizerUtil::is_table_on_null_side(stmt, table_id, table_on_null_side))) { + LOG_WARN("failed to check table on null side", K(ret)); + } else if (table_on_null_side) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "fulltext search on null side of joined table"); + LOG_WARN("fulltext search on null side of joined table is not supported", K(ret)); + } else if (OB_FAIL(check_fulltext_search_simple_filter(expr, cur_match_expr, is_simple_filter, constraints))) { + LOG_WARN("failed to check fulltext search simple filter", K(ret)); + } else if (is_simple_filter) { + if 
(OB_FAIL(stmt->get_match_exprs().push_back(cur_match_expr))) { + LOG_WARN("failed to push back expr", K(ret)); + } else if (OB_FAIL(append(params_.query_ctx_->all_expr_constraints_, constraints))) { + LOG_WARN("failed to append constraints", K(ret)); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "filter that can't imply match_score not equal to 0"); + LOG_WARN("filter that can't imply match_score not equal to 0 is not supported", K(ret), KPC(expr)); + } + } else if (!cur_match_expr->same_as(*match_expr_on_table, &check_ctx)) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "non-shareable match exprs on same base table"); + LOG_WARN("non-shareable match exprs on same base table are not supported", K(ret), KPC(cur_match_expr), KPC(match_expr_on_table)); + } else if (OB_FAIL(replacer.add_replace_expr(cur_match_expr, match_expr_on_table))) { + LOG_WARN("failed to add replace expr", K(ret)); + } else if (OB_FAIL(replacer.replace(expr))) { + LOG_WARN("failed to replace expr", K(ret)); + } else if (OB_FAIL(append(params_.query_ctx_->all_equal_param_constraints_, check_ctx.equal_pairs_))) { + LOG_WARN("failed to append equal param info", K(ret)); + } + } + } + return ret; +} + +int ObDMLResolver::resolve_match_against_expr(ObMatchFunRawExpr &expr) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(expr.get_param_expr(0)) || OB_ISNULL(schema_checker_) || OB_ISNULL(session_info_) + || OB_ISNULL(get_stmt())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column list is invalid", K(expr.get_param_expr(0)), K(get_stmt())); + } else { + const TableItem *table_item = NULL; + const ObTableSchema *table_schema = nullptr; + ObIArray &column_list = expr.get_match_columns(); + uint64_t table_id = OB_INVALID_ID; + ColumnReferenceSet column_set; + const ObColumnSchemaV2 *fulltext_col = NULL; + + // get matched fulltext index + for (int64_t i = 0; OB_SUCC(ret) && i < column_list.count(); ++i) { + ObColumnRefRawExpr *col_ref = nullptr; + if 
(OB_UNLIKELY(OB_ISNULL(column_list.at(i)) || !column_list.at(i)->is_column_ref_expr())) { + ret = OB_INVALID_ARGUMENT; + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "match against column"); + } else if (FALSE_IT(col_ref = static_cast(column_list.at(i)))) { + } else if (OB_FAIL(column_set.add_member(col_ref->get_column_id()))) { + LOG_WARN("add to column set failed", K(ret)); + } else if (0 == i) { + table_id = col_ref->get_table_id(); + } else if (OB_UNLIKELY(col_ref->get_table_id() != table_id)) { + //check all table id + ret = OB_INVALID_ARGUMENT; + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "match against columns on different tables"); + } + } + if (OB_FAIL(ret)) { + //do nothing + } else if (OB_ISNULL(table_item = get_stmt()->get_table_item_by_id(table_id))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table item don't exist", K(table_id)); + } else if (!table_item->is_basic_table()) { + ret = OB_INVALID_ARGUMENT; + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "match against column on non-base table"); + } else if (OB_FAIL(schema_checker_->get_table_schema(session_info_->get_effective_tenant_id(), + table_item->ref_id_, + table_schema))) { + LOG_WARN("failed to get main table schema", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to table schema", K(ret)); + } else if (OB_FAIL(resolve_match_index(column_set, *table_schema, expr))) { + LOG_WARN("failed to resolve fulltext index access exprs", K(ret)); + } else if (OB_FAIL(expr.formalize(session_info_))) { + LOG_WARN("failed to formalize expr", K(ret)); + } + } + return ret; +} + +int ObDMLResolver::resolve_match_index( + const ColumnReferenceSet &match_column_set, + const ObTableSchema &table_schema, + ObMatchFunRawExpr &match_against) +{ + int ret = OB_SUCCESS; + int64_t inv_idx_tid = OB_INVALID_ID; + int64_t fwd_idx_tid = OB_INVALID_ID; + int64_t doc_rowkey_tid = OB_INVALID_ID; + uint64_t column_id = OB_INVALID_ID; + uint64_t database_id = OB_INVALID_ID; + ObSEArray 
index_infos; + const ObTableSchema *inv_idx_schema = nullptr; + const ObTableSchema *fwd_idx_schema = nullptr; + + if (OB_FAIL(table_schema.get_simple_index_infos(index_infos))) { + LOG_WARN("failed to get index infos", K(ret)); + } else { + database_id = table_schema.get_database_id(); + for (int64_t i = 0; i < index_infos.count(); ++i) { + if (share::schema::is_doc_rowkey_aux(index_infos.at(i).index_type_)) { + doc_rowkey_tid = index_infos.at(i).table_id_; + break; + } + } + if (OB_UNLIKELY(OB_INVALID_ID == doc_rowkey_tid)) { + ret = OB_ERR_FT_COLUMN_NOT_INDEXED; + LOG_WARN("No matched fulltext index exists", K(ret)); + } + } + bool found_matched_index = false; + for (int64_t i = 0; OB_SUCC(ret) && i < index_infos.count() && !found_matched_index; ++i) { + inv_idx_schema = nullptr; + const ObAuxTableMetaInfo &index_info = index_infos.at(i); + if (!share::schema::is_fts_index_aux(index_info.index_type_)) { + // skip + } else if (OB_FAIL(schema_checker_->get_table_schema( + session_info_->get_effective_tenant_id(), index_info.table_id_, inv_idx_schema))) { + LOG_WARN("failed to get index schema", K(ret)); + } else if (OB_ISNULL(inv_idx_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index schema", K(ret), KPC(inv_idx_schema)); + } else if (OB_FAIL(ObTransformUtils::check_fulltext_index_match_column(match_column_set, + &table_schema, + inv_idx_schema, + found_matched_index))) { + LOG_WARN("failed to check fulltext index match column", K(ret)); + } else if (found_matched_index) { + inv_idx_tid = index_info.table_id_; + } + } + + if (OB_FAIL(ret)) { + } else if (OB_UNLIKELY(!found_matched_index)) { + ret = OB_ERR_FT_COLUMN_NOT_INDEXED; + LOG_WARN("No matched fulltext index exists", K(ret)); + } else { + // find matched inverted index and forward index + bool found_fwd_idx = false; + const ObString &inv_idx_name = inv_idx_schema->get_table_name_str(); + for (int64_t i = 0; OB_SUCC(ret) && i < index_infos.count(); ++i) { + const ObAuxTableMetaInfo 
&index_info = index_infos.at(i); + if (!share::schema::is_fts_doc_word_aux(index_info.index_type_)) { + // skip + } else if (OB_FAIL(schema_checker_->get_table_schema( + session_info_->get_effective_tenant_id(), index_info.table_id_, fwd_idx_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_ISNULL(fwd_idx_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpecter nullptr to fwd idx schema", K(ret)); + } else if (fwd_idx_schema->get_table_name_str().prefix_match(inv_idx_name)) { + found_fwd_idx = true; + fwd_idx_tid = fwd_idx_schema->get_table_id(); + } + } + + if (OB_SUCC(ret)) { + if (OB_UNLIKELY(!found_fwd_idx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("found matched inverted index table, but corresponding forward index table not found", + K(ret), K(inv_idx_tid), K(index_infos)); + } + } + LOG_DEBUG("fulltext retrieval matched fulltex index id", K(ret), + K(inv_idx_tid), K(fwd_idx_tid), K(doc_rowkey_tid)); + } + return ret; +} + +// check that the fulltext search filter can imply a condition where match_score is not equal to zero. 
+int ObDMLResolver::check_fulltext_search_simple_filter(ObRawExpr *expr, + ObRawExpr *match_expr, + bool &is_simple_filter, + ObIArray &constraints) +{ + int ret = OB_SUCCESS; + is_simple_filter = false; + if (expr->get_expr_type() == T_FUN_MATCH_AGAINST) { + // bool expr will be added above in where scope + is_simple_filter = true; + } else { + ObRawExprCopier copier(*params_.expr_factory_); + ObSEArray match_exprs; + ObSEArray zero_exprs; + ObConstRawExpr *zero_expr = NULL; + ObObj obj_zero; + obj_zero.set_double(ObDoubleType, 0); + ObRawExpr *false_null_expr = NULL; + ObRawExpr *lnnvl_expr = NULL; + bool got_result = false; + ObObj result; + if (OB_ISNULL(params_.expr_factory_) || OB_ISNULL(params_.session_info_) || OB_ISNULL(allocator_) || + OB_ISNULL(params_.session_info_->get_cur_exec_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(params_.expr_factory_->create_raw_expr(T_DOUBLE, zero_expr))) { + LOG_WARN("create raw expr fail", K(ret)); + } else if (OB_ISNULL(zero_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null expr", K(ret)); + } else if (OB_FALSE_IT(zero_expr->set_value(obj_zero))) { + } else if (OB_FAIL(match_exprs.push_back(match_expr))) { + LOG_WARN("failed to push back expr", K(ret)); + } else if (OB_FAIL(zero_exprs.push_back(zero_expr))) { + LOG_WARN("failed to push back expr", K(ret)); + } else if (OB_FAIL(copier.add_replaced_expr(match_exprs, zero_exprs))) { + LOG_WARN("failed to add replace pair", K(ret)); + } else if (OB_FAIL(copier.copy_on_replace(expr, false_null_expr))) { + LOG_WARN("failed to do expr copy on replace", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::build_lnnvl_expr(*params_.expr_factory_, false_null_expr, lnnvl_expr))) { + LOG_WARN("failed to build lnnvl expr", K(ret)); + } else if (OB_ISNULL(lnnvl_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(lnnvl_expr->formalize(params_.session_info_))) { + LOG_WARN("failed to 
formalize lnnvl expr", K(ret)); + } else if (!lnnvl_expr->is_static_const_expr()) { + is_simple_filter = false; + } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(params_.session_info_->get_cur_exec_ctx(), + lnnvl_expr, + result, + got_result, + *allocator_))) { + LOG_WARN("failed to calc cosnt or calculable expr", K(ret)); + } else if (!got_result || result.is_false() || result.is_null()) { + is_simple_filter = false; + } else { + is_simple_filter = true; + ObExprConstraint true_constraint(lnnvl_expr, PreCalcExprExpectResult::PRE_CALC_RESULT_TRUE); + if (OB_FAIL(constraints.push_back(true_constraint))) { + LOG_WARN("failed to push back true constraint", K(ret)); + } + } + } + return ret; +} + int ObDMLResolver::add_udt_dependency(const pl::ObUserDefinedType &udt_type) { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/dml/ob_dml_resolver.h b/src/sql/resolver/dml/ob_dml_resolver.h index 2cee4f017f..95b981dc19 100644 --- a/src/sql/resolver/dml/ob_dml_resolver.h +++ b/src/sql/resolver/dml/ob_dml_resolver.h @@ -450,6 +450,11 @@ protected: const uint64_t autoinc_col_id, const ObString autoinc_table_name, const ObString autoinc_column_name); + int fill_doc_id_expr_param( + const uint64_t table_id, + const uint64_t index_tid, + const ObTableSchema *table_schema, + ObRawExpr *&doc_id_expr); int build_partid_expr(ObRawExpr *&expr, const uint64_t table_id); virtual int resolve_subquery_info(const common::ObIArray &subquery_info); virtual int resolve_aggr_exprs(ObRawExpr *&expr, common::ObIArray &aggr_exprs, @@ -838,6 +843,7 @@ protected: const share::schema::ObTableSchema *table_schema, common::ObIArray &check_exprs, ObIArray *check_flags = NULL); + int resolve_match_against_expr(ObMatchFunRawExpr &expr); private: int resolve_function_table_column_item_udf(const TableItem &table_item, common::ObIArray &col_items); @@ -987,7 +993,20 @@ private: ObIArray &values_desc, ObRawExpr *&expr); int build_row_for_empty_values(ObIArray &values_vector); - + int 
resolve_match_against_exprs(ObRawExpr *&expr, + ObIArray &match_exprs, + const ObStmtScope scope); + int resolve_match_index(const ColumnReferenceSet &match_column_set, + const ObTableSchema &table_schema, + ObMatchFunRawExpr &match_against); + int check_fulltext_search_simple_filter(ObRawExpr *expr, + ObRawExpr *match_expr, + bool &is_simple_filter, + ObIArray &constraints); + int build_and_check_true_expr(ObRawExpr *const_expr, + ObItemType compare_op, + bool &is_true, + ObIArray &constraints); int add_udt_dependency(const pl::ObUserDefinedType &udt_type); protected: struct GenColumnExprInfo { @@ -1008,6 +1027,13 @@ protected: common::ObString column_name_; //生成列的名称 }; int add_parent_gen_col_exprs(const ObArray &gen_col_exprs); + + int try_add_join_table_for_fts( + const TableItem *left_table, + JoinedTable *&joined_table); + int try_update_column_expr_for_fts( + const TableItem &table_item, + common::ObIArray &column_exprs); protected: ObStmtScope current_scope_; int32_t current_level_; @@ -1018,7 +1044,7 @@ protected: //these generated column exprs are not the reference by query expression, //just some expr template in schema, //only the generated column expr referenced by query can be deposited to stmt - common::ObArray gen_col_exprs_; + common::ObArray gen_col_exprs_ ; common::ObArray from_items_order_; ObIArray *query_ref_exec_params_; diff --git a/src/sql/resolver/dml/ob_dml_stmt.cpp b/src/sql/resolver/dml/ob_dml_stmt.cpp index bdceb1f58e..b2f81b3628 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.cpp +++ b/src/sql/resolver/dml/ob_dml_stmt.cpp @@ -406,7 +406,8 @@ ObDMLStmt::ObDMLStmt(stmt::StmtType type) user_var_exprs_(), check_constraint_items_(), dblink_id_(OB_INVALID_ID), - is_reverse_link_(false) + is_reverse_link_(false), + match_exprs_() { } @@ -502,6 +503,8 @@ int ObDMLStmt::assign(const ObDMLStmt &other) LOG_WARN("assign user var exprs fail", K(ret)); } else if (OB_FAIL(check_constraint_items_.assign(other.check_constraint_items_))) { 
LOG_WARN("faield to assign check constraint items", K(ret)); + } else if (OB_FAIL(match_exprs_.assign(other.match_exprs_))) { + LOG_WARN("faield to assign fulltext search exprs", K(ret)); } else { limit_count_expr_ = other.limit_count_expr_; limit_offset_expr_ = other.limit_offset_expr_; @@ -654,6 +657,9 @@ int ObDMLStmt::deep_copy_stmt_struct(ObIAllocator &allocator, } else if (OB_FAIL(expr_copier.copy(other.user_var_exprs_, user_var_exprs_))) { LOG_WARN("deep copy user var exprs failed", K(ret)); + } else if (OB_FAIL(expr_copier.copy(other.match_exprs_, + match_exprs_))) { + LOG_WARN("deep copy user var exprs failed", K(ret)); } else if (OB_FAIL(deep_copy_stmt_objects(expr_copier, other.check_constraint_items_, check_constraint_items_))) { @@ -929,6 +935,8 @@ int ObDMLStmt::iterate_stmt_expr(ObStmtExprVisitor &visitor) } else if (NULL != limit_percent_expr_ && OB_FAIL(visitor.visit(limit_percent_expr_, SCOPE_LIMIT))) { LOG_WARN("failed to visit limit percent exprs", K(ret)); + } else if (OB_FAIL(visitor.visit(match_exprs_, SCOPE_DICT_FIELDS))) { + LOG_WARN("failed to visit fts exprs", K(ret)); } else {} } @@ -1802,7 +1810,8 @@ int ObDMLStmt::formalize_relation_exprs(ObSQLSessionInfo *session_info) if (OB_ISNULL(column_expr = column_items_.at(i).expr_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("expr is NULL", K(ret)); - } else if (column_expr->is_virtual_generated_column()) { + } else if (column_expr->is_virtual_generated_column() && + (!column_expr->is_fulltext_column() && !column_expr->is_multivalue_generated_column())) { ObRawExpr *dependant_expr = static_cast( column_expr)->get_dependant_expr(); if (OB_FAIL(dependant_expr->formalize(session_info))) { @@ -1875,7 +1884,7 @@ int ObDMLStmt::formalize_stmt_expr_reference(ObRawExprFactory *expr_factory, if (OB_ISNULL(column_item.expr_) || OB_ISNULL(table_item = get_table_item_by_id(column_item.table_id_))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(column_item.expr_), K(table_item), K(ret)); + 
LOG_WARN("get unexpected null", K(column_item.table_id_), K(column_item.expr_), K(table_item), K(ret)); } else if (table_item->is_function_table() || table_item->is_json_table() || table_item->for_update_ || @@ -2019,6 +2028,10 @@ int ObDMLStmt::set_sharable_expr_reference(ObRawExpr &expr, ExplicitedRefType re // SQL DEFENSIVE CODE ret = OB_ERR_UNEXPECTED; LOG_WARN("query ref expr does not exist in the stmt", K(ret), K(expr)); + } else if (expr.is_match_against_expr() && + !ObRawExprUtils::find_expr(get_match_exprs(), &expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fulltext search expr does not exist in the stmt", K(ret), K(expr)); } else if (is_select_stmt() && OB_FAIL(static_cast(this)->check_aggr_and_winfunc(expr))) { // SQL DEFENSIVE CODE @@ -2040,7 +2053,7 @@ int ObDMLStmt::set_sharable_expr_reference(ObRawExpr &expr, ExplicitedRefType re expr.has_flag(CNT_WINDOW_FUNC) || expr.has_flag(CNT_SUB_QUERY) || expr.has_flag(CNT_ROWNUM) || expr.has_flag(CNT_SEQ_EXPR) || expr.has_flag(CNT_PSEUDO_COLUMN) || expr.has_flag(CNT_ONETIME) || - expr.has_flag(CNT_DYNAMIC_PARAM))) { + expr.has_flag(CNT_DYNAMIC_PARAM) || expr.has_flag(CNT_MATCH_EXPR))) { for (int64_t i = 0; OB_SUCC(ret) && i < expr.get_param_count(); i++) { if (OB_ISNULL(expr.get_param_expr(i))) { ret = OB_ERR_UNEXPECTED; @@ -5048,3 +5061,24 @@ int ObJsonTableDef::assign(const ObJsonTableDef& src) return ret; } + +int ObDMLStmt::get_match_expr_on_table(uint64_t table_id, ObMatchFunRawExpr *&match_expr) const +{ + int ret = OB_SUCCESS; + match_expr = NULL; + for (int64_t i = 0; OB_SUCC(ret) && i < get_match_exprs().count(); i++) { + uint64_t cur_tid = OB_INVALID_ID; + if (OB_ISNULL(get_match_exprs().at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(get_match_exprs().at(i)->get_table_id(cur_tid))) { + LOG_WARN("failed to get fulltext search exprs", K(ret)); + } else if (OB_NOT_NULL(match_expr) && cur_tid == table_id) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid 
argument, find more than one match expr on current table", K(ret), K(table_id)); + } else if (cur_tid == table_id) { + match_expr = get_match_exprs().at(i); + } else { /*do nothing*/ } + } + return ret; +} diff --git a/src/sql/resolver/dml/ob_dml_stmt.h b/src/sql/resolver/dml/ob_dml_stmt.h index 29c72692a6..49e8aef0a0 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.h +++ b/src/sql/resolver/dml/ob_dml_stmt.h @@ -960,6 +960,11 @@ public: { return pseudo_column_like_exprs_; } const common::ObIArray &get_pseudo_column_like_exprs() const { return pseudo_column_like_exprs_; } + const common::ObIArray &get_match_exprs() const + { return match_exprs_; } + common::ObIArray &get_match_exprs() + { return match_exprs_; } + int get_match_expr_on_table(uint64_t table_id, ObMatchFunRawExpr *&match_expr) const; int get_table_pseudo_column_like_exprs(uint64_t table_id, ObIArray &pseudo_columns); int get_table_pseudo_column_like_exprs(ObIArray &table_id, ObIArray &pseudo_columns); int rebuild_tables_hash(); @@ -1270,6 +1275,8 @@ protected: */ int64_t dblink_id_; bool is_reverse_link_; + // fulltext search exprs + common::ObSEArray match_exprs_; }; template diff --git a/src/sql/resolver/dml/ob_group_by_checker.cpp b/src/sql/resolver/dml/ob_group_by_checker.cpp index 5c486819e9..a170884a69 100644 --- a/src/sql/resolver/dml/ob_group_by_checker.cpp +++ b/src/sql/resolver/dml/ob_group_by_checker.cpp @@ -905,6 +905,16 @@ int ObGroupByChecker::visit(ObCaseOpRawExpr &expr) return ret; } +int ObGroupByChecker::visit(ObMatchFunRawExpr &expr) +{ + int ret = OB_SUCCESS; + if (find_in_group_by(expr) || find_in_rollup(expr) || + find_in_cube(expr) || find_in_grouping_sets(expr)) { + set_skip_expr(&expr); + } + return ret; +} + int ObGroupByChecker::visit(ObAggFunRawExpr &expr) { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/dml/ob_group_by_checker.h b/src/sql/resolver/dml/ob_group_by_checker.h index e9ee01bda6..368d9746c3 100644 --- a/src/sql/resolver/dml/ob_group_by_checker.h +++ 
b/src/sql/resolver/dml/ob_group_by_checker.h @@ -65,6 +65,7 @@ public: virtual int visit(ObWinFunRawExpr &expr); virtual int visit(ObPseudoColumnRawExpr &expr); virtual int visit(ObPlQueryRefRawExpr &expr); + virtual int visit(ObMatchFunRawExpr &expr); // set expr skip virtual bool skip_child(ObRawExpr &expr) diff --git a/src/sql/resolver/dml/ob_sql_hint.cpp b/src/sql/resolver/dml/ob_sql_hint.cpp index 62d64340e5..6391130d6b 100644 --- a/src/sql/resolver/dml/ob_sql_hint.cpp +++ b/src/sql/resolver/dml/ob_sql_hint.cpp @@ -2343,7 +2343,7 @@ int LogTableHint::init_index_hints(ObSqlSchemaGuard &schema_guard) OB_ISNULL(index_schema)) { ret = OB_SCHEMA_ERROR; LOG_WARN("fail to get table schema", K(index_id), K(ret)); - } else if (index_schema->is_domain_index()) { + } else if (index_schema->is_fts_index()) { // just ignore domain index } else if (OB_FAIL(index_schema->get_index_name(index_name))) { LOG_WARN("fail to get index name", K(index_name), K(ret)); diff --git a/src/sql/resolver/dml/ob_update_resolver.cpp b/src/sql/resolver/dml/ob_update_resolver.cpp index 6a9cc9a1f7..7758cd9900 100644 --- a/src/sql/resolver/dml/ob_update_resolver.cpp +++ b/src/sql/resolver/dml/ob_update_resolver.cpp @@ -445,8 +445,12 @@ int ObUpdateResolver::resolve_table_list(const ParseNode &parse_tree) LOG_WARN("failed to resolve table", K(ret)); } else {/*do nothing*/} if (OB_SUCC(ret)) { + JoinedTable *joined_table = nullptr; if (OB_FAIL(column_namespace_checker_.add_reference_table(table_item))) { LOG_WARN("add reference table to namespace checker failed", K(ret)); + } else if (OB_FAIL(try_add_join_table_for_fts(table_item, joined_table))) { + LOG_WARN("fail to try add join table for fts", K(ret), KPC(table_item)); + } else if (nullptr != joined_table && FALSE_IT(table_item = static_cast(joined_table))) { } else if (OB_FAIL(update_stmt->add_from_item(table_item->table_id_, table_item->is_joined_table()))) { LOG_WARN("failed to add from item", K(ret)); } else if 
(OB_FAIL(check_need_fired_trigger(table_item))) { @@ -562,7 +566,9 @@ int ObUpdateResolver::generate_update_table_info(ObTableAssignment &table_assign } } if (OB_SUCC(ret)) { - if (OB_FAIL(update_stmt->get_update_table_info().push_back(table_info))) { + if (OB_FAIL(try_update_column_expr_for_fts(*table_item, table_info->column_exprs_))) { + LOG_WARN("fail to try update column expr for fts", K(ret), KPC(table_item)); + } else if (OB_FAIL(update_stmt->get_update_table_info().push_back(table_info))) { LOG_WARN("failed to push back table info", K(ret)); } else if (gindex_cnt > 0) { update_stmt->set_has_global_index(true); diff --git a/src/sql/resolver/expr/ob_expr_info_flag.h b/src/sql/resolver/expr/ob_expr_info_flag.h index 47277a36e5..84a5d97f55 100644 --- a/src/sql/resolver/expr/ob_expr_info_flag.h +++ b/src/sql/resolver/expr/ob_expr_info_flag.h @@ -59,6 +59,7 @@ enum ObExprInfoFlag IS_ORA_ROWSCN_EXPR, IS_OP_PSEUDO_COLUMN, IS_ASSIGN_EXPR, + IS_MATCH_EXPR, IS_ASSOCIATED_FLAG_END, //add IS_xxx flag before me //IS_CONST_EXPR and CNT_CONST_EXPR are not in the flag of associated extraction IS_CONST_EXPR, // expression contains calculable expression @@ -103,6 +104,7 @@ enum ObExprInfoFlag CNT_ORA_ROWSCN_EXPR, CNT_OP_PSEUDO_COLUMN, CNT_ASSIGN_EXPR, + CNT_MATCH_EXPR, CNT_OBJ_ACCESS_EXPR, CNT_ASSOCIATED_FLAG_END, //add CNT_xxx flag before me @@ -178,6 +180,7 @@ inline const char* get_expr_info_flag_str(const ObExprInfoFlag flag) case IS_ENUM_OR_SET: { ret = "IS_ENUM_OR_SET"; break; } case IS_ASSIGN_EXPR: { ret = "IS_ASSIGN_EXPR"; break; } case IS_CONST_EXPR: { ret = "IS_CONST_EXPR"; break; } + case IS_MATCH_EXPR: { ret = "IS_MATCH_EXPR"; break; } case CNT_CONST_EXPR: { ret = "CNT_CONST_EXPR"; break; } case CNT_CONST: { ret = "CNT_CONST"; break; } case CNT_COLUMN: { ret = "CNT_COLUMN"; break; } @@ -214,6 +217,7 @@ inline const char* get_expr_info_flag_str(const ObExprInfoFlag flag) case CNT_SEQ_EXPR: { ret = "CNT_SEQ_EXPR"; break; } case CNT_DYNAMIC_PARAM: { ret = 
"CNT_DYNAMIC_PARAM"; break; } case CNT_ENUM_OR_SET: { ret = "CNT_ENUM_OR_SET"; break; } + case CNT_MATCH_EXPR: { ret = "CNT_MATCH_EXPR"; break; } case CNT_ASSIGN_EXPR: { ret = "CNT_ASSIGN_EXPR"; break; } case BE_USED: { ret = "BE_USED"; break; } case IS_SIMPLE_COND: { ret = "IS_SIMPLE_COND"; break; } diff --git a/src/sql/resolver/expr/ob_raw_expr.cpp b/src/sql/resolver/expr/ob_raw_expr.cpp index 84ba00b2cf..9ce8c92a91 100644 --- a/src/sql/resolver/expr/ob_raw_expr.cpp +++ b/src/sql/resolver/expr/ob_raw_expr.cpp @@ -603,6 +603,72 @@ bool ObRawExpr::is_spatial_expr() const return IS_SPATIAL_OP(expr->get_expr_type()); } +bool ObRawExpr::is_json_domain_expr() const +{ + const ObRawExpr *expr = ObRawExprUtils::skip_inner_added_expr(this); + return IS_JSON_DOMAIN_OP(expr->get_expr_type()); +} + +ObRawExpr* ObRawExpr::get_json_domain_param_expr() +{ + ObRawExpr* param_expr = nullptr; + + if (get_expr_type() == T_FUN_SYS_JSON_MEMBER_OF) { + param_expr = get_param_expr(1); + } else if (get_expr_type() == T_FUN_SYS_JSON_CONTAINS) { + param_expr = get_param_expr(0); + } else if (get_expr_type() == T_FUN_SYS_JSON_OVERLAPS) { + param_expr = get_param_expr(0); + } + + return param_expr; +} + +bool ObRawExpr::is_domain_expr() const +{ + const ObRawExpr *expr = ObRawExprUtils::skip_inner_added_expr(this); + return IS_DOMAIN_OP(expr->get_expr_type()); +} + +bool ObRawExpr::is_domain_json_expr() const +{ + const ObRawExpr *expr = ObRawExprUtils::skip_inner_added_expr(this); + return IS_DOMAIN_JSON_OP(expr->get_expr_type()); +} + +bool ObRawExpr::is_multivalue_define_json_expr() const +{ + bool b_ret = false; + const ObRawExpr *sub_expr = nullptr; + if (type_ == T_FUN_SYS_JSON_QUERY && + get_param_count() >= 13 && + OB_NOT_NULL(sub_expr = get_param_expr(12)) && + sub_expr->is_const_expr()) { + const ObConstRawExpr *const_expr = static_cast(sub_expr); + b_ret = const_expr->get_value().get_int() == 0; + } + + return b_ret; +} + +bool ObRawExpr::extract_multivalue_json_expr(const 
ObRawExpr*& json_expr) const +{ + bool found = false; + + for (int i = 0; i < get_param_count() && !found; ++i) { + const ObRawExpr *child = get_param_expr(i); + if (OB_ISNULL(child)) { + } else if (child->type_ == T_FUN_SYS_JSON_QUERY) { + json_expr = child; + found = json_expr->is_multivalue_define_json_expr(); + break; + } else if (child->extract_multivalue_json_expr(json_expr)) { + found = (json_expr && json_expr->type_ == T_FUN_SYS_JSON_QUERY); + } + } + return found; +} + bool ObRawExpr::is_geo_expr() const { return IS_GEO_OP(get_expr_type()); @@ -718,6 +784,7 @@ bool ObRawExpr::same_as(const ObRawExpr &expr, return bret; } + //////////////////////////////////////////////////////////////// ObRawExpr *&ObTerminalRawExpr::get_param_expr(int64_t index) { @@ -804,6 +871,7 @@ int ObRawExpr::is_const_inherit_expr(bool &is_const_inherit, || T_FUN_SYS_UUID_SHORT == type_ || T_FUN_SYS_SEQ_NEXTVAL == type_ || T_FUN_SYS_AUTOINC_NEXTVAL == type_ + || T_FUN_SYS_DOC_ID == type_ || T_FUN_SYS_TABLET_AUTOINC_NEXTVAL == type_ || T_FUN_SYS_ROWNUM == type_ || T_FUN_SYS_ROWKEY_TO_ROWID == type_ @@ -2765,6 +2833,10 @@ int ObOpRawExpr::get_name_internal(char *buf, const int64_t buf_len, int64_t &po } } } + } else if (T_FUN_SYS_BM25 == get_expr_type()) { + if (OB_FAIL(BUF_PRINTF("BM25(k1=1.2, b=0.75, epsilon=0.25)"))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } } else { if (OB_FAIL(BUF_PRINTF("(%s", get_type_name(get_expr_type())))) { LOG_WARN("fail to BUF_PRINTF", K(ret)); @@ -3991,13 +4063,61 @@ void ObSysFunRawExpr::reset() dblink_id_ = OB_INVALID_ID; } +bool ObSysFunRawExpr::inner_json_expr_same_as( + const ObRawExpr &expr, + ObExprEqualCheckContext *check_context) const +{ + bool bool_ret = false; + const ObRawExpr *l_expr = this; + const ObRawExpr *r_expr = &expr; + + if (r_expr->is_domain_json_expr()) { + l_expr->extract_multivalue_json_expr(l_expr); + } + + if (l_expr->get_expr_type() == T_FUN_SYS_JSON_QUERY + && r_expr->is_domain_json_expr()) { + const ObRawExpr 
*r_param_expr = nullptr; + const ObRawExpr *l_param_expr = l_expr->get_param_expr(1); + if (r_expr->get_expr_type() == T_FUN_SYS_JSON_MEMBER_OF) { + r_param_expr = r_expr->get_param_expr(1); + } else { + r_param_expr = r_expr->get_param_expr(0); + } + if (OB_ISNULL(r_param_expr)) { + } else if (r_param_expr->is_wrappered_json_extract()) { + r_param_expr = r_param_expr->get_param_expr(0)->get_param_expr(1); + } else if (r_param_expr->get_expr_type() == T_FUN_SYS_JSON_EXTRACT) { + r_param_expr = r_param_expr->get_param_expr(1); + } + + if (OB_NOT_NULL(r_param_expr)) { + bool_ret = l_param_expr->same_as(*r_param_expr, check_context); + } + } else if (l_expr->get_expr_type() == r_expr->get_expr_type()) { + bool_ret = l_expr->same_as(*r_expr, check_context); + } else if (l_expr->is_wrappered_json_extract() + && r_expr->get_expr_type() == T_FUN_SYS_JSON_EXTRACT) { + l_expr = l_expr->get_param_expr(0); + bool_ret = l_expr->same_as(*r_expr, check_context); + } else if (r_expr->is_wrappered_json_extract() + && l_expr->get_expr_type() == T_FUN_SYS_JSON_EXTRACT) { + r_expr = r_expr->get_param_expr(0); + bool_ret = l_expr->same_as(*r_expr, check_context); + } + + return bool_ret; +} + bool ObSysFunRawExpr::inner_same_as( const ObRawExpr &expr, ObExprEqualCheckContext *check_context) const { bool bool_ret = false; if (get_expr_type() != expr.get_expr_type()) { - if (check_context != NULL && check_context->ora_numeric_compare_ && expr.is_const_raw_expr() + if (IS_QUERY_JSON_EXPR(expr.get_expr_type()) || IS_QUERY_JSON_EXPR(get_expr_type())) { + bool_ret = inner_json_expr_same_as(expr, check_context); + } else if (check_context != NULL && check_context->ora_numeric_compare_ && expr.is_const_raw_expr() && T_FUN_SYS_CAST == get_expr_type() && lib::is_oracle_mode()) { bool_ret = check_context->compare_ora_numeric_consts(*this, static_cast(expr)); } @@ -6703,6 +6823,16 @@ int ObRawExprFactory::create_raw_expr(ObRawExpr::ExprClass expr_class, } break; } + case 
ObRawExpr::EXPR_MATCH_AGAINST: { + ObMatchFunRawExpr *dest_match_against_expr = NULL; + if (OB_FAIL(create_raw_expr(expr_type, dest_match_against_expr)) + || OB_ISNULL(dest_match_against_expr)) { + LOG_WARN("failed to allocate raw expr", KPC(dest_match_against_expr), K(ret)); + } else { + dest = dest_match_against_expr; + } + break; + } case ObRawExpr::EXPR_INVALID_CLASS: { ret = OB_ERR_UNEXPECTED; LOG_WARN("does not implement expr type copy", K(ret), K(expr_type), K(expr_class)); @@ -6712,6 +6842,244 @@ int ObRawExprFactory::create_raw_expr(ObRawExpr::ExprClass expr_class, return ret; } +int ObMatchFunRawExpr::assign(const ObRawExpr &other) +{ + int ret = OB_SUCCESS; + if (OB_LIKELY(this != &other)) { + if (OB_UNLIKELY(get_expr_class() != other.get_expr_class() || + get_expr_type() != other.get_expr_type())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid input expr", K(ret), K(other.get_expr_type())); + } else if (OB_FAIL(ObRawExpr::assign(other))) { + LOG_WARN("copy in Base class ObRawExpr failed", K(ret)); + } else { + const ObMatchFunRawExpr &tmp = static_cast(other); + if (OB_FAIL(match_columns_.assign(tmp.match_columns_))) { + LOG_WARN("faile to assign match columns", K(ret)); + } else { + mode_flag_ = tmp.mode_flag_; + search_key_ = tmp.search_key_; + } + } + } + return ret; +} + +int ObMatchFunRawExpr::replace_expr(const common::ObIArray &other_exprs, + const common::ObIArray &new_exprs) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObRawExpr::replace_expr(other_exprs, new_exprs))) { + LOG_WARN("failed to replace expr", K(ret)); + } else if (OB_FAIL(ObTransformUtils::replace_exprs(other_exprs, + new_exprs, + match_columns_))) { + LOG_WARN("failed to replace expr", K(ret)); + } else if (OB_FAIL(ObTransformUtils::replace_expr(other_exprs, + new_exprs, + search_key_))) { + LOG_WARN("failed to replace expr", K(ret)); + } else { /*do nothing*/ } + return ret; +} + +int ObMatchFunRawExpr::do_visit(ObRawExprVisitor &visitor) +{ + return visitor.visit(*this); +} + 
+uint64_t ObMatchFunRawExpr::hash_internal(uint64_t seed) const +{ + uint64_t hash_value = seed; + for (int64_t i = 0; i < get_param_count(); ++i) { + if (NULL != get_param_expr(i)) { + hash_value = do_hash(*get_param_expr(i), hash_value); + } + } + hash_value = common::do_hash(mode_flag_, hash_value); + return hash_value; +} + +int ObMatchFunRawExpr::get_name_internal(char *buf, const int64_t buf_len, int64_t &pos, ExplainType type) const +{ + int ret = OB_SUCCESS; + if (lib::is_mysql_mode()) { + if (OB_FAIL(BUF_PRINTF("MATCH("))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else { + int64_t i = 0; + for (; OB_SUCC(ret) && i < get_match_columns().count() - 1; ++i) { + if (OB_ISNULL(get_match_columns().at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(get_match_columns().at(i)->get_name(buf, buf_len, pos, type))) { + LOG_WARN("fail to get_name", K(i), K(ret)); + } else if (OB_FAIL(BUF_PRINTF(", "))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else {} + } + if (OB_SUCC(ret)) { + if (OB_ISNULL(get_match_columns().at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(get_match_columns().at(i)->get_name(buf, buf_len, pos, type))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else if (OB_FAIL(BUF_PRINTF(") AGAINST("))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else if (OB_FAIL(get_search_key()->get_name(buf, buf_len, pos, type))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else if (NATURAL_LANGUAGE_MODE == get_mode_flag() && + OB_FAIL(BUF_PRINTF(""))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else if (BOOLEAN_MODE == get_mode_flag() && + OB_FAIL(BUF_PRINTF(" IN BOOLEAN MODE"))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else if (NATURAL_LANGUAGE_MODE_WITH_QUERY_EXPANSION == get_mode_flag() && + OB_FAIL(BUF_PRINTF(" IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION"))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else if (WITH_QUERY_EXPANSION == 
get_mode_flag() && + OB_FAIL(BUF_PRINTF(" WITH QUERY EXPANSION"))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else if (OB_FAIL(BUF_PRINTF(")"))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else if (EXPLAIN_EXTENDED == type) { + if (OB_FAIL(BUF_PRINTF("("))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else if (OB_FAIL(BUF_PRINTF("%p", this))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else if (OB_FAIL(BUF_PRINTF(")"))) { + LOG_WARN("fail to BUF_PRINTF", K(ret)); + } else {} + } + } + } + } else { + // jinmao TODO: serialize oracle contains() + } + return ret; +} + +bool ObMatchFunRawExpr::inner_same_as(const ObRawExpr &expr, ObExprEqualCheckContext *check_context) const +{ + bool bret = true; + if (get_expr_type() != expr.get_expr_type()) { + bret = false; + } else { + const ObMatchFunRawExpr *match_expr = static_cast(&expr); + if (mode_flag_ != match_expr->mode_flag_ || + match_columns_.count() != match_expr->match_columns_.count()) { + bret = false; + } else if (OB_ISNULL(search_key_) || OB_ISNULL(match_expr->search_key_) || + !search_key_->same_as(*match_expr->search_key_, check_context)) { + bret = false; + } + for (int64_t i = 0; bret && i < match_columns_.count(); i++) { + if (OB_ISNULL(match_columns_.at(i)) || OB_ISNULL(match_expr->match_columns_.at(i)) || + !match_columns_.at(i)->same_as(*match_expr->match_columns_.at(i), check_context)) { + bret = false; + } + } + } + return bret; +} + +void ObMatchFunRawExpr::clear_child() +{ + match_columns_.reset(); + search_key_ = NULL; + mode_flag_ = NATURAL_LANGUAGE_MODE; +} + +void ObMatchFunRawExpr::reset() +{ + ObRawExpr::reset(); + clear_child(); +} + +int64_t ObMatchFunRawExpr::get_param_count() const +{ + return match_columns_.count() + 1 /*search key*/; +} + +const ObRawExpr *ObMatchFunRawExpr::get_param_expr(int64_t index) const +{ + const ObRawExpr *ptr_ret = NULL; + if (0 <= index && index < match_columns_.count()) { + ptr_ret = match_columns_.at(index); + } else if (index == 
match_columns_.count()) { + ptr_ret = search_key_; + } else { /*do nothing*/ } + return ptr_ret; +} + +ObRawExpr *&ObMatchFunRawExpr::get_param_expr(int64_t index) +{ + if (0 <= index && index < match_columns_.count()) { + return match_columns_.at(index); + } else if (index == match_columns_.count()) { + return search_key_; + } else { + return USELESS_POINTER; + } + return USELESS_POINTER; +} + +int ObMatchFunRawExpr::get_table_id(uint64_t &table_id) +{ + int ret = OB_SUCCESS; + table_id = OB_INVALID_ID; + if (get_match_columns().count() < 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < get_match_columns().count(); i++) { + ObColumnRefRawExpr *match_col = NULL; + if (OB_ISNULL(get_match_columns().at(i)) || !get_match_columns().at(i)->is_column_ref_expr()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret)); + } else if (OB_FALSE_IT(match_col = static_cast(get_match_columns().at(i)))) { + } else if (table_id != OB_INVALID_ID && table_id != match_col->get_table_id()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret)); + } else { + table_id = match_col->get_table_id(); + } + } + } + return ret; +} + +int ObMatchFunRawExpr::get_match_column_type(ObExprResType &result_type) +{ + int ret = OB_SUCCESS; + if (get_match_columns().count() < 1) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("unexpected empty match column set", K(ret)); + } else if (OB_ISNULL(get_match_columns().at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + result_type.assign(get_match_columns().at(0)->get_result_type()); + } + return ret; +} + +int ObMatchFunRawExpr::replace_param_expr(int64_t index, ObRawExpr *expr) +{ + int ret = common::OB_SUCCESS; + if (OB_UNLIKELY(index < 0 || index >= get_param_count())) { + ret = common::OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(index), K(get_param_count())); + } else if (OB_UNLIKELY(NULL == 
expr)) { + ret = common::OB_INVALID_ARGUMENT; + LOG_WARN("unexpected null expr", K(ret)); + } else { + ObRawExpr *&target_expr = get_param_expr(index); + target_expr = expr; + } + return ret; +} + int ObUDTConstructorRawExpr::get_schema_object_version(share::schema::ObSchemaObjVersion &obj_version) { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/expr/ob_raw_expr.h b/src/sql/resolver/expr/ob_raw_expr.h index f94411bfc4..8d4a97fecb 100644 --- a/src/sql/resolver/expr/ob_raw_expr.h +++ b/src/sql/resolver/expr/ob_raw_expr.h @@ -103,6 +103,21 @@ extern ObRawExpr *USELESS_POINTER; || ((op) == T_FUN_SYS_ST_CROSSES) \ || ((op) == T_FUN_SYS_ST_OVERLAPS)) \ +#define IS_DOMAIN_OP(op) \ + (((op) == T_FUN_SYS_JSON_MEMBER_OF) \ + || ((op) == T_FUN_SYS_JSON_CONTAINS) \ + || ((op) == T_FUN_SYS_JSON_OVERLAPS) \ + || ((op) == T_FUN_SYS_ST_INTERSECTS) \ + || ((op) == T_FUN_SYS_ST_COVERS) \ + || ((op) == T_FUN_SYS_ST_DWITHIN) \ + || ((op) == T_FUN_SYS_ST_WITHIN) \ + || ((op) == T_FUN_SYS_ST_CONTAINS)) \ + +#define IS_DOMAIN_JSON_OP(op) \ + (((op) == T_FUN_SYS_JSON_MEMBER_OF) \ + || ((op) == T_FUN_SYS_JSON_CONTAINS) \ + || ((op) == T_FUN_SYS_JSON_OVERLAPS)) \ + #define IS_MYSQL_GEO_OP(op) \ (((op) == T_FUN_SYS_ST_GEOMFROMTEXT) \ || ((op) == T_FUN_SYS_ST_INTERSECTION) \ @@ -180,6 +195,23 @@ extern ObRawExpr *USELESS_POINTER; #define IS_SPATIAL_EXPR(op) \ ((op) >= T_FUN_SYS_ST_LONGITUDE && (op) <= T_FUN_SYS_ST_LATITUDE) +#define IS_QUERY_JSON_EXPR(op) \ + (((op) == T_FUN_SYS_JSON_EXTRACT) \ + || ((op) == T_FUN_SYS_JSON_CONTAINS) \ + || ((op) == T_FUN_SYS_JSON_CONTAINS_PATH) \ + || ((op) == T_FUN_SYS_JSON_QUOTE) \ + || ((op) == T_FUN_SYS_JSON_UNQUOTE) \ + || ((op) == T_FUN_SYS_JSON_OVERLAPS) \ + || ((op) == T_FUN_SYS_JSON_MEMBER_OF) \ + || ((op) == T_FUN_SYS_JSON_VALUE)) + +// JSON_CONTAINS & JSON_OVERLAPS not support yet +#define IS_JSON_DOMAIN_OP(op) \ + (((op) == T_FUN_SYS_JSON_MEMBER_OF) /*\ + || ((op) == T_FUN_SYS_JSON_OVERLAPS) \ + || ((op) == T_FUN_SYS_JSON_CONTAINS)*/) \ + + 
// ObSqlBitSet is a simple bitset, in order to avoid memory exposure // ObBitSet is too large just for a simple bitset const static int64_t DEFAULT_SQL_BITSET_SIZE = 32; @@ -1571,6 +1603,7 @@ class ObOpRawExpr; class ObWinFunRawExpr; class ObUserVarIdentRawExpr; struct ObUDFInfo; +class ObMatchFunRawExpr; template struct ObResolveContext { @@ -1614,7 +1647,8 @@ struct ObResolveContext view_ref_id_(OB_INVALID_ID), is_variable_allowed_(true), is_expanding_view_(false), - is_in_system_view_(false) + is_in_system_view_(false), + match_exprs_(NULL) { } @@ -1662,6 +1696,7 @@ struct ObResolveContext bool is_variable_allowed_; bool is_expanding_view_; bool is_in_system_view_; + common::ObIArray *match_exprs_; }; typedef ObResolveContext ObExprResolveContext; @@ -1917,7 +1952,11 @@ public: bool is_deterministic() const { return is_deterministic_; } bool is_bool_expr() const; bool is_spatial_expr() const; + bool is_json_domain_expr() const; + ObRawExpr* get_json_domain_param_expr(); bool is_geo_expr() const; + bool is_domain_expr() const; + bool is_domain_json_expr() const; bool is_mysql_geo_expr() const; bool is_priv_geo_expr() const; bool is_xml_expr() const; @@ -1932,6 +1971,12 @@ public: void set_may_add_interval_part(MayAddIntervalPart flag) { may_add_interval_part_ = flag; } + bool is_wrappered_json_extract() const { + return (type_ == T_FUN_SYS_JSON_UNQUOTE && OB_NOT_NULL(get_param_expr(0)) && get_param_expr(0)->type_ == T_FUN_SYS_JSON_EXTRACT); + } + bool extract_multivalue_json_expr(const ObRawExpr*& json_expr) const; + bool is_multivalue_define_json_expr() const; + ObRawExpr* get_wrapper_json_extract() { return get_param_expr(0); } MayAddIntervalPart get_may_add_interval_part() const { return may_add_interval_part_;} RuntimeFilterType get_runtime_filter_type() const { return runtime_filter_type_; } @@ -2367,6 +2412,7 @@ public: const common::ObIArray &new_exprs) override; virtual bool inner_same_as(const ObRawExpr &expr, ObExprEqualCheckContext *check_context) 
const override; + virtual int do_visit(ObRawExprVisitor &visitor) override; virtual uint64_t hash_internal(uint64_t seed) const; @@ -2662,7 +2708,12 @@ public: inline bool is_default_identity_column() const { return share::schema::ObSchemaUtils::is_default_identity_column(column_flags_); } inline bool is_default_on_null_identity_column() const { return share::schema::ObSchemaUtils::is_default_on_null_identity_column(column_flags_); } inline bool is_fulltext_column() const { return share::schema::ObSchemaUtils::is_fulltext_column(column_flags_); } + inline bool is_doc_id_column() const { return share::schema::ObSchemaUtils::is_doc_id_column(column_flags_); } + inline bool is_word_segment_column() const { return column_name_.prefix_match(OB_WORD_SEGMENT_COLUMN_NAME_PREFIX); } + inline bool is_word_count_column() const { return column_name_.prefix_match(OB_WORD_COUNT_COLUMN_NAME_PREFIX); } inline bool is_spatial_generated_column() const { return share::schema::ObSchemaUtils::is_spatial_generated_column(column_flags_); } + inline bool is_multivalue_generated_column() const { return share::schema::ObSchemaUtils::is_multivalue_generated_column(column_flags_); } + inline bool is_multivalue_generated_array_column() const { return share::schema::ObSchemaUtils::is_multivalue_generated_array_column(column_flags_); } inline bool is_cte_generated_column() const { return share::schema::ObSchemaUtils::is_cte_generated_column(column_flags_); } inline bool has_generated_column_deps() const { return column_flags_ & GENERATED_DEPS_CASCADE_FLAG; } inline bool is_table_part_key_column() const { return column_flags_ & TABLE_PART_KEY_COLUMN_FLAG; } @@ -2869,6 +2920,7 @@ public: int get_name_internal(char *buf, const int64_t buf_len, int64_t &pos, ExplainType type) const; virtual bool inner_same_as(const ObRawExpr &expr, ObExprEqualCheckContext *check_context = NULL) const override; + VIRTUAL_TO_STRING_KV_CHECK_STACK_OVERFLOW(N_ITEM_TYPE, type_, N_RESULT_TYPE, result_type_, N_EXPR_INFO, 
info_, @@ -2973,6 +3025,7 @@ public: virtual bool inner_same_as(const ObRawExpr &expr, ObExprEqualCheckContext *check_context = NULL) const override; + //used for jit expr virtual int64_t get_children_count() const { @@ -3541,7 +3594,8 @@ public: virtual void reset(); virtual bool inner_same_as(const ObRawExpr &expr, ObExprEqualCheckContext *check_context = NULL) const override; - + virtual bool inner_json_expr_same_as(const ObRawExpr &expr, + ObExprEqualCheckContext *check_context = NULL) const; virtual int do_visit(ObRawExprVisitor &visitor) override; virtual uint64_t hash_internal(uint64_t seed) const @@ -3620,6 +3674,7 @@ public: uint64_t get_sequence_id() const { return sequence_id_; } virtual bool inner_same_as(const ObRawExpr &expr, ObExprEqualCheckContext *check_context = NULL) const override; + virtual int get_name_internal(char *buf, const int64_t buf_len, int64_t &pos, ExplainType type) const override; private: common::ObString database_name_; // sequence database name @@ -3642,6 +3697,7 @@ public: const share::schema::ObUDFMeta &get_udf_meta() const { return udf_meta_; } virtual bool inner_same_as(const ObRawExpr &expr, ObExprEqualCheckContext *check_context = NULL) const override; + private: //for udf function info share::schema::ObUDFMeta udf_meta_; @@ -3990,6 +4046,7 @@ public: int inner_deep_copy(ObIRawExprCopier &copier) override; virtual bool inner_same_as(const ObRawExpr &expr, ObExprEqualCheckContext *check_context = NULL) const override; + virtual ObExprOperator *get_op() override; int check_param() { return common::OB_SUCCESS; } @@ -4695,6 +4752,7 @@ public: const common::ObIArray &new_exprs) override; virtual bool inner_same_as(const ObRawExpr &expr, ObExprEqualCheckContext *check_context = NULL) const override; + virtual int do_visit(ObRawExprVisitor &visitor) override; virtual int get_name_internal(char *buf, const int64_t buf_len, @@ -4708,6 +4766,73 @@ private: const char *name_; }; +class ObMatchFunRawExpr : public ObRawExpr +{ +public: 
+ ObMatchFunRawExpr() + : ObRawExpr(), + mode_flag_(NATURAL_LANGUAGE_MODE), + match_columns_(), + search_key_(NULL) + { + set_expr_class(ObIRawExpr::EXPR_MATCH_AGAINST); + } + + ObMatchFunRawExpr(common::ObIAllocator &alloc) + : ObRawExpr(alloc), + mode_flag_(NATURAL_LANGUAGE_MODE), + match_columns_(), + search_key_(NULL) + { + set_expr_class(ObIRawExpr::EXPR_MATCH_AGAINST); + } + + virtual ~ObMatchFunRawExpr() {} + int assign(const ObRawExpr &other) override; + virtual int replace_expr(const common::ObIArray &other_exprs, + const common::ObIArray &new_exprs) override; + virtual int do_visit(ObRawExprVisitor &visitor) override; + virtual uint64_t hash_internal(uint64_t seed) const; + int get_name_internal(char *buf, const int64_t buf_len, int64_t &pos, ExplainType type) const override; + virtual bool inner_same_as(const ObRawExpr &expr, ObExprEqualCheckContext *check_context = NULL) const override; + virtual void clear_child() override; + virtual void reset(); + virtual int64_t get_param_count() const; + virtual const ObRawExpr *get_param_expr(int64_t index) const; + virtual ObRawExpr *&get_param_expr(int64_t index); + inline void set_mode_flag(ObMatchAgainstMode mode_flag) { mode_flag_ = mode_flag; } + inline ObMatchAgainstMode get_mode_flag() const { return mode_flag_; } + inline int set_match_columns(ObIArray &match_columns) + { + return match_columns_.assign(match_columns); + } + inline const ObIArray& get_match_columns() const { return match_columns_; } + inline ObIArray& get_match_columns() { return match_columns_; } + inline void set_search_key(ObRawExpr *search_key) { search_key_ = search_key; } + inline const ObRawExpr *get_search_key() const { return search_key_; } + inline ObRawExpr *get_search_key() { return search_key_; } + int get_table_id(uint64_t &table_id); + int get_match_column_type(ObExprResType &result_type); + inline int64_t get_search_key_idx() { return get_match_columns().count(); } + + int replace_param_expr(int64_t index, ObRawExpr *expr); 
+ + VIRTUAL_TO_STRING_KV( + N_ITEM_TYPE, type_, + N_RESULT_TYPE, result_type_, + N_EXPR_INFO, info_, + N_REL_ID, rel_ids_, + K_(mode_flag), + K_(match_columns), + KPC_(search_key)); + +private: + DISALLOW_COPY_AND_ASSIGN(ObMatchFunRawExpr); + ObMatchAgainstMode mode_flag_; // for MySQL search mode flag + ObSEArray match_columns_; // columns for choosing full-text index to use + ObRawExpr *search_key_; // user defined search query +}; + /// visitor interface class ObRawExprVisitor { @@ -4744,6 +4869,7 @@ public: virtual int visit(ObAliasRefRawExpr &expr) { UNUSED(expr); return common::OB_SUCCESS; } virtual int visit(ObWinFunRawExpr &expr) { UNUSED(expr); return common::OB_SUCCESS; } virtual int visit(ObPseudoColumnRawExpr &expr) { UNUSED(expr); return common::OB_SUCCESS; } + virtual int visit(ObMatchFunRawExpr &expr) { UNUSED(expr); return common::OB_SUCCESS; } virtual bool skip_child(ObRawExpr &expr) { UNUSED(expr); return false; } private: // disallow copy diff --git a/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp b/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp index d674f3039f..b3e3da4ebd 100644 --- a/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp @@ -2979,6 +2979,38 @@ int ObRawExprDeduceType::visit(ObUDFRawExpr &expr) return ret; } +int ObRawExprDeduceType::visit(ObMatchFunRawExpr &expr) +{ + int ret = OB_SUCCESS; + ObExprResType result_type(alloc_); + result_type.set_double(); + expr.set_result_type(result_type); + ObExprResType col_result_type; + // cast search key if need + if (OB_ISNULL(expr.get_search_key())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(expr.get_match_column_type(col_result_type))) { + LOG_WARN("failed to get match column type", K(ret)); + } else if (expr.get_search_key()->get_result_type().get_type() != ObVarcharType || + col_result_type.get_collation_type() != expr.get_search_key()->get_result_type().get_collation_type()) { + 
ObExprResType search_key_type = expr.get_search_key()->get_result_type(); + ObCastMode def_cast_mode = CM_NONE; + search_key_type.set_varchar(); + search_key_type.set_length(OB_MAX_MYSQL_VARCHAR_LENGTH); + search_key_type.set_collation_type(col_result_type.get_collation_type()); + search_key_type.set_collation_level(search_key_type.get_collation_level()); + search_key_type.set_calc_meta(search_key_type.get_obj_meta()); + if (OB_FAIL(ObSQLUtils::get_default_cast_mode(false, 0, my_session_, + def_cast_mode))) { + LOG_WARN("get_default_cast_mode failed", K(ret)); + } else if (OB_FAIL(try_add_cast_expr(expr, expr.get_search_key_idx(), search_key_type, def_cast_mode))) { + LOG_WARN("add_implicit_cast failed", K(ret)); + } + } + return ret; +} + int ObRawExprDeduceType::init_normal_udf_expr(ObNonTerminalRawExpr &expr, ObExprOperator *op) { int ret = OB_SUCCESS; @@ -3393,12 +3425,15 @@ bool ObRawExprDeduceType::skip_cast_expr(const ObRawExpr &parent, } -static inline bool skip_cast_json_expr(const ObRawExpr *child_ptr, +static inline bool skip_cast_json_expr(const ObRawExpr *expr, const ObExprResType &input_type, ObItemType parent_expr_type) { - return (child_ptr->get_expr_type() == T_FUN_SYS_CAST && need_calc_json(parent_expr_type) && - (input_type.get_calc_type() == child_ptr->get_result_meta().get_type() || - input_type.get_calc_collation_type() == child_ptr->get_result_meta().get_collation_type())); + bool b_ret = (expr->get_expr_type() == T_FUN_SYS_CAST && + need_calc_json(parent_expr_type) && + (input_type.get_calc_type() == expr->get_result_meta().get_type() || + input_type.get_calc_collation_type() == expr->get_result_meta().get_collation_type())); + + return b_ret; } // 该函数会给case表达式按需增加隐式cast diff --git a/src/sql/resolver/expr/ob_raw_expr_deduce_type.h b/src/sql/resolver/expr/ob_raw_expr_deduce_type.h index 21e433d126..b50b60fc32 100644 --- a/src/sql/resolver/expr/ob_raw_expr_deduce_type.h +++ b/src/sql/resolver/expr/ob_raw_expr_deduce_type.h @@ -61,6 +61,7 @@ 
public: virtual int visit(ObPseudoColumnRawExpr &expr); virtual int visit(ObUDFRawExpr &expr); virtual int visit(ObPlQueryRefRawExpr &expr); + virtual int visit(ObMatchFunRawExpr &expr); int add_implicit_cast(ObOpRawExpr &parent, const ObCastMode &cast_mode); int add_implicit_cast(ObCaseOpRawExpr &parent, const ObCastMode &cast_mode); diff --git a/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp b/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp index a9fe9c5f6d..369ececd36 100644 --- a/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp @@ -471,6 +471,7 @@ int ObRawExprInfoExtractor::visit(ObSysFunRawExpr &expr) } else { // these functions should not be calculated first if (T_FUN_SYS_AUTOINC_NEXTVAL == expr.get_expr_type() + || T_FUN_SYS_DOC_ID == expr.get_expr_type() || T_FUN_SYS_TABLET_AUTOINC_NEXTVAL == expr.get_expr_type() || T_FUN_SYS_SLEEP == expr.get_expr_type() || (T_FUN_SYS_LAST_INSERT_ID == expr.get_expr_type() && expr.get_param_count() > 0) @@ -651,6 +652,19 @@ int ObRawExprInfoExtractor::visit(ObPseudoColumnRawExpr &expr) return ret; } +int ObRawExprInfoExtractor::visit(ObMatchFunRawExpr &expr) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(clear_info(expr))) { + LOG_WARN("failed to clear info", K(ret)); + } else if (OB_FAIL(pull_info(expr))) { + LOG_WARN("pull match against info failed", K(ret)); + } else if (OB_FAIL(expr.add_flag(IS_MATCH_EXPR))) { + LOG_WARN("add flag to match against failed", K(ret)); + } + return ret; +} + } // namespace sql } // namespace oceanbase diff --git a/src/sql/resolver/expr/ob_raw_expr_info_extractor.h b/src/sql/resolver/expr/ob_raw_expr_info_extractor.h index 9f55421a19..32260bb5cb 100644 --- a/src/sql/resolver/expr/ob_raw_expr_info_extractor.h +++ b/src/sql/resolver/expr/ob_raw_expr_info_extractor.h @@ -42,6 +42,7 @@ ObRawExprInfoExtractor() virtual int visit(ObWinFunRawExpr &expr); virtual int visit(ObPseudoColumnRawExpr &expr); virtual int 
visit(ObPlQueryRefRawExpr &expr); + virtual int visit(ObMatchFunRawExpr &expr); private: // types and constants private: diff --git a/src/sql/resolver/expr/ob_raw_expr_modify_column_name.cpp b/src/sql/resolver/expr/ob_raw_expr_modify_column_name.cpp index 3145bce3dd..d9d6d0051b 100644 --- a/src/sql/resolver/expr/ob_raw_expr_modify_column_name.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_modify_column_name.cpp @@ -81,6 +81,12 @@ int ObRawExprModifyColumnName::visit(ObAggFunRawExpr &expr) { return common::OB_SUCCESS; } +int ObRawExprModifyColumnName::visit(ObMatchFunRawExpr &expr) +{ + UNUSED (expr); + return common::OB_SUCCESS; +} + int ObRawExprModifyColumnName::visit(ObSysFunRawExpr &expr) { UNUSED (expr); return common::OB_SUCCESS; diff --git a/src/sql/resolver/expr/ob_raw_expr_modify_column_name.h b/src/sql/resolver/expr/ob_raw_expr_modify_column_name.h index f0e4fa83f9..7dac72885c 100644 --- a/src/sql/resolver/expr/ob_raw_expr_modify_column_name.h +++ b/src/sql/resolver/expr/ob_raw_expr_modify_column_name.h @@ -53,6 +53,7 @@ public: virtual int visit(ObWinFunRawExpr &expr); virtual int visit(ObPseudoColumnRawExpr &expr); virtual int visit(ObPlQueryRefRawExpr &expr); + virtual int visit(ObMatchFunRawExpr &expr); private: DISALLOW_COPY_AND_ASSIGN(ObRawExprModifyColumnName); diff --git a/src/sql/resolver/expr/ob_raw_expr_part_expr_checker.cpp b/src/sql/resolver/expr/ob_raw_expr_part_expr_checker.cpp index dd9afaa182..e14ed4cd3f 100644 --- a/src/sql/resolver/expr/ob_raw_expr_part_expr_checker.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_part_expr_checker.cpp @@ -229,6 +229,13 @@ int ObRawExprPartExprChecker::visit(ObAggFunRawExpr &expr) return ret; } +int ObRawExprPartExprChecker::visit(ObMatchFunRawExpr &expr) +{ + int ret = OB_ERR_UNEXPECTED; + UNUSED(expr); + return ret; +} + int ObRawExprPartExprChecker::visit(ObSysFunRawExpr &expr) { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/expr/ob_raw_expr_part_expr_checker.h 
b/src/sql/resolver/expr/ob_raw_expr_part_expr_checker.h index 999bca7566..85ea7f32d6 100644 --- a/src/sql/resolver/expr/ob_raw_expr_part_expr_checker.h +++ b/src/sql/resolver/expr/ob_raw_expr_part_expr_checker.h @@ -42,6 +42,7 @@ public: virtual int visit(ObSetOpRawExpr &expr); virtual int visit(ObAliasRefRawExpr &expr); virtual int visit(ObPlQueryRefRawExpr &expr); + virtual int visit(ObMatchFunRawExpr &expr); private: // types and constants const ObPartitionFuncType func_type_; diff --git a/src/sql/resolver/expr/ob_raw_expr_part_func_checker.cpp b/src/sql/resolver/expr/ob_raw_expr_part_func_checker.cpp index 21d45e2b74..7c790f3f53 100644 --- a/src/sql/resolver/expr/ob_raw_expr_part_func_checker.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_part_func_checker.cpp @@ -143,6 +143,14 @@ int ObRawExprPartFuncChecker::visit(ObAggFunRawExpr &expr) return ret; } +int ObRawExprPartFuncChecker::visit(ObMatchFunRawExpr &expr) +{ + int ret = OB_ERR_PARTITION_FUNCTION_IS_NOT_ALLOWED; + LOG_WARN("invalid partition function", K(ret), + "item_type", expr.get_expr_type()); + return ret; +} + int ObRawExprPartFuncChecker::visit(ObSysFunRawExpr &expr) { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/expr/ob_raw_expr_part_func_checker.h b/src/sql/resolver/expr/ob_raw_expr_part_func_checker.h index a75f77bbd5..2f050e4d4c 100644 --- a/src/sql/resolver/expr/ob_raw_expr_part_func_checker.h +++ b/src/sql/resolver/expr/ob_raw_expr_part_func_checker.h @@ -40,6 +40,7 @@ public: virtual int visit(ObSetOpRawExpr &expr); virtual int visit(ObAliasRefRawExpr &expr); virtual int visit(ObPlQueryRefRawExpr &expr); + virtual int visit(ObMatchFunRawExpr &expr); private: // types and constants bool gen_col_check_; diff --git a/src/sql/resolver/expr/ob_raw_expr_print_visitor.cpp b/src/sql/resolver/expr/ob_raw_expr_print_visitor.cpp index 9ab4ff58e9..9fbc0a11fa 100644 --- a/src/sql/resolver/expr/ob_raw_expr_print_visitor.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_print_visitor.cpp @@ -178,5 +178,16 
@@ int ObRawExprPrintVisitor::visit(ObSetOpRawExpr &expr) } return ret; } + +int ObRawExprPrintVisitor::visit(ObMatchFunRawExpr &expr) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(databuff_printf(buf_, buf_len_, pos_, "%s<%ld>|", get_type_name(expr.get_expr_type()), + expr.get_param_count()))) { + LOG_WARN("databuff setop failed", K(ret)); + } + return ret; +} + } // namespace sql } // namespace oceanbase diff --git a/src/sql/resolver/expr/ob_raw_expr_print_visitor.h b/src/sql/resolver/expr/ob_raw_expr_print_visitor.h index 15d12f24f9..3fe8adc513 100644 --- a/src/sql/resolver/expr/ob_raw_expr_print_visitor.h +++ b/src/sql/resolver/expr/ob_raw_expr_print_visitor.h @@ -36,6 +36,7 @@ public: virtual int visit(ObSysFunRawExpr &expr); virtual int visit(ObSetOpRawExpr &expr); virtual int visit(ObPlQueryRefRawExpr &expr); + virtual int visit(ObMatchFunRawExpr &expr); int64_t to_string(char* buf, const int64_t buf_len) const; private: // types and constants diff --git a/src/sql/resolver/expr/ob_raw_expr_replacer.cpp b/src/sql/resolver/expr/ob_raw_expr_replacer.cpp index ad3de654e1..46f52e3407 100644 --- a/src/sql/resolver/expr/ob_raw_expr_replacer.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_replacer.cpp @@ -259,6 +259,28 @@ int ObRawExprReplacer::visit(ObAggFunRawExpr &expr) return ret; } +int ObRawExprReplacer::visit(ObMatchFunRawExpr &expr) +{ + int ret = OB_SUCCESS; + bool skip_expr = false; + if (OB_FAIL(check_skip_expr(expr, skip_expr))) { + LOG_WARN("failed to check skip expr"); + } else if (!skip_expr) { + ObRawExpr *new_expr = NULL; + bool need_replace = false; + int64_t count = expr.get_param_count(); + for (int64_t i = 0; OB_SUCC(ret) && i < count; ++i) { + if (OB_FAIL(check_need_replace(expr.get_param_expr(i), new_expr, need_replace))) { + LOG_WARN("failed to check need replace", K(ret)); + } else if (need_replace) { + expr.get_param_expr(i) = new_expr; + replace_happened_ = true; + } + } + } + return ret; +} + int ObRawExprReplacer::visit(ObSysFunRawExpr &expr) { 
int ret = OB_SUCCESS; diff --git a/src/sql/resolver/expr/ob_raw_expr_replacer.h b/src/sql/resolver/expr/ob_raw_expr_replacer.h index d00e46f407..70b75a974c 100644 --- a/src/sql/resolver/expr/ob_raw_expr_replacer.h +++ b/src/sql/resolver/expr/ob_raw_expr_replacer.h @@ -52,6 +52,7 @@ public: virtual int visit(ObPlQueryRefRawExpr &expr) override; virtual int visit(ObAliasRefRawExpr &expr) override; virtual int visit(ObPseudoColumnRawExpr &expr) override; + virtual int visit(ObMatchFunRawExpr &expr) override; virtual bool skip_child(ObRawExpr &expr) override; bool get_replace_happened() const { return replace_happened_; } diff --git a/src/sql/resolver/expr/ob_raw_expr_resolver.h b/src/sql/resolver/expr/ob_raw_expr_resolver.h index 676a8d005f..1763876d5c 100644 --- a/src/sql/resolver/expr/ob_raw_expr_resolver.h +++ b/src/sql/resolver/expr/ob_raw_expr_resolver.h @@ -37,7 +37,8 @@ public: common::ObIArray &win_exprs, common::ObIArray &udf_exprs, common::ObIArray &op_exprs, - common::ObIArray &user_var_exprs) = 0; + common::ObIArray &user_var_exprs, + common::ObIArray &match_exprs) = 0; private: // disallow copy DISALLOW_COPY_AND_ASSIGN(ObRawExprResolver); diff --git a/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp b/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp index 02a2475274..9cbaa5962e 100644 --- a/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp @@ -56,7 +56,8 @@ int ObRawExprResolverImpl::resolve(const ParseNode *node, ObIArray &win_exprs, ObIArray &udf_info, ObIArray &op_exprs, - ObIArray &user_var_exprs) + ObIArray &user_var_exprs, + ObIArray &match_exprs) { ctx_.columns_ = &columns; ctx_.op_exprs_ = &op_exprs; @@ -66,6 +67,7 @@ int ObRawExprResolverImpl::resolve(const ParseNode *node, ctx_.win_exprs_ = &win_exprs; ctx_.udf_info_ = &udf_info; ctx_.user_var_exprs_ = &user_var_exprs; + ctx_.match_exprs_ = &match_exprs; int ret = recursive_resolve(node, expr); if (OB_SUCC(ret)) { if 
(OB_FAIL(expr->extract_info())) { @@ -1056,6 +1058,12 @@ int ObRawExprResolverImpl::do_recursive_resolve(const ParseNode *node, ObRawExpr LOG_ERROR("A BUG, Never Be Here!!!", K(ret)); break; } + case T_FUN_MATCH_AGAINST: { + if (OB_FAIL(process_match_against(node, expr))) { + LOG_WARN("process fun sys match against failed", K(ret)); + } + break; + } case T_WINDOW_FUNCTION: { const int64_t orig_win_func_cnt = ctx_.win_exprs_->count(); if (OB_FAIL(process_window_function_node(node, expr))) { @@ -6032,7 +6040,7 @@ int ObRawExprResolverImpl::process_json_query_node(const ParseNode *node, ObRawE if(OB_SUCC(ret) && T_FUN_SYS_JSON_QUERY != node->type_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("node->type_ error"); - } else if (OB_SUCC(ret) && 11 != node->num_child_) { + } else if (OB_SUCC(ret) && 13 != node->num_child_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("num_child_ error"); } @@ -6065,7 +6073,15 @@ int ObRawExprResolverImpl::process_json_query_node(const ParseNode *node, ObRawE } } } - if (OB_SUCC(ret) && returning_type->int16_values_[OB_NODE_CAST_TYPE_IDX] != T_VARCHAR + + bool is_mvi = false; + + if (OB_NOT_NULL(node->children_[8])) { + is_mvi = (node->children_[8]->value_ > 0 ); + } + + if (OB_SUCC(ret) && !is_mvi + && returning_type->int16_values_[OB_NODE_CAST_TYPE_IDX] != T_VARCHAR && returning_type->int16_values_[OB_NODE_CAST_TYPE_IDX] != T_LONGTEXT && returning_type->int16_values_[OB_NODE_CAST_TYPE_IDX] != T_JSON && returning_type->type_ != T_NULL) { @@ -6077,7 +6093,8 @@ int ObRawExprResolverImpl::process_json_query_node(const ParseNode *node, ObRawE LOG_WARN("invalid user.table.column, table.column, or column specification", K(ret)); } - // [json_text][json_path][returning_type][truncate][scalars][pretty][ascii][wrapper][error_type][empty_type][mismatch] + // [0:json_text][1:json_path][2:returning_type][3:truncate][4:scalars][5:pretty][6:ascii] + // [7:wrapper][8:asis][9:error_type][10:empty_type][11:mismatch][12:multivalue] for (int32_t i = 0; OB_SUCC(ret) && i < 
num; i++) { ObRawExpr *para_expr = NULL; CK(OB_NOT_NULL(node->children_[i])); @@ -6861,6 +6878,67 @@ int ObRawExprResolverImpl::resolve_udf_node(const ParseNode *node, ObUDFInfo &ud return ret; } +int ObRawExprResolverImpl::process_match_against(const ParseNode *node, ObRawExpr *&expr) +{ + int ret = OB_SUCCESS; + ObMatchFunRawExpr *match_against = NULL; + if (OB_ISNULL(node) || OB_ISNULL(node->children_) || node->num_child_ != 2 || OB_ISNULL(ctx_.match_exprs_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument for match against", K(ret), K(node)); + } else if (OB_ISNULL(node->children_[0]) || node->children_[0]->type_ != T_MATCH_COLUMN_LIST) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("match column list is unexpected", K(ret), K(node->children_[0])); + } else if (OB_FAIL(ctx_.expr_factory_.create_raw_expr(T_FUN_MATCH_AGAINST, match_against))) { + LOG_WARN("create match_against expr failed", K(ret)); + } else if (OB_ISNULL(match_against)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(ctx_.match_exprs_->push_back(match_against))) { + LOG_WARN("failed to push back expr", K(ret)); + } else { + // resolve match columns + ParseNode *column_list_node = node->children_[0]; + for (int64_t i = 0; OB_SUCC(ret) && i < column_list_node->num_child_; ++i) { + const ParseNode *column_node = column_list_node->children_[i]; + ObRawExpr *column_ref = NULL; + if (OB_FAIL(process_column_ref_node(column_node, column_ref))) { + LOG_WARN("resolve column node failed", K(ret)); + } else if (OB_FAIL(match_against->get_match_columns().push_back(column_ref))) { + LOG_WARN("add column ref to column list failed", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + // resolve search query and mode + ObRawExpr *search_keywords = nullptr; + if (OB_ISNULL(node->children_[1])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("match against search keywords is unexpected"); + } else if (OB_FAIL(SMART_CALL(recursive_resolve(node->children_[1], search_keywords)))) { + 
LOG_WARN("recursive resolve search keywords failed", K(ret)); + } else if (OB_FAIL(search_keywords->extract_info())) { + LOG_WARN("failed to extract info", K(ret)); + } else if (!search_keywords->is_static_const_expr()) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "non-const search query is not supported"); + LOG_WARN("search query is not const expr", K(ret)); + } else if (ObMatchAgainstMode::NATURAL_LANGUAGE_MODE != static_cast(node->value_)) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "search modes other than NATURAL_LANGUAGE_MODE"); + LOG_WARN("unsupported match against mode", K(ret), K(node->value_)); + } else { + match_against->set_search_key(search_keywords); + match_against->set_mode_flag(static_cast(node->value_)); + expr = match_against; + LOG_DEBUG("resolve match against expr finish", K(ret), KPC(expr)); + } + } + if (OB_SUCC(ret) && OB_FAIL(match_against->extract_info())) { + LOG_WARN("failed to extract info", K(ret)); + } + return ret; +} int ObRawExprResolverImpl::not_int_check(const ObRawExpr *expr) { diff --git a/src/sql/resolver/expr/ob_raw_expr_resolver_impl.h b/src/sql/resolver/expr/ob_raw_expr_resolver_impl.h index c1018441de..3f2b49bd45 100644 --- a/src/sql/resolver/expr/ob_raw_expr_resolver_impl.h +++ b/src/sql/resolver/expr/ob_raw_expr_resolver_impl.h @@ -35,7 +35,8 @@ public: common::ObIArray &win_exprs, common::ObIArray &udf_exprs, common::ObIArray &op_exprs, - common::ObIArray &user_var_exprs); + common::ObIArray &user_var_exprs, + common::ObIArray &match_exprs); bool is_contains_assignment() {return is_contains_assignment_;} void set_contains_assignment(bool v) {is_contains_assignment_ = v;} @@ -139,6 +140,7 @@ private: ObSysFunRawExpr *&expr); int resolve_udf_param_expr(const ParseNode *node, common::ObIArray ¶m_exprs); + int process_match_against(const ParseNode *node, ObRawExpr *&expr); int process_window_function_node(const ParseNode *node, ObRawExpr *&expr); int process_sort_list_node(const 
ParseNode *node, common::ObIArray &order_items); int process_frame_node(const ParseNode *node, diff --git a/src/sql/resolver/expr/ob_raw_expr_util.cpp b/src/sql/resolver/expr/ob_raw_expr_util.cpp index 06b8370384..532dbf6afc 100644 --- a/src/sql/resolver/expr/ob_raw_expr_util.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_util.cpp @@ -1442,11 +1442,12 @@ int ObRawExprUtils::make_raw_expr_from_str(const char *expr_str, if (OB_SUCC(ret)) { ObArray op_exprs; ObSEArray user_var_exprs; + ObSEArray match_exprs; ObRawExprResolverImpl expr_resolver(resolve_ctx); // generate raw expr if (OB_FAIL(expr_resolver.resolve(parsed_expr, expr, columns, sys_vars, *sub_query_info, aggr_exprs, win_exprs, - udf_info, op_exprs, user_var_exprs))) { + udf_info, op_exprs, user_var_exprs, match_exprs))) { _LOG_WARN("failed to resolve expr tree, err=%d", ret); } } @@ -3070,12 +3071,13 @@ int ObRawExprUtils::build_raw_expr(ObRawExprFactory &expr_factory, ctx.secondary_namespace_ = ns; ctx.tg_timing_event_ = tg_timing_event; ObSEArray user_var_exprs; + ObSEArray match_exprs; ObRawExprResolverImpl expr_resolver(ctx); if (OB_FAIL(session_info.get_name_case_mode(ctx.case_mode_))) { LOG_WARN("fail to get name case mode", K(ret)); } else if (OB_FAIL(expr_resolver.resolve(&node, expr, columns, sys_vars, sub_query_info, aggr_exprs, win_exprs, - udf_info, op_exprs, user_var_exprs))) { + udf_info, op_exprs, user_var_exprs, match_exprs))) { LOG_WARN("resolve expr failed", K(ret)); } else { /*do nothing*/ } } @@ -4533,6 +4535,156 @@ int ObRawExprUtils::create_instr_expr(ObRawExprFactory &expr_factory, } return ret; } + +int ObRawExprUtils::replace_json_wrapper_expr_if_need(ObRawExpr* qual, + int64_t qual_idx, + ObRawExpr *depend_expr, + ObRawExprFactory &expr_factory, + ObSQLSessionInfo *session_info, + bool& is_need_replace) +{ + int ret = OB_SUCCESS; + const ObRawExpr *const_depend_expr = depend_expr; + if (depend_expr->get_expr_type() == T_FUN_SYS_CAST) { + ObRawExpr* qual_expr = 
qual->get_param_expr(qual_idx); + const ObConstRawExpr* cast_val = static_cast(depend_expr->get_param_expr(1)); + int64_t depend_cast_data_type = cast_val->get_value().get_int(); + + ParseNode parse_node; + parse_node.value_ = depend_cast_data_type; + + bool is_need_create = false; + ObRawExpr *param_expr = nullptr; + + depend_expr = depend_expr->get_param_expr(0); + + if ((depend_expr->get_expr_type() == T_FUN_SYS_JSON_EXTRACT || depend_expr->get_expr_type() == T_FUN_SYS_JSON_UNQUOTE) + && qual_expr->get_expr_type() == T_FUN_SYS_JSON_EXTRACT) { + is_need_create = true; + param_expr = qual_expr; + } + + if (OB_SUCC(ret) && is_need_create) { + ObSysFunRawExpr* new_expr = nullptr; + ObConstRawExpr *qual_cast_type_expr = nullptr; + if (OB_FAIL(expr_factory.create_raw_expr(T_FUN_SYS_JSON_UNQUOTE, new_expr))) { + LOG_WARN("create to_type expr failed", K(ret)); + } else if (OB_ISNULL(new_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrapper json unquote is null"); + } else if (OB_FAIL(new_expr->set_param_expr(param_expr))) { + LOG_WARN("add param expr failed", K(ret)); + } else if (OB_FAIL(expr_factory.create_raw_expr(T_INT, qual_cast_type_expr))) { + LOG_WARN("build const int expr failed", K(ret)); + } else { + ObObj val; + val.set_int(parse_node.value_); + qual_cast_type_expr->set_length_semantics(LS_CHAR); + qual_cast_type_expr->set_value(val); + qual_cast_type_expr->set_param(val); + + if (OB_FAIL(static_cast((qual)->get_param_expr(1))->replace_param_expr(1, qual_cast_type_expr))) { + LOG_WARN("replace const int expr failed", K(ret)); + } else if (OB_FAIL(qual->get_param_expr(1)->formalize(session_info))) { + LOG_WARN("formalize expr failed", K(ret)); + } else { + new_expr->set_func_name(N_JSON_UNQUOTE); + if (OB_FAIL(new_expr->formalize(session_info))) { + LOG_WARN("formalize expr failed", K(ret)); + } else if (OB_FAIL(static_cast(qual)->replace_param_expr(qual_idx, new_expr))) { + LOG_WARN("replace failed", K(ret)); + } + } + } + } + } else if 
(const_depend_expr->get_expr_type() == T_FUN_SYS_JSON_QUERY || + const_depend_expr->extract_multivalue_json_expr(const_depend_expr)) { + ObRawExpr* qual_expr = qual->get_param_expr(qual_idx); + if (qual_expr->is_domain_json_expr()) { + is_need_replace = true; + } + } + + return ret; +} + +int ObRawExprUtils::replace_qual_param_if_need(ObRawExpr* qual, + ObColumnRefRawExpr *col_expr) +{ + INIT_SUCC(ret); + ObRawExpr* qual_expr = nullptr; + + if (qual->get_expr_type() == T_OP_BOOL + && OB_NOT_NULL(qual_expr = qual->get_param_expr(0)) + && qual_expr->is_domain_json_expr()) { + if (qual_expr->get_expr_type() == T_FUN_SYS_JSON_MEMBER_OF) { + if (OB_FAIL(static_cast(qual_expr)->replace_param_expr(1, col_expr))) { + LOG_WARN("replace const int expr failed", K(ret)); + } + } else if (OB_FAIL(static_cast(qual_expr)->replace_param_expr(0, col_expr))) { + LOG_WARN("replace const int expr failed", K(ret)); + } + } + + return ret; +} + +bool ObRawExprUtils::is_domain_expr_need_special_replace(ObRawExpr* qual_expr, + ObRawExpr *depend_expr) +{ + bool b_ret = false; + + const ObRawExpr *const_depend_expr = depend_expr; + if (depend_expr->get_expr_type() == T_FUN_SYS_CAST) { + depend_expr = depend_expr->get_param_expr(0); + + b_ret = (depend_expr->get_expr_type() == T_FUN_SYS_JSON_EXTRACT || depend_expr->get_expr_type() == T_FUN_SYS_JSON_UNQUOTE) + && qual_expr->get_expr_type() == T_FUN_SYS_JSON_EXTRACT; + } else if (depend_expr->get_expr_type() == T_FUN_SYS_JSON_QUERY) { + b_ret = qual_expr->is_domain_json_expr(); + } else if (const_depend_expr->extract_multivalue_json_expr(const_depend_expr) + && const_depend_expr->get_expr_type() == T_FUN_SYS_JSON_QUERY) { + b_ret = qual_expr->is_domain_json_expr(); + } + + return b_ret; +} + +int ObRawExprUtils::replace_domain_wrapper_expr(ObRawExpr *depend_expr, + ObColumnRefRawExpr *col_expr, + ObRawExprCopier& copier, + ObRawExprFactory& factory, + ObSQLSessionInfo *session_info, + ObRawExpr *&qual, + int64_t qual_idx, + ObRawExpr 
*&new_qual) +{ + INIT_SUCC(ret); + + ObSEArray column_exprs; + bool need_specific_replace = false; + + if (OB_FAIL(replace_json_wrapper_expr_if_need( + qual, qual_idx, depend_expr, factory, session_info, need_specific_replace))) { + LOG_WARN("failed to replace expr", K(ret)); + } else if (OB_FAIL(extract_column_exprs(qual, column_exprs))) { + LOG_WARN("extract_column_exprs error", K(ret)); + } else if (OB_FAIL(copier.add_skipped_expr(column_exprs))) { + LOG_WARN("failed to add skipped exprs", K(ret)); + } else if (OB_FAIL(copier.copy(qual, new_qual))) { + LOG_WARN("failed to copy expr node", K(ret)); + //depend_expr's res type may be diff from its column's. copy real_qual and deduce type again. + } else if (!need_specific_replace + && OB_FAIL(static_cast(new_qual)->replace_param_expr(qual_idx, col_expr))) { + LOG_WARN("replace failed", K(ret)); + } else if (need_specific_replace + && OB_FAIL(replace_qual_param_if_need(new_qual, col_expr))) { + LOG_WARN("specific replace failed", K(ret)); + } + + return ret; +} + int ObRawExprUtils::create_substr_expr(ObRawExprFactory &expr_factory, ObSQLSessionInfo *session_info, ObRawExpr *first_expr, @@ -4943,8 +5095,11 @@ bool ObRawExprUtils::need_column_conv(const ObExprResType &expected_type, const bool ObRawExprUtils::need_column_conv(const ColumnItem &column, ObRawExpr &expr) { int bret = true; - if (column.get_expr() != NULL && (column.get_expr()->is_fulltext_column() - || column.get_expr()->is_spatial_generated_column())) { + if (column.get_expr() != NULL + && (column.get_expr()->is_fulltext_column() + || column.get_expr()->is_spatial_generated_column() + || column.get_expr()->is_multivalue_generated_column() + || column.get_expr()->is_multivalue_generated_array_column())) { //全文索引的生成列是内部生成的隐藏列,不需要做column convert bret = false; } else if (column.get_column_type() != NULL) { @@ -4976,7 +5131,10 @@ int ObRawExprUtils::build_column_conv_expr(ObRawExprFactory &expr_factory, CK(OB_NOT_NULL(session_info)); 
CK(OB_NOT_NULL(column_schema)); if (OB_SUCC(ret)) { - if (column_schema->is_fulltext_column() || column_schema->is_spatial_generated_column()) { + if (column_schema->is_fulltext_column() + || column_schema->is_spatial_generated_column() + || column_schema->is_multivalue_generated_column() + || column_schema->is_multivalue_generated_array_column()) { //全文列不会破坏约束性,且数据不会存储,跳过强转 // 空间索引列是虚拟列,跳过强转 } else if (OB_FAIL(build_column_conv_expr(session_info, @@ -5028,7 +5186,10 @@ int ObRawExprUtils::build_column_conv_expr(ObRawExprFactory &expr_factory, } CK(session_info); if (OB_SUCC(ret)) { - if (col_ref.is_fulltext_column() || col_ref.is_spatial_generated_column()) { + if (col_ref.is_fulltext_column() || + col_ref.is_spatial_generated_column() || + col_ref.is_multivalue_generated_column() || + col_ref.is_multivalue_generated_array_column()) { // 全文列不会破坏约束性,且数据不会存储,跳过强转 // 空间索引列是虚拟列,跳过强转 } else if (OB_FAIL(build_column_conv_expr(session_info, @@ -9258,6 +9419,83 @@ bool ObRawExprUtils::is_column_ref_skip_implicit_cast(const ObRawExpr *expr) return bret; } +int ObRawExprUtils::build_default_match_filter(ObRawExprFactory &expr_factory, + ObRawExpr *relevance_expr, + ObRawExpr *threshold, + ObOpRawExpr *&match_filter, + const ObSQLSessionInfo *session) +{ + int ret = OB_SUCCESS; + ObOpRawExpr *greater_than = nullptr; + if (OB_FAIL(expr_factory.create_raw_expr(T_OP_GT, greater_than))) { + LOG_WARN("create cmp op failed", K(ret)); + } else if (OB_ISNULL(greater_than)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer to created greater than expr", K(ret)); + } else if (OB_FAIL(greater_than->set_param_exprs(relevance_expr, threshold))) { + LOG_WARN("failed to set param exprs", K(ret)); + } else if (OB_FAIL(greater_than->formalize(session))) { + LOG_WARN("failed to formalize greater than expr", K(ret)); + } else { + match_filter = greater_than; + } + return ret; +} + +int ObRawExprUtils::build_bm25_expr(ObRawExprFactory &expr_factory, + ObRawExpr *related_doc_cnt, 
+ ObRawExpr *related_token_cnt, + ObRawExpr *total_doc_cnt, + ObRawExpr *doc_token_cnt, + ObOpRawExpr *&bm25, + const ObSQLSessionInfo *session) +{ + int ret = OB_SUCCESS; + ObConstRawExpr *approx_avg_token_cnt = nullptr; + ObOpRawExpr *bm25_expr = nullptr; + // TODO: @Salton implement approx avg token cnt storage in fulltext index and rm this mock + constexpr double mock_approx_avg_cnt = 10; + if (OB_FAIL(build_const_double_expr(expr_factory, ObDoubleType, mock_approx_avg_cnt, approx_avg_token_cnt))) { + LOG_WARN("create approx average token count failed", K(ret)); + } else if (OB_FAIL(expr_factory.create_raw_expr(T_FUN_SYS_BM25, bm25_expr))) { + LOG_WARN("create bm25 func failed", K(ret)); + } else if (OB_ISNULL(bm25_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer to created bm25 related exprs", K(ret), KP(bm25)); + } else { + OZ(approx_avg_token_cnt->formalize(session)); + OZ(bm25_expr->add_param_expr(related_doc_cnt)); + OZ(bm25_expr->add_param_expr(total_doc_cnt)); + OZ(bm25_expr->add_param_expr(doc_token_cnt)); + OZ(bm25_expr->add_param_expr(approx_avg_token_cnt)); + OZ(bm25_expr->add_param_expr(related_token_cnt)); + OZ(bm25_expr->formalize(session)); + OX(bm25 = bm25_expr); + } + return ret; +} + +int ObRawExprUtils::extract_match_against_filters(const ObIArray &filters, + ObIArray &other_filters, + ObIArray &match_filters) +{ + int ret = OB_SUCCESS; + ObRawExpr *expr = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < filters.count(); ++i) { + if (OB_ISNULL(expr = filters.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null expr", K(ret)); + } else if (expr->has_flag(CNT_MATCH_EXPR)) { + if (OB_FAIL(match_filters.push_back(expr))) { + LOG_WARN("failed to push text ir filters", K(ret)); + } + } else if (OB_FAIL(other_filters.push_back(expr))) { + LOG_WARN("failed to push other filters", K(ret)); + } + } + return ret; +} + int ObRawExprUtils::build_dummy_count_expr(ObRawExprFactory &expr_factory, const ObSQLSessionInfo 
*session_info, ObAggFunRawExpr *&expr) diff --git a/src/sql/resolver/expr/ob_raw_expr_util.h b/src/sql/resolver/expr/ob_raw_expr_util.h index 533f42d9f9..ec00ec7f34 100644 --- a/src/sql/resolver/expr/ob_raw_expr_util.h +++ b/src/sql/resolver/expr/ob_raw_expr_util.h @@ -576,6 +576,26 @@ public: ObRawExpr *&expr, const ObLocalSessionVar *local_vars = NULL, int64_t local_var_id = OB_INVALID_INDEX_INT64); + + static bool is_domain_expr_need_special_replace(ObRawExpr* qual_expr, + ObRawExpr *depend_expr); + static int replace_domain_wrapper_expr(ObRawExpr *depend_expr, + ObColumnRefRawExpr *col_expr, + ObRawExprCopier& copier, + ObRawExprFactory& factory, + ObSQLSessionInfo *session_info, + ObRawExpr *&qual, + int64_t qual_idx, + ObRawExpr *&new_qual); + static int replace_json_wrapper_expr_if_need(ObRawExpr* qual, + int64_t qual_idx, + ObRawExpr *depend_expr, + ObRawExprFactory &expr_factory, + ObSQLSessionInfo *session_info, + bool& is_done_replace); + + static int replace_qual_param_if_need(ObRawExpr* qual, ObColumnRefRawExpr *col_expr); + static bool need_column_conv(const ColumnItem &column, ObRawExpr &expr); static int build_pad_expr(ObRawExprFactory &expr_factory, bool is_char, @@ -1200,6 +1220,21 @@ public: static int check_is_valid_generated_col(ObRawExpr *expr, ObIAllocator &allocator); static bool is_column_ref_skip_implicit_cast(const ObRawExpr *expr); + static int build_default_match_filter(ObRawExprFactory &expr_factory, + ObRawExpr *relevance_expr, + ObRawExpr *threshold, + ObOpRawExpr *&match_filter, + const ObSQLSessionInfo *session); + static int build_bm25_expr(ObRawExprFactory &expr_factory, + ObRawExpr *related_doc_cnt, + ObRawExpr *related_token_cnt, + ObRawExpr *total_doc_cnt, + ObRawExpr *doc_token_cnt, + ObOpRawExpr *&bm25, + const ObSQLSessionInfo *session); + static int extract_match_against_filters(const ObIArray &filters, + ObIArray &other_filters, + ObIArray &match_filters); static int build_dummy_count_expr(ObRawExprFactory &expr_factory, 
const ObSQLSessionInfo *session_info, ObAggFunRawExpr *&expr); diff --git a/src/sql/resolver/expr/ob_raw_expr_wrap_enum_set.cpp b/src/sql/resolver/expr/ob_raw_expr_wrap_enum_set.cpp index 2933f627b9..ec5d13cc61 100644 --- a/src/sql/resolver/expr/ob_raw_expr_wrap_enum_set.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_wrap_enum_set.cpp @@ -774,6 +774,12 @@ int ObRawExprWrapEnumSet::visit(ObAggFunRawExpr &expr) return ret; } +int ObRawExprWrapEnumSet::visit(ObMatchFunRawExpr &expr) +{ + UNUSED(expr); + return OB_SUCCESS; +} + bool ObRawExprWrapEnumSet::can_wrap_type_to_str(const ObRawExpr &expr) const { bool bret = false; diff --git a/src/sql/resolver/expr/ob_raw_expr_wrap_enum_set.h b/src/sql/resolver/expr/ob_raw_expr_wrap_enum_set.h index 009cebc7d5..25deb7a637 100644 --- a/src/sql/resolver/expr/ob_raw_expr_wrap_enum_set.h +++ b/src/sql/resolver/expr/ob_raw_expr_wrap_enum_set.h @@ -51,6 +51,7 @@ public: int visit(ObWinFunRawExpr &expr); int visit(ObPseudoColumnRawExpr &expr); int visit(ObPlQueryRefRawExpr &expr); + int visit(ObMatchFunRawExpr &expr); bool skip_child(); private: int visit_left_expr(ObOpRawExpr &expr, int64_t row_dimension, diff --git a/src/sql/resolver/ob_resolver_utils.cpp b/src/sql/resolver/ob_resolver_utils.cpp index 16f8e4b7e3..c463d83057 100644 --- a/src/sql/resolver/ob_resolver_utils.cpp +++ b/src/sql/resolver/ob_resolver_utils.cpp @@ -2143,18 +2143,20 @@ int ObResolverUtils::resolve_obj_access_ref_node(ObRawExprFactory &expr_factory, ObArray udf_info; ObArray op_exprs; ObSEArray user_var_exprs; + ObSEArray match_exprs; if (OB_FAIL(expr_resolver.resolve(node, expr, columns, sys_vars, sub_query_info, aggr_exprs, - win_exprs, udf_info, op_exprs, user_var_exprs))) { + win_exprs, udf_info, op_exprs, user_var_exprs, match_exprs))) { LOG_WARN("failed to resolve expr tree", K(ret)); } else if (OB_UNLIKELY(1 != columns.count()) || OB_UNLIKELY(!sys_vars.empty()) || OB_UNLIKELY(!sub_query_info.empty()) || OB_UNLIKELY(!aggr_exprs.empty()) - || 
OB_UNLIKELY(!win_exprs.empty())) { + || OB_UNLIKELY(!win_exprs.empty()) + || OB_UNLIKELY(!match_exprs.empty())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("expr is invalid", K(op_exprs.empty()), K(columns.count()), K(sys_vars.count()), K(sub_query_info.count()), K(aggr_exprs.count()), K(win_exprs.count()), - K(udf_info.count()), K(ret)); + K(udf_info.count()), K(match_exprs.count()), K(ret)); } else if (OB_FAIL(q_name.assign(columns.at(0)))) { LOG_WARN("assign qualified name failed", K(ret), K(columns)); } else { /*do nothing*/ } @@ -3281,6 +3283,7 @@ int ObResolverUtils::resolve_const_expr(ObResolverParams ¶ms, ObArray sys_vars; ObArray op_exprs; ObSEArray user_var_exprs; + ObSEArray match_exprs; ObCollationType collation_connection = CS_TYPE_INVALID; ObCharsetType character_set_connection = CHARSET_INVALID; if (OB_ISNULL(params.expr_factory_) || OB_ISNULL(params.session_info_)) { @@ -3306,7 +3309,7 @@ int ObResolverUtils::resolve_const_expr(ObResolverParams ¶ms, LOG_WARN("fail to get name case mode", K(ret)); } else if (OB_FAIL(expr_resolver.resolve(&node, const_expr, columns, sys_vars, sub_query_info, aggr_exprs, win_exprs, - udf_info, op_exprs, user_var_exprs))) { + udf_info, op_exprs, user_var_exprs, match_exprs))) { LOG_WARN("resolve expr failed", K(ret)); } else if (OB_FAIL(resolve_columns_for_const_expr(const_expr, columns, params))) { LOG_WARN("resolve columnts for const expr failed", K(ret)); @@ -3316,6 +3319,9 @@ int ObResolverUtils::resolve_const_expr(ObResolverParams ¶ms, } else if (udf_info.count() > 0) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("UDFInfo should not found be here!!!", K(ret)); + } else if (match_exprs.count() > 0) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("fulltext search expr should not found be here", K(ret)); } //process oracle compatible implicit conversion @@ -4241,6 +4247,7 @@ int ObResolverUtils::resolve_partition_range_value_expr(ObResolverParams ¶ms ObArray part_column_refs; ObArray op_exprs; ObSEArray user_var_exprs; + ObSEArray 
match_exprs; ObExprResolveContext ctx(*params.expr_factory_, params.session_info_->get_timezone_info(), OB_NAME_CASE_INVALID); ctx.dest_collation_ = collation_connection; ctx.connection_charset_ = ObCharset::charset_type_by_coll(part_func_expr.get_collation_type()); @@ -4252,7 +4259,7 @@ int ObResolverUtils::resolve_partition_range_value_expr(ObResolverParams ¶ms LOG_WARN("fail to get name case mode", K(ret)); } else if (OB_FAIL(expr_resolver.resolve(&node, part_value_expr, columns, sys_vars, sub_query_info, aggr_exprs, win_exprs, udf_info, - op_exprs, user_var_exprs))) { + op_exprs, user_var_exprs, match_exprs))) { LOG_WARN("resolve expr failed", K(ret)); } else if (sub_query_info.count() > 0) { ret = OB_ERR_PARTITION_FUNCTION_IS_NOT_ALLOWED; @@ -4262,6 +4269,9 @@ int ObResolverUtils::resolve_partition_range_value_expr(ObResolverParams ¶ms if (OB_SUCC(ret) && udf_info.count() > 0) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "udf"); + } else if (OB_UNLIKELY(match_exprs.count() > 0)) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "fulltext search func"); } if (OB_SUCC(ret)) { @@ -4404,6 +4414,7 @@ int ObResolverUtils::resolve_partition_range_value_expr(ObResolverParams ¶ms ObArray part_column_refs; ObArray op_exprs; ObSEArray user_var_exprs; + ObSEArray match_exprs; ObExprResolveContext ctx(*params.expr_factory_, params.session_info_->get_timezone_info(), OB_NAME_CASE_INVALID); @@ -4424,7 +4435,8 @@ int ObResolverUtils::resolve_partition_range_value_expr(ObResolverParams ¶ms win_exprs, udf_info, op_exprs, - user_var_exprs))) { + user_var_exprs, + match_exprs))) { LOG_WARN("resolve expr failed", K(ret)); } else if (sub_query_info.count() > 0) { ret = OB_ERR_PARTITION_FUNCTION_IS_NOT_ALLOWED; @@ -4435,6 +4447,9 @@ int ObResolverUtils::resolve_partition_range_value_expr(ObResolverParams ¶ms if (OB_SUCC(ret) && udf_info.count() > 0) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("UDFInfo should not found be here!!!", K(ret)); + } else if 
(OB_UNLIKELY(match_exprs.count() > 0)) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "fulltext search func"); } if (OB_SUCC(ret)) { @@ -4562,6 +4577,7 @@ int ObResolverUtils::resolve_partition_expr(ObResolverParams ¶ms, ObArray tmp_part_keys; ObArray op_exprs; ObSEArray user_var_exprs; + ObSEArray match_exprs; ObCollationType collation_connection = CS_TYPE_INVALID; ObCharsetType character_set_connection = CHARSET_INVALID; //part_keys is not null, means that need output partition keys @@ -4587,7 +4603,7 @@ int ObResolverUtils::resolve_partition_expr(ObResolverParams ¶ms, LOG_WARN("fail to get name case mode", K(ret)); } else if (OB_FAIL(expr_resolver.resolve(&node, part_expr, columns, sys_vars, sub_query_info, aggr_exprs, win_exprs, udf_info, - op_exprs, user_var_exprs))) { + op_exprs, user_var_exprs, match_exprs))) { LOG_WARN("resolve expr failed", K(ret)); } else if (sub_query_info.count() > 0) { ret = OB_ERR_PARTITION_FUNCTION_IS_NOT_ALLOWED; @@ -4600,6 +4616,9 @@ int ObResolverUtils::resolve_partition_expr(ObResolverParams ¶ms, } else if (udf_info.count() > 0) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "udf"); + } else if (OB_UNLIKELY(match_exprs.count() > 0)) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "fulltext search func"); } else if (OB_FAIL(resolve_columns_for_partition_expr(part_expr, columns, tbl_schema, part_func_type, partition_key_start, partition_keys))) { LOG_WARN("resolve columns for partition expr failed", K(ret)); diff --git a/src/sql/resolver/ob_schema_checker.cpp b/src/sql/resolver/ob_schema_checker.cpp index 38f267c282..d72400ae4a 100644 --- a/src/sql/resolver/ob_schema_checker.cpp +++ b/src/sql/resolver/ob_schema_checker.cpp @@ -294,7 +294,8 @@ int ObSchemaChecker::check_table_or_index_exists( const uint64_t tenant_id, const uint64_t database_id, const ObString &table_name, - const bool is_hidden, + const bool with_hidden_flag, + const bool is_built_in_index, bool &is_exist) { int ret = 
OB_SUCCESS; @@ -307,13 +308,13 @@ int ObSchemaChecker::check_table_or_index_exists( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(tenant_id), K(database_id), K(table_name), K(ret)); } else if (OB_FAIL(check_table_exists(tenant_id, database_id, table_name, - is_index_table, is_hidden, is_exist))) { + is_index_table, with_hidden_flag, is_exist))) { LOG_WARN("check table exist failed", K(tenant_id), K(database_id), K(table_name), K(ret)); } else if(!is_exist) { is_index_table = true; if (OB_FAIL(check_table_exists(tenant_id, database_id, table_name, - is_index_table, is_hidden, is_exist))) { - LOG_WARN("check index exist failed", K(tenant_id), K(database_id), K(table_name), K(ret)); + is_index_table, with_hidden_flag, is_exist, is_built_in_index))) { + LOG_WARN("check index exist failed", K(tenant_id), K(database_id), K(table_name), K(ret), K(is_built_in_index)); } } return ret; @@ -323,8 +324,9 @@ int ObSchemaChecker::check_table_exists(const uint64_t tenant_id, const uint64_t database_id, const ObString &table_name, const bool is_index_table, - const bool is_hidden, - bool &is_exist) + const bool with_hidden_flag, + bool &is_exist, + const bool is_built_in_index) { int ret = OB_SUCCESS; @@ -338,9 +340,9 @@ int ObSchemaChecker::check_table_exists(const uint64_t tenant_id, ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(tenant_id), K(database_id), K(table_name), K(ret)); } else { - if (OB_FAIL(schema_mgr_->get_table_id(tenant_id, database_id, table_name, - is_index_table, is_hidden ? ObSchemaGetterGuard::USER_HIDDEN_TABLE_TYPE : ObSchemaGetterGuard::ALL_NON_HIDDEN_TYPES, table_id))) { - + if (OB_FAIL(schema_mgr_->get_table_id(tenant_id, database_id, table_name, is_index_table, + with_hidden_flag ? 
ObSchemaGetterGuard::USER_HIDDEN_TABLE_TYPE : ObSchemaGetterGuard::ALL_NON_HIDDEN_TYPES, + table_id, is_built_in_index))) { LOG_WARN("get table id failed", K(ret), K(tenant_id), K(database_id), K(table_name), K(is_index_table)); } else { @@ -374,8 +376,9 @@ int ObSchemaChecker::check_table_exists(const uint64_t tenant_id, const ObString &database_name, const ObString &table_name, const bool is_index_table, - const bool is_hidden, - bool &is_exist) + const bool with_hidden_flag, + bool &is_exist, + const bool is_built_in_index) { int ret = OB_SUCCESS; @@ -388,8 +391,9 @@ int ObSchemaChecker::check_table_exists(const uint64_t tenant_id, ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(tenant_id), K(database_name), K(table_name), K(ret)); } else { - if (OB_FAIL(schema_mgr_->get_table_id(tenant_id, database_name, table_name, - is_index_table, is_hidden ? ObSchemaGetterGuard::USER_HIDDEN_TABLE_TYPE : ObSchemaGetterGuard::ALL_NON_HIDDEN_TYPES, table_id))) { + if (OB_FAIL(schema_mgr_->get_table_id(tenant_id, database_name, table_name, is_index_table, + with_hidden_flag ? 
ObSchemaGetterGuard::USER_HIDDEN_TABLE_TYPE : ObSchemaGetterGuard::ALL_NON_HIDDEN_TYPES, + table_id, is_built_in_index))) { LOG_WARN("fail to check table exist", K(tenant_id), K(database_name), K(table_name), K(is_index_table), K(ret)); } else { @@ -774,8 +778,9 @@ int ObSchemaChecker::get_table_schema(const uint64_t tenant_id, const ObString &table_name, const bool is_index_table, const bool cte_table_fisrt, - const bool is_hidden, - const ObTableSchema *&table_schema) + const bool with_hidden_flag, + const ObTableSchema *&table_schema, + const bool is_built_in_index/*= false*/) { int ret = OB_SUCCESS; table_schema = NULL; @@ -790,9 +795,9 @@ int ObSchemaChecker::get_table_schema(const uint64_t tenant_id, LOG_WARN("invalid arguments", K(tenant_id), K(database_id), K(table_name), K(ret)); ret = OB_INVALID_ARGUMENT; } else if (OB_FAIL(schema_mgr_->get_table_schema(tenant_id, database_id, table_name, - is_index_table, table, is_hidden))) { + is_index_table, table, with_hidden_flag, is_built_in_index))) { LOG_WARN("get table schema failed", K(tenant_id), K(database_id), K(table_name), - K(is_index_table), K(ret)); + K(with_hidden_flag), K(is_built_in_index), K(is_index_table), K(ret)); } else { // 也有可能是临时cte递归表schema与已有表重名, // 这个时候必须由cte递归表schema优先(same with oracle) diff --git a/src/sql/resolver/ob_schema_checker.h b/src/sql/resolver/ob_schema_checker.h index f369b4b9a3..9cf2b09249 100644 --- a/src/sql/resolver/ob_schema_checker.h +++ b/src/sql/resolver/ob_schema_checker.h @@ -108,21 +108,24 @@ public: int check_table_or_index_exists(const uint64_t tenant_id, const uint64_t database_id, const common::ObString &table_name, - const bool is_hidden, + const bool with_hidden_flag, + const bool is_built_in_index, bool &is_exist); int check_table_exists(const uint64_t tenant_id, const uint64_t database_id, const common::ObString &table_name, const bool is_index, - const bool is_hidden, - bool &is_exist); + const bool with_hidden_flag, + bool &is_exist, + const bool 
is_built_in_index = false); //int check_table_exists(uint64_t table_id, bool &is_exist) const; int check_table_exists(const uint64_t tenant_id, const common::ObString &database_name, const common::ObString &table_name, const bool is_index_table, - const bool is_hidden, - bool &is_exist); + const bool with_hidden_flag, + bool &is_exist, + const bool is_built_in_index = false); // mock_fk_parent_table begin int get_mock_fk_parent_table_with_name( @@ -187,8 +190,9 @@ public: const common::ObString &table_name, const bool is_index_table, const bool cte_table_fisrt, - const bool is_hidden, - const share::schema::ObTableSchema *&table_schema); + const bool with_hidden_flag, + const share::schema::ObTableSchema *&table_schema, + const bool is_built_in_index = false); int get_table_schema(const uint64_t tenant_id, const uint64_t table_id, const share::schema::ObTableSchema *&table_schema, bool is_link = false) const; int get_link_table_schema(const uint64_t dblink_id, const common::ObString &database_name, diff --git a/src/sql/rewrite/ob_key_part.cpp b/src/sql/rewrite/ob_key_part.cpp index 10ca24c56b..524fc22820 100644 --- a/src/sql/rewrite/ob_key_part.cpp +++ b/src/sql/rewrite/ob_key_part.cpp @@ -52,9 +52,9 @@ void ObKeyPart::reset_key() like_keypart_->escape_.reset(); } else if (is_in_key()) { in_keypart_->reset(); - } else if (is_geo_key()) { - geo_keypart_->wkb_.reset(); - geo_keypart_->distance_.reset(); + } else if (is_domain_key()) { + domain_keypart_->const_param_.reset(); + domain_keypart_->extra_param_.reset(); } } @@ -273,8 +273,8 @@ bool ObKeyPart::is_question_mark() const bret = normal_keypart_->start_.is_unknown() || normal_keypart_->end_.is_unknown(); } else if (is_in_key()) { bret = in_keypart_->contain_questionmark_; - } else if (is_geo_key()) { - bret = geo_keypart_->wkb_.is_unknown(); + } else if (is_domain_key()) { + bret = domain_keypart_->const_param_.is_unknown(); } return bret; } @@ -721,15 +721,15 @@ int ObKeyPart::deep_node_copy(const ObKeyPart 
&other) in_keypart_->is_strict_in_ = other.in_keypart_->is_strict_in_; in_keypart_->contain_questionmark_ = other.in_keypart_->contain_questionmark_; } - } else if (other.is_geo_key()) { - if (OB_FAIL(create_geo_key())) { + } else if (other.is_domain_key()) { + if (OB_FAIL(create_domain_key())) { LOG_WARN("create geo key failed", K(ret)); - } else if (OB_FAIL(ob_write_obj(allocator_, other.geo_keypart_->wkb_, geo_keypart_->wkb_))) { + } else if (OB_FAIL(ob_write_obj(allocator_, other.domain_keypart_->const_param_, domain_keypart_->const_param_))) { LOG_WARN("deep copy geo wkb failed", K(ret)); - } else if (OB_FAIL(ob_write_obj(allocator_, other.geo_keypart_->distance_, geo_keypart_->distance_))) { + } else if (OB_FAIL(ob_write_obj(allocator_, other.domain_keypart_->extra_param_, domain_keypart_->extra_param_))) { LOG_WARN("deep copy geo distance failed", K(ret)); } else { - geo_keypart_->geo_type_ = other.geo_keypart_->geo_type_; + domain_keypart_->domain_op_ = other.domain_keypart_->domain_op_; } } return ret; @@ -753,8 +753,8 @@ int ObKeyPart::shallow_node_copy(const ObKeyPart &other) } else if (other.is_in_key()) { in_keypart_ = other.in_keypart_; key_type_ = other.key_type_; - } else if (other.is_geo_key()) { - geo_keypart_ = other.geo_keypart_; + } else if (other.is_domain_key()) { + domain_keypart_ = other.domain_keypart_; key_type_ = other.key_type_; } return ret; @@ -1101,10 +1101,10 @@ OB_DEF_SERIALIZE(ObKeyPart) } } } - } else if (is_geo_key()) { - OB_UNIS_ENCODE(geo_keypart_->wkb_); - OB_UNIS_ENCODE(geo_keypart_->geo_type_); - OB_UNIS_ENCODE(geo_keypart_->distance_); + } else if (is_domain_key()) { + OB_UNIS_ENCODE(domain_keypart_->const_param_); + OB_UNIS_ENCODE(domain_keypart_->domain_op_); + OB_UNIS_ENCODE(domain_keypart_->extra_param_); } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected key type", K_(key_type)); @@ -1178,13 +1178,13 @@ OB_DEF_DESERIALIZE(ObKeyPart) } } } - } else if (T_GEO_KEY == key_type_) { - if (OB_FAIL(create_geo_key())) { - 
LOG_WARN("create geo key failed", K(ret)); + } else if (T_DOMAIN_KEY == key_type_) { + if (OB_FAIL(create_domain_key())) { + LOG_WARN("create domain key failed", K(ret)); } - OB_UNIS_DECODE(geo_keypart_->wkb_); - OB_UNIS_DECODE(geo_keypart_->geo_type_); - OB_UNIS_DECODE(geo_keypart_->distance_); + OB_UNIS_DECODE(domain_keypart_->const_param_); + OB_UNIS_DECODE(domain_keypart_->domain_op_); + OB_UNIS_DECODE(domain_keypart_->extra_param_); } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected key type", K_(key_type)); @@ -1234,10 +1234,10 @@ OB_DEF_SERIALIZE_SIZE(ObKeyPart) } } } - } else if (is_geo_key()) { - OB_UNIS_ADD_LEN(geo_keypart_->wkb_); - OB_UNIS_ADD_LEN(geo_keypart_->geo_type_); - OB_UNIS_ADD_LEN(geo_keypart_->distance_); + } else if (is_domain_key()) { + OB_UNIS_ADD_LEN(domain_keypart_->const_param_); + OB_UNIS_ADD_LEN(domain_keypart_->domain_op_); + OB_UNIS_ADD_LEN(domain_keypart_->extra_param_); } OB_UNIS_ADD_LEN(null_safe_); OB_UNIS_ADD_LEN(rowid_column_idx_); @@ -1579,16 +1579,16 @@ int ObKeyPart::create_not_in_key() return ret; } -int ObKeyPart::create_geo_key() +int ObKeyPart::create_domain_key() { int ret = OB_SUCCESS; void *ptr = NULL; - if (OB_UNLIKELY(NULL == (ptr = allocator_.alloc(sizeof(ObGeoKeyPart))))) { + if (OB_UNLIKELY(NULL == (ptr = allocator_.alloc(sizeof(ObDomainKeyPart))))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_ERROR("alloc memory failed"); } else { - key_type_ = T_GEO_KEY; - geo_keypart_ = new(ptr) ObGeoKeyPart(); + key_type_ = T_DOMAIN_KEY; + domain_keypart_ = new(ptr) ObDomainKeyPart(); } return ret; } @@ -1656,11 +1656,11 @@ DEF_TO_STRING(ObKeyPart) N_OFFSETS, in_keypart_->offsets_, N_MISSING_OFFSETS, in_keypart_->missing_offsets_, N_IN_PARAMS, in_keypart_->in_params_); - } else if (is_geo_key()) { + } else if (is_domain_key()) { J_COMMA(); - J_KV("wkb_", geo_keypart_->wkb_, - "geo_type_", geo_keypart_->geo_type_, - "distance_", geo_keypart_->distance_); + J_KV("const_param_", domain_keypart_->const_param_, + "domain_type_", 
domain_keypart_->domain_op_, + "extra_param_", domain_keypart_->extra_param_); } J_OBJ_END(); return pos; diff --git a/src/sql/rewrite/ob_key_part.h b/src/sql/rewrite/ob_key_part.h index 138ea98d92..c3176a0b3a 100644 --- a/src/sql/rewrite/ob_key_part.h +++ b/src/sql/rewrite/ob_key_part.h @@ -33,7 +33,7 @@ enum ObKeyPartType T_NORMAL_KEY = 0, T_LIKE_KEY, T_IN_KEY, - T_GEO_KEY + T_DOMAIN_KEY }; enum InType @@ -274,11 +274,14 @@ struct ObInKeyPart bool contain_questionmark_; }; -struct ObGeoKeyPart +// need override following function if add a new type +// set_domain_const_param(const param in expr): for json is const_param_, for gis is wkb_ +// set_domain_extra_param(extra param in expr): for gis is distance +struct ObDomainKeyPart { - common::ObObj wkb_; - common::ObGeoRelationType geo_type_; - common::ObObj distance_; + common::ObObj const_param_; + common::ObDomainOpType domain_op_; + common::ObObj extra_param_; }; class ObKeyPart : public common::ObDLinkBase @@ -343,13 +346,14 @@ public: inline bool is_like_key() const { return T_LIKE_KEY == key_type_ && like_keypart_ != NULL; } inline bool is_in_key() const {return T_IN_KEY == key_type_ && in_keypart_ != NULL && in_keypart_->in_type_ == T_IN_KEY_PART; } inline bool is_not_in_key() const {return T_IN_KEY == key_type_ && in_keypart_ != NULL && in_keypart_->in_type_ == T_NOT_IN_KEY_PART; } - inline bool is_geo_key() const { return T_GEO_KEY == key_type_ && geo_keypart_ != NULL; } + inline bool is_geo_key() const { return T_DOMAIN_KEY == key_type_ && domain_keypart_ != NULL && (domain_keypart_->domain_op_ >= ObDomainOpType::T_GEO_COVERS && domain_keypart_->domain_op_ <= ObDomainOpType::T_GEO_COVEREDBY); } + inline bool is_domain_key() const { return T_DOMAIN_KEY == key_type_ && domain_keypart_ != NULL; } int create_normal_key(); int create_like_key(); int create_in_key(); int create_not_in_key(); - int create_geo_key(); + int create_domain_key(); inline ObNormalKeyPart *get_normal_key() { @@ -428,8 +432,8 @@ public:
ObLikeKeyPart *like_keypart_; // in expr type ObInKeyPart *in_keypart_; - //geo expr type - ObGeoKeyPart *geo_keypart_; + //domain expr type + ObDomainKeyPart *domain_keypart_; }; //list member ObKeyPart *item_next_; diff --git a/src/sql/rewrite/ob_query_range.cpp b/src/sql/rewrite/ob_query_range.cpp index b477bfc9cd..fd3764349c 100644 --- a/src/sql/rewrite/ob_query_range.cpp +++ b/src/sql/rewrite/ob_query_range.cpp @@ -24,6 +24,8 @@ #include "observer/omt/ob_tenant_srs.h" #include "sql/engine/expr/ob_geo_expr_utils.h" #include "sql/engine//expr/ob_datum_cast.h" +#include "sql/engine/expr/ob_expr_json_func_helper.h" +#include "sql/engine/expr/ob_expr_json_utils.h" //if cnd is true get full range key part which is always true //else, get empty key part which is always false @@ -425,7 +427,7 @@ int ObQueryRange::check_cur_expr(const ObRawExpr *cur_expr, ObIArray &o LOG_WARN("failed to check and or", K(ret)); } } - } else if (cur_expr->is_spatial_expr()) { + } else if (cur_expr->is_domain_expr()) { is_valid_expr = true; } else if (IS_BASIC_CMP_OP(cmp_type) || T_OP_NE == cmp_type || T_OP_IS == cmp_type || T_OP_IN == cmp_type || T_OP_NOT_IN == cmp_type) { @@ -771,7 +773,7 @@ int ObQueryRange::preliminary_extract_query_range(const ColumnIArray &range_colu if (OB_ISNULL(cur_expr)) { // continue } else if (OB_FAIL(preliminary_extract(cur_expr, temp_result, dtc_params, - T_OP_IN == cur_expr->get_expr_type()))) { + T_OP_IN == cur_expr->get_expr_type() || is_single_domain_op(cur_expr)))) { LOG_WARN("Generate table range failed", K(ret)); } else if (NULL == temp_result) { // ignore the condition from which we can not extract query range @@ -1249,7 +1251,7 @@ bool ObQueryRange::is_precise_get(const ObKeyPart &key_part_head, is_terminated = true; } else if (NULL != cur->or_next_ || NULL != cur->item_next_) { is_precise_get = false; - } else if (cur->is_like_key() || cur->is_geo_key()) { + } else if (cur->is_like_key() || cur->is_domain_key()) { is_precise_get = false; } else if 
(!cur->is_equal_condition()) { is_precise_get = false; @@ -1949,7 +1951,7 @@ int ObQueryRange::get_normal_cmp_keypart(ObItemType cmp_type, return ret; } -int ObQueryRange::get_geo_single_keypart(const ObObj &val_start, const ObObj &val_end, ObKeyPart &out_keypart) const +int ObQueryRange::get_domain_equal_keypart(const ObObj &val_start, const ObObj &val_end, ObKeyPart &out_keypart) const { int ret = OB_SUCCESS; if (OB_FAIL(out_keypart.create_normal_key())) { @@ -3611,7 +3613,7 @@ int ObQueryRange::pre_extract_const_op(const ObRawExpr *c_expr, return ret; } -int ObQueryRange::get_dwithin_item(const ObRawExpr *expr, const ObConstRawExpr *&extra_item) +int ObQueryRange::get_extra_param(const ObRawExpr *expr, const ObConstRawExpr *&extra_item) { int ret = OB_SUCCESS; if (expr->get_param_count() != 3) { @@ -3626,7 +3628,6 @@ int ObQueryRange::get_dwithin_item(const ObRawExpr *expr, const ObConstRawExpr * } return ret; } - int ObQueryRange::set_geo_keypart_whole_range(ObKeyPart &out_key_part) { int ret = OB_SUCCESS; @@ -3665,7 +3666,7 @@ int ObQueryRange::pre_extract_geo_op(const ObOpRawExpr *geo_expr, const ObConstRawExpr *extra_item = NULL; const ObRawExpr *const_item = NULL; const ObColumnRefRawExpr *column_item = NULL; - common::ObGeoRelationType op_type; + common::ObDomainOpType op_type; if (OB_ISNULL(l_expr) || OB_ISNULL(r_expr)) { GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); } else if (l_expr->has_flag(CNT_COLUMN) && r_expr->has_flag(CNT_COLUMN)) { @@ -3680,8 +3681,8 @@ int ObQueryRange::pre_extract_geo_op(const ObOpRawExpr *geo_expr, } else if (l_expr->has_flag(CNT_COLUMN)) { column_item = ObRawExprUtils::get_column_ref_expr_recursively(l_expr); const_item = r_expr; - op_type = (ObGeoRelationType::T_COVERS == op_type ? ObGeoRelationType::T_COVEREDBY : - (ObGeoRelationType::T_COVEREDBY == op_type ? ObGeoRelationType::T_COVERS : op_type)); + op_type = (ObDomainOpType::T_GEO_COVERS == op_type ? 
ObDomainOpType::T_GEO_COVEREDBY : + (ObDomainOpType::T_GEO_COVEREDBY == op_type ? ObDomainOpType::T_GEO_COVERS : op_type)); } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to find column item", K(ret), KPC(r_expr), KPC(l_expr)); @@ -3695,7 +3696,7 @@ int ObQueryRange::pre_extract_geo_op(const ObOpRawExpr *geo_expr, if (OB_SUCC(ret)) { bool is_cellid_col = false; uint64_t column_id = column_item->get_column_id(); - ObGeoColumnInfo column_info; + ObGeoColumnInfo column_info; if (OB_FAIL(columnId_map_.get_refactored(column_id, column_info))) { if (OB_NOT_INIT == ret || OB_HASH_NOT_EXIST == ret) { ret = OB_SUCCESS; @@ -3727,8 +3728,8 @@ int ObQueryRange::pre_extract_geo_op(const ObOpRawExpr *geo_expr, contain_geo_filters_ = true; out_key_part->id_ = key_part_id; out_key_part->pos_ = *key_part_pos; - if (op_type == ObGeoRelationType::T_DWITHIN) { - if (OB_FAIL(get_dwithin_item(expr, extra_item))) { + if (op_type == ObDomainOpType::T_GEO_DWITHIN) { + if (OB_FAIL(get_extra_param(expr, extra_item))) { LOG_WARN("failed to get dwithin item", K(ret)); } } @@ -3741,11 +3742,11 @@ int ObQueryRange::pre_extract_geo_op(const ObOpRawExpr *geo_expr, LOG_WARN("failed to get calculable expr val", K(ret)); } else if (!is_valid) { GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); - } else if (OB_FAIL(out_key_part->create_geo_key())) { - LOG_WARN("create like geo part failed", K(ret)); + } else if (OB_FAIL(out_key_part->create_domain_key())) { + LOG_WARN("create domain part failed", K(ret)); } else if (extra_item != NULL && OB_FAIL(get_calculable_expr_val(extra_item, - out_key_part->geo_keypart_->distance_, + out_key_part->domain_keypart_->extra_param_, is_valid))) { LOG_WARN("failed to get calculable expr val", K(ret)); } else if (!is_valid) { @@ -3753,11 +3754,11 @@ int ObQueryRange::pre_extract_geo_op(const ObOpRawExpr *geo_expr, } else { if (!const_item->is_immutable_const_expr()) { ObObj val; - out_key_part->geo_keypart_->geo_type_ = op_type; - if 
(OB_FAIL(get_final_expr_val(const_item, out_key_part->geo_keypart_->wkb_))) { + out_key_part->domain_keypart_->domain_op_ = op_type; + if (OB_FAIL(get_final_expr_val(const_item, out_key_part->domain_keypart_->const_param_))) { LOG_WARN("failed to get final expr idx", K(ret)); } else if (extra_item != NULL && - OB_FAIL(get_final_expr_val(extra_item, out_key_part->geo_keypart_->distance_))) { + OB_FAIL(get_final_expr_val(extra_item, out_key_part->domain_keypart_->extra_param_))) { LOG_WARN("failed to get final distance expr idx", K(ret)); } } else { @@ -3774,6 +3775,588 @@ int ObQueryRange::pre_extract_geo_op(const ObOpRawExpr *geo_expr, return ret; } +int ObQueryRange::need_extract_domain_range(const ObOpRawExpr *domain_expr, + ObKeyPart *&out_key_part, + bool& need_extract) +{ + int ret = OB_SUCCESS; + need_extract = false; + if (OB_ISNULL(domain_expr) || OB_ISNULL(query_range_ctx_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("expr is null.", KP(domain_expr), K_(query_range_ctx)); + } else { + const ObRawExpr *expr = ObRawExprUtils::skip_inner_added_expr(domain_expr); + const ObRawExpr *l_expr = expr->get_param_expr(0); + const ObRawExpr *r_expr = expr->get_param_expr(1); + if (OB_ISNULL(l_expr) || OB_ISNULL(r_expr)) { + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + } else if (l_expr->has_flag(IS_COLUMN) && r_expr->has_flag(IS_COLUMN)) { + need_extract = false; + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + } else if (l_expr->has_flag(IS_DYNAMIC_PARAM) && r_expr->has_flag(IS_DYNAMIC_PARAM)) { + need_extract = false; + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + } else { + need_extract = true; + } + } + return ret; +} + +int ObQueryRange::reset_domain_op_by_param_pos(const bool is_left_column, ObDomainOpType & op_type) +{ + int ret = OB_SUCCESS; + if (is_left_column) { + op_type = (ObDomainOpType::T_GEO_COVERS == op_type ? ObDomainOpType::T_GEO_COVEREDBY : + (ObDomainOpType::T_GEO_COVEREDBY == op_type ? 
ObDomainOpType::T_GEO_COVERS : op_type)); + } + return ret; +} + +bool ObQueryRange::need_extra_param(ObDomainOpType& op_type) +{ + bool ret_bool = false; + if (op_type == ObDomainOpType::T_GEO_DWITHIN) { + ret_bool = true; + } + return ret_bool; +} + +// is single in or single domain op +bool ObQueryRange::is_single_op(const ObRawExpr *expr_root) +{ + bool ret_bool = false; + if (T_OP_IN == expr_root->get_expr_type()) { + ret_bool = true; + } else if (expr_root->is_domain_expr()) { + ret_bool = true; + } + return ret_bool; +} + +// op that be rewritten as normal key-part, need to check if can cast +bool ObQueryRange::can_domain_be_extract_range(const ObDomainOpType &op_type, const ObExprResType &col_type, + const ObExprCalcType &res_type, common::ObObjType data_type, + bool &always_true) +{ + bool bret = true; + if (op_type == ObDomainOpType::T_JSON_MEMBER_OF || + op_type == ObDomainOpType::T_JSON_CONTAINS || + op_type == ObDomainOpType::T_JSON_OVERLAPS) { + if (res_type.is_json() && col_type.is_json()) { + bret = true; + always_true = true; + } else { + bret = can_be_extract_range(ObItemType::T_OP_EQ, col_type, res_type, data_type, always_true); + } + } + return bret; +} + +// is single domain op +bool ObQueryRange::is_single_domain_op(const ObRawExpr *expr_root) +{ + bool ret_bool = false; + if (expr_root->is_domain_expr()) { + ret_bool = true; + } + return ret_bool; +} + +int ObQueryRange::init_domain_key_part(const ObRawExpr *expr, ObKeyPart *&out_key_part, + ObDomainOpType &op_type, bool& need_extract) +{ + int ret = OB_SUCCESS; + need_extract = false; + if (OB_ISNULL(expr) || OB_ISNULL(query_range_ctx_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("expr is null.", KP(expr)); + } else { + const ObRawExpr *l_expr = expr->get_param_expr(0); + const ObRawExpr *r_expr = expr->get_param_expr(1); + // only expr with three param need to init extra_param, most only have domain_param and const param + const ObConstRawExpr *extra_param = nullptr; + const ObRawExpr 
*const_param = nullptr; + const ObColumnRefRawExpr *column_param = nullptr; + bool is_left_column = false; + if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(l_expr, l_expr))) { + LOG_WARN("failed to get expr without lossless cast", K(ret)); + } else if (OB_FAIL(ObOptimizerUtil::get_expr_without_lossless_cast(r_expr, r_expr))) { + LOG_WARN("failed to get expr without lossless cast", K(ret)); + } else if (OB_UNLIKELY(r_expr->has_flag(CNT_COLUMN))) { + column_param = ObRawExprUtils::get_column_ref_expr_recursively(r_expr); + const_param = l_expr; + } else if (l_expr->has_flag(CNT_COLUMN)) { + column_param = ObRawExprUtils::get_column_ref_expr_recursively(l_expr); + const_param = r_expr; + is_left_column = true; + } else { + // there must be domain column + query_range_ctx_->cur_expr_is_precise_ = false; + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(const_param)) { + } else if (OB_FAIL(reset_domain_op_by_param_pos(is_left_column, op_type))) { + LOG_WARN("failed to reset domain op by param pos", K(is_left_column), K(op_type), K(ret)); + } else if (need_extra_param(op_type) && OB_FAIL(get_extra_param(expr, extra_param))) { + LOG_WARN("failed to find extra param", K(ret), KPC(r_expr), KPC(l_expr)); + } else { + bool is_domain_col = false; + uint64_t column_id = column_param->get_column_id(); + ObGeoColumnInfo column_info; + if (OB_FAIL(columnId_map_.get_refactored(column_id, column_info))) { + if (OB_NOT_INIT == ret || OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get from columnId_map_", K(ret)); + } + } else { + is_domain_col = true; + } + if (OB_SUCC(ret)) { + if (!const_param->is_immutable_const_expr()) { + query_range_ctx_->need_final_extract_ = true; + } + ObKeyPartId key_part_id(column_param->get_table_id(), + is_domain_col ? 
column_info.cellid_columnId_ : column_id); + ObKeyPartPos *key_part_pos = nullptr; + bool b_is_key_part = false; + if (OB_FAIL(is_key_part(key_part_id, key_part_pos, b_is_key_part))) { + LOG_WARN("is_key_part failed", K(ret)); + } else if (!b_is_key_part || OB_UNLIKELY(!const_param->is_const_expr())) { + need_extract = false; + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + } else if (OB_ISNULL(key_part_pos)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null key part pos"); + } else if (OB_ISNULL((out_key_part = create_new_key_part()))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_ERROR("alloc memory failed", K(ret)); + } else { + // for gis, need to set contain_geo_filters_ = true; + out_key_part->id_ = key_part_id; + out_key_part->pos_ = *key_part_pos; + ObObj const_val; + bool is_valid = true; + bool always_true = true; + query_range_ctx_->cur_expr_is_precise_ = false; + if (OB_FAIL(ret)) { + // do nothing + } else if (!can_domain_be_extract_range(op_type, key_part_pos->column_type_, column_param->get_result_type().get_obj_meta(), + const_param->get_result_type().get_type(), always_true)) { + need_extract = false; + GET_ALWAYS_TRUE_OR_FALSE(always_true, out_key_part); + } else if (OB_FAIL(get_calculable_expr_val(const_param, const_val, is_valid))) { + LOG_WARN("failed to get calculable expr val", K(ret)); + } else if (!is_valid) { + // not const, can't extract query range + need_extract = false; + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + } else if (OB_FAIL(out_key_part->create_domain_key())) { + LOG_WARN("create domain part failed", K(ret)); + } else if (OB_FALSE_IT(out_key_part->domain_keypart_->const_param_ = const_val) + || OB_FALSE_IT(out_key_part->domain_keypart_->domain_op_ = op_type)) { + } else if (extra_param != NULL && + OB_FAIL(get_calculable_expr_val(extra_param, + out_key_part->domain_keypart_->extra_param_, + is_valid))) { + LOG_WARN("failed to get calculable expr val", K(ret)); + } else if (!is_valid) { + // not const, can't extract query 
range + need_extract = false; + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + } else if (!const_param->is_immutable_const_expr()) { + // need final extract, just record const value in key part + need_extract = false; + ObObj val; + if (OB_FAIL(get_final_expr_val(const_param, out_key_part->domain_keypart_->const_param_))) { + LOG_WARN("failed to get final expr idx", K(ret)); + } else if (extra_param != NULL && + OB_FAIL(get_final_expr_val(extra_param, out_key_part->domain_keypart_->extra_param_))) { + LOG_WARN("failed to get final distance expr idx", K(ret)); + } + } else { + need_extract = true; + } + } + } // if ob_fail + } + } + return ret; +} + +int ObQueryRange::get_member_of_keyparts(const common::ObObj &const_param, ObKeyPart *&out_key_part, const ObDataTypeCastParams &dtc_params) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(out_key_part)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid argument", K(ret)); + } else { + int64_t cmp = 0; + ObObj cast_obj = const_param; + if (OB_FAIL(ObKeyPart::try_cast_value(dtc_params, allocator_, out_key_part->pos_, cast_obj, cmp))) { + LOG_WARN("failed to try cast value type", K(ret)); + } else if (cmp != 0) { + if (OB_NOT_NULL(query_range_ctx_)) { + query_range_ctx_->cur_expr_is_precise_ = false; + } + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + } else if (OB_FAIL(get_domain_equal_keypart(cast_obj, cast_obj, *out_key_part))) { + LOG_WARN("get normal cmp keypart failed", K(ret)); + } + } + return ret; +} + +int ObQueryRange::get_json_array_in_keyparts(ObIJsonBase* j_base, ObKeyPart *&out_key_part, + ObExecContext *exec_ctx, const ObDataTypeCastParams &dtc_params) +{ + int ret = OB_SUCCESS; + ObKeyPartPos *key_pos = nullptr; + bool b_key_part = false; + InParamMeta *new_param_meta = NULL; + ObExprResType col_res_type; + uint64_t table_id; + if (OB_ISNULL(out_key_part) || OB_ISNULL(j_base)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should not be null", K(ret)); + } else if (OB_FALSE_IT(key_pos = &out_key_part->pos_) || 
OB_FALSE_IT(table_id = out_key_part->id_.table_id_) + || OB_FALSE_IT(col_res_type = out_key_part->pos_.column_type_)) { + } else if (OB_FAIL(out_key_part->create_in_key())) { + LOG_WARN("failed to create in key part", K(ret)); + } else if (OB_ISNULL(out_key_part->in_keypart_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(out_key_part->in_keypart_->offsets_.push_back(key_pos->offset_))) { + LOG_WARN("failed to push back key offset", K(ret)); + } else if (OB_ISNULL(new_param_meta = out_key_part->in_keypart_->create_param_meta(allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create new param meta", K(ret)); + } else { + out_key_part->in_keypart_->table_id_ = table_id; + out_key_part->in_keypart_->is_strict_in_ = true; + new_param_meta->pos_ = *key_pos; + bool always_true = false; + contain_in_ = true; + for (int64_t i = 0; OB_SUCC(ret) && !always_true && i < j_base->element_count(); ++i) { + ObIJsonBase* tmp_j_base = nullptr; + int64_t cmp = 0; + ObObj val; + if (OB_FAIL(j_base->get_array_element(i, tmp_j_base))) { + LOG_WARN("fail to get json array element", K(i), K(ret)); + } else if (OB_ISNULL(tmp_j_base)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get json array element result is null.", K(i), K(ret)); + } else if (OB_FAIL(ObJsonUtil::cast_json_scalar_to_sql_obj(&allocator_, exec_ctx, tmp_j_base, + col_res_type, val))) { + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + always_true = true; + } else if (OB_FAIL(ObKeyPart::try_cast_value(dtc_params, allocator_, *key_pos, val, cmp))) { + LOG_WARN("failed to try cast value type", K(ret)); + } else if (cmp != 0) { + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + always_true = true; + } else { + val.set_collation_type(col_res_type.get_collation_type()); + } + if (OB_SUCC(ret) && OB_FAIL(new_param_meta->vals_.push_back(val))) { + LOG_WARN("failed to push back val", K(ret)); + } + } + if (OB_SUCC(ret) && !always_true) { + if 
(OB_UNLIKELY(new_param_meta->vals_.empty())) { + // all always false + GET_ALWAYS_TRUE_OR_FALSE(false, out_key_part); + } else if (OB_FAIL(out_key_part->in_keypart_->in_params_.push_back(new_param_meta))) { + LOG_WARN("failed to push back param meta", K(ret)); + } else if (OB_FAIL(out_key_part->formalize_keypart(contain_row_))) { + LOG_WARN("failed to formalize in key", K(ret)); + } + } + } + return ret; +} + +int ObQueryRange::get_json_array_keyparts(ObIJsonBase* j_base, ObIArray &key_parts, + ObKeyPart *&out_key_part, ObExecContext *exec_ctx, + const ObDataTypeCastParams &dtc_params) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(j_base) || OB_ISNULL(out_key_part) || OB_ISNULL(exec_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid argument", K(ret)); + } else if (j_base->json_type() != common::ObJsonNodeType::J_ARRAY) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get wrong json_type", K(ret)); + } else { + for (int i = 0; OB_SUCC(ret) && i < j_base->element_count(); ++i) { // stop at the first failure so a later iteration cannot overwrite ret + ObKeyPart *tmp_key_part = nullptr; + ObIJsonBase* tmp_j_base = nullptr; + if (OB_FAIL(j_base->get_array_element(i, tmp_j_base))) { + LOG_WARN("fail to get json array element", K(i), K(ret)); + } else if (OB_ISNULL(tmp_j_base)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get json array element result is null", K(i), K(ret)); + } else if (OB_ISNULL(tmp_key_part = create_new_key_part())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret)); + } else { + ObObj val; + tmp_key_part->id_ = out_key_part->id_; + tmp_key_part->pos_ = out_key_part->pos_; + if (OB_FAIL(ObJsonUtil::cast_json_scalar_to_sql_obj(&allocator_, exec_ctx, tmp_j_base, + tmp_key_part->pos_.column_type_, val))) { + if (OB_NOT_NULL(query_range_ctx_)) { + query_range_ctx_->cur_expr_is_precise_ = false; + } + } else if (OB_FAIL(get_member_of_keyparts(val, tmp_key_part, dtc_params))) { + LOG_WARN("fail to get member of keyparts", K(ret)); + } else if (OB_FAIL(key_parts.push_back(tmp_key_part))) { + LOG_WARN("fail to push
keypart", K(ret)); + } + } + } + } + return ret; +} + +int ObQueryRange::get_contain_or_overlaps_keyparts(const common::ObObj &const_param, const common::ObDomainOpType op_type, + ObIArray &key_parts, ObKeyPart *&out_key_part, + ObExecContext *exec_ctx, const ObDataTypeCastParams &dtc_params, + bool is_single_op) +{ + int ret = OB_SUCCESS; + ObIJsonBase* j_base = nullptr; + ObEvalCtx ctx(*exec_ctx); + if (OB_ISNULL(out_key_part)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid argument", K(ret)); + } else if (OB_FAIL(ObJsonExprHelper::get_json_val(const_param, exec_ctx, false, &allocator_, j_base))) { + LOG_WARN("fail to get json val", K(ret)); + } else if (OB_ISNULL(j_base)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("fail to get json base", K(ret)); + } else if (j_base->is_json_scalar(j_base->json_type())) { + // if is scalar, equal to member of + if (OB_FAIL(get_member_of_keyparts(const_param, out_key_part, dtc_params))) { + LOG_WARN("fail to get member of keyparts", K(ret)); + } + } else if (j_base->json_type() == common::ObJsonNodeType::J_ARRAY) { + int size = j_base->element_count(); + if (size > MAX_JSON_ARRAY_CHANGE_TO_OR_SIZE && is_single_op + && (OB_NOT_NULL(query_range_ctx_) && query_range_ctx_->use_in_optimization_)) { + if (OB_FAIL(get_json_array_in_keyparts(j_base, out_key_part, exec_ctx, dtc_params))) { + LOG_WARN("fail to get json_array in keyparts", K(ret)); + } + } else if (OB_FAIL(get_json_array_keyparts(j_base, key_parts, out_key_part, exec_ctx, dtc_params))) { + LOG_WARN("fail to get json_array keyparts", K(ret)); + } + } else { + // must be object, can't extract query range + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + } + return ret; +} + +int ObQueryRange::get_simple_domain_keyparts(const common::ObObj &const_param, const common::ObDomainOpType op_type, + ObIArray &key_parts, ObKeyPart *&out_key_part, + ObExecContext *exec_ctx, const ObDataTypeCastParams &dtc_params, + bool is_single_op) +{ + int ret = OB_SUCCESS; + switch(op_type) { + 
case ObDomainOpType::T_JSON_MEMBER_OF: { + if (OB_FAIL(get_member_of_keyparts(const_param, out_key_part, dtc_params))) { + LOG_WARN("fail to get member of keyparts.", K(op_type), K(ret)); + } else if (OB_NOT_NULL(query_range_ctx_) && !(out_key_part->is_always_false() || out_key_part->is_always_true())) { + query_range_ctx_->cur_expr_is_precise_ = true; + } + break; + } + case ObDomainOpType::T_JSON_CONTAINS: + case ObDomainOpType::T_JSON_OVERLAPS: { + if (OB_FAIL(get_contain_or_overlaps_keyparts(const_param, op_type, key_parts, out_key_part, exec_ctx, dtc_params, is_single_op))) { + LOG_WARN("fail to get member of keyparts.", K(op_type), K(ret)); + } + break; + } + default: { + ret = OB_NOT_SUPPORTED; + break; + } + } + return ret; +} + +int ObQueryRange::connect_domain_keyparts(const common::ObDomainOpType op_type, ObIArray &key_parts, + ObKeyPart *&out_key_part, ObExecContext *exec_ctx, + const ObDataTypeCastParams &dtc_params) +{ + int ret = OB_SUCCESS; + switch(op_type) { + // connect by or + case ObDomainOpType::T_JSON_CONTAINS: + case ObDomainOpType::T_JSON_OVERLAPS: { + if (OB_FAIL(connect_and_or_domain_keyparts(key_parts, out_key_part, exec_ctx, dtc_params, false))) { + LOG_WARN("fail to get connect keyparts.", K(op_type), K(ret)); + } + break; + } + // connect by and + default: { + ret = OB_NOT_SUPPORTED; + break; + } + } + return ret; +} + +int ObQueryRange::connect_and_or_domain_keyparts(ObIArray &key_parts, ObKeyPart *&out_key_part, + ObExecContext *exec_ctx, const ObDataTypeCastParams &dtc_params, + bool is_and_op) +{ + int ret = OB_SUCCESS; + bool cur_expr_is_precise = false; + ObKeyPartList key_part_list; + for (int64_t i = 0; OB_SUCC(ret) && i < key_parts.count(); ++i) { + ObKeyPart *tmp = key_parts.at(i); + if (OB_NOT_NULL(query_range_ctx_)) { + query_range_ctx_->cur_expr_is_precise_ = false; + cur_expr_is_precise = true; + } + if (is_and_op) { + if (OB_FAIL(add_and_item(key_part_list, tmp))) { + LOG_WARN("push back failed", K(ret)); + } + } else { 
//T_OP_OR + if (OB_FAIL(add_or_item(key_part_list, tmp))) { + LOG_WARN("push back failed", K(ret)); + } + } + if (OB_SUCC(ret) && cur_expr_is_precise) { + cur_expr_is_precise = (cur_expr_is_precise && query_range_ctx_->cur_expr_is_precise_); + } + } + if (OB_SUCC(ret)) { + if (OB_NOT_NULL(query_range_ctx_)) { + query_range_ctx_->cur_expr_is_precise_ = cur_expr_is_precise; + } + ObSqlBitSet<> key_offsets; + ObKeyPart *tmp_key_part = nullptr; + if (is_and_op) { + if (OB_FAIL(and_range_graph(key_part_list, tmp_key_part))) { + LOG_WARN("and range graph failed", K(ret)); + } + } else if (OB_FAIL(or_range_graph(key_part_list, exec_ctx, tmp_key_part, dtc_params))) { + LOG_WARN("or range graph failed", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(refine_large_range_graph(tmp_key_part))) { + LOG_WARN("failed to refine large range graph", K(ret)); + } else if (OB_FAIL(remove_useless_range_graph(tmp_key_part, key_offsets))) { + LOG_WARN("failed to remove useless range", K(ret)); + } else if (OB_FAIL(overwrite_keypart(out_key_part, tmp_key_part))) { + LOG_WARN("fail to copy keypart node", K(ret)); + } + } + return ret; +} + +// int ObQueryRange::replace_unknown_value(ObKeyPart *root) +// In the final stage the key part is passed in as a plain pointer rather than a reference to a pointer, so the final result can only be copied over the node that root points to; the root pointer itself cannot be replaced. +int ObQueryRange::overwrite_keypart(ObKeyPart* out_key_part, ObKeyPart* other) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(other) || OB_ISNULL(out_key_part)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid argument", K(ret), KP(other), KP(out_key_part)); + } else if (OB_FAIL(out_key_part->deep_node_copy(*other))) { + LOG_WARN("failed to deep copy key part node", K(ret)); + } else { + out_key_part->item_next_ = other->item_next_; + out_key_part->or_next_ = other->or_next_; + out_key_part->and_next_ = other->and_next_; + } + return ret; +} + +int ObQueryRange::get_simple_domain_range(const common::ObObj &const_param, const common::ObDomainOpType op_type, + ObKeyPart *&out_key_part, ObExecContext *exec_ctx, + const
ObDataTypeCastParams &dtc_params, bool is_single_op) +{ + int ret = OB_SUCCESS; + ObSEArray key_parts; + if (OB_FAIL(get_simple_domain_keyparts(const_param, op_type, key_parts, out_key_part, exec_ctx, dtc_params, is_single_op))) { + LOG_WARN("fail to get simple domain keyparts.", K(op_type), K(ret)); + } else if (key_parts.count() == 0) { + // do nothing, already set result in out_key_part + } else if (key_parts.count() == 1 && OB_FAIL(overwrite_keypart(out_key_part, key_parts.at(0)))) { + LOG_WARN("fail to overwrite keypart node", K(ret)); + } else if (OB_FAIL(connect_domain_keyparts(op_type, key_parts, out_key_part, exec_ctx, dtc_params))) { + LOG_WARN("fail to connect domain keyparts", K(ret)); + } + return ret; +} + +int ObQueryRange::get_complex_domain_range(const common::ObObj &const_param, const common::ObDomainOpType op_type, + ObKeyPart *&out_key_part, const ObDataTypeCastParams &dtc_params) +{ + int ret = OB_SUCCESS; + // todo + GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); + return ret; +} + +int ObQueryRange::get_domain_range(const common::ObObj &const_param, const common::ObDomainOpType op_type, + ObKeyPart *&out_key_part, ObExecContext *exec_ctx, + const ObDataTypeCastParams &dtc_params, bool is_single_op) +{ + int ret = OB_SUCCESS; + if (is_simple_domain_op(op_type)) { + //ObSEArray pre_key_parts; + if (OB_FAIL(get_simple_domain_range(const_param, op_type, out_key_part, exec_ctx, dtc_params, is_single_op))) { + LOG_WARN("fail to get simple domain range.", K(op_type), K(ret)); + } + } else { + // get_range_by_type + // 会用不同的连接方式,然后对于每一部分相同的连接方式可以调用 + if (OB_FAIL(get_complex_domain_range(const_param, op_type, out_key_part, dtc_params))) { + LOG_WARN("fail to get simple complex range.", K(op_type), K(ret)); + } + } + return ret; +} + +int ObQueryRange::pre_extract_domain_op(const ObOpRawExpr *domain_expr, + ObKeyPart *&out_key_part, + const ObDataTypeCastParams &dtc_params, + const bool is_single_op) +{ + UNUSED(dtc_params); + int ret = OB_SUCCESS; + 
bool need_extract = false; + if (OB_FAIL(need_extract_domain_range(domain_expr, out_key_part, need_extract))) { + LOG_WARN("fail to check domain expr type.", KP(domain_expr), K_(query_range_ctx)); + } else if (need_extract) { + const ObRawExpr *expr = ObRawExprUtils::skip_inner_added_expr(domain_expr); + ObDomainOpType op_type = get_domain_op_type(expr->get_expr_type()); + if (OB_FAIL(init_domain_key_part(expr, out_key_part, op_type, need_extract))) { + LOG_WARN("fail to init domain_key_part.", K(op_type)); + } else if (!need_extract) { + } else if (OB_ISNULL(out_key_part) || !out_key_part->is_domain_key() || OB_ISNULL(query_range_ctx_)) { + ret = OB_ERR_UNEXPECTED; + } else if (OB_FAIL(get_domain_range(out_key_part->domain_keypart_->const_param_, + out_key_part->domain_keypart_->domain_op_, + out_key_part, query_range_ctx_->exec_ctx_, + dtc_params, is_single_op))) { + LOG_WARN("create domain range failed", K(ret)); + } + } + return ret; +} + // For each index, preliminary extract query range, // the result may contain prepared '?' expression. // If prepared '?' 
expression exists, final extract action is needed @@ -3837,6 +4420,10 @@ int ObQueryRange::preliminary_extract(const ObRawExpr *node, if (OB_FAIL(pre_extract_geo_op(b_expr, out_key_part, dtc_params))) { LOG_WARN("extract and_or failed", K(ret)); } + } else if (node->is_domain_expr()) { + if (OB_FAIL(pre_extract_domain_op(b_expr, out_key_part, dtc_params, is_single_in))) { + LOG_WARN("extract and_or failed", K(ret)); + } } else { query_range_ctx_->cur_expr_is_precise_ = false; GET_ALWAYS_TRUE_OR_FALSE(true, out_key_part); @@ -7473,15 +8060,25 @@ int ObQueryRange::replace_unknown_value(ObKeyPart *root, ObExecContext &exec_ctx root->normal_keypart_->include_end_ = false; } } else if (root->is_geo_key()) { - if (OB_FAIL(get_result_value_with_rowid(*root, root->geo_keypart_->wkb_, exec_ctx, is_inconsistent_rowid))) { + if (OB_FAIL(get_result_value_with_rowid(*root, root->domain_keypart_->const_param_, exec_ctx, is_inconsistent_rowid))) { LOG_WARN("get param wkb value failed", K(ret)); - } else if (OB_FAIL(get_result_value_with_rowid(*root, root->geo_keypart_->distance_, exec_ctx, is_inconsistent_rowid))) { + } else if (OB_FAIL(get_result_value_with_rowid(*root, root->domain_keypart_->extra_param_, exec_ctx, is_inconsistent_rowid))) { LOG_WARN("get param distance value failed", K(ret)); - } else if (OB_FAIL(get_geo_range(root->geo_keypart_->wkb_, - root->geo_keypart_->geo_type_, + } else if (OB_FAIL(get_geo_range(root->domain_keypart_->const_param_, + root->domain_keypart_->domain_op_, root))) { LOG_WARN("get geo range failed", K(ret)); } + } else if (root->is_domain_key()) { + if (OB_FAIL(get_result_value_with_rowid(*root, root->domain_keypart_->const_param_, exec_ctx, is_inconsistent_rowid))) { + LOG_WARN("get param wkb value failed", K(ret)); + } else if (OB_FAIL(get_result_value_with_rowid(*root, root->domain_keypart_->extra_param_, exec_ctx, is_inconsistent_rowid))) { + LOG_WARN("get param distance value failed", K(ret)); + } else if 
(OB_FAIL(get_domain_range(root->domain_keypart_->const_param_, + root->domain_keypart_->domain_op_, + root, &exec_ctx, dtc_params))) { + LOG_WARN("get geo range failed", K(ret)); + } } else if (root->is_like_key()) { if (OB_FAIL(get_result_value_with_rowid(*root, root->like_keypart_->pattern_, exec_ctx, is_inconsistent_rowid))) { LOG_WARN("get result value failed", K(ret)); @@ -8425,13 +9022,13 @@ inline bool ObQueryRange::is_standard_graph(const ObKeyPart *root) const bret = false; } else { for (const ObKeyPart *cur = root; bret && NULL != cur; cur = cur->and_next_) { - if (NULL != cur->or_next_ || cur->is_like_key() || cur->is_in_key() || cur->is_geo_key()) { + if (NULL != cur->or_next_ || cur->is_like_key() || cur->is_in_key() || cur->is_domain_key()) { bret = false; } else { for (const ObKeyPart *item_next = cur->item_next_; bret && NULL != item_next; item_next = item_next->item_next_) { - if (item_next->is_like_key() || item_next->is_in_key() || item_next->is_geo_key()) { + if (item_next->is_like_key() || item_next->is_in_key() || item_next->is_domain_key()) { bret = false; } } @@ -8929,35 +9526,74 @@ DEF_TO_STRING(ObQueryRange::ObRangeExprItem) return pos; } -common::ObGeoRelationType ObQueryRange::get_geo_relation(ObItemType type) const +common::ObDomainOpType ObQueryRange::get_geo_relation(ObItemType type) const { - common::ObGeoRelationType rel_type = common::ObGeoRelationType::T_INVALID; + common::ObDomainOpType rel_type = common::ObDomainOpType::T_INVALID; switch (type) { case T_FUN_SYS_PRIV_ST_EQUALS : case T_FUN_SYS_PRIV_ST_TOUCHES : case T_FUN_SYS_ST_INTERSECTS : { - rel_type = common::ObGeoRelationType::T_INTERSECTS; + rel_type = common::ObDomainOpType::T_GEO_INTERSECTS; break; } case T_FUN_SYS_ST_CONTAINS : case T_FUN_SYS_ST_COVERS : { - rel_type = common::ObGeoRelationType::T_COVERS; + rel_type = common::ObDomainOpType::T_GEO_COVERS; break; } case T_FUN_SYS_ST_DWITHIN : { - rel_type = common::ObGeoRelationType::T_DWITHIN; + rel_type = 
common::ObDomainOpType::T_GEO_DWITHIN; break; } case T_FUN_SYS_ST_WITHIN : { - rel_type = common::ObGeoRelationType::T_COVEREDBY; + rel_type = common::ObDomainOpType::T_GEO_COVEREDBY; + break; + } + default: + break; + } + return rel_type; +} + +common::ObDomainOpType ObQueryRange::get_domain_op_type(ObItemType type) const +{ + common::ObDomainOpType rel_type = common::ObDomainOpType::T_INVALID; + switch (type) { + case T_FUN_SYS_JSON_MEMBER_OF : { + rel_type = common::ObDomainOpType::T_JSON_MEMBER_OF; + break; + } + case T_FUN_SYS_JSON_CONTAINS : { + rel_type = common::ObDomainOpType::T_JSON_CONTAINS; + break; + } + case T_FUN_SYS_JSON_OVERLAPS : { + rel_type = common::ObDomainOpType::T_JSON_OVERLAPS; + break; + } + case T_FUN_SYS_ST_INTERSECTS : { + rel_type = common::ObDomainOpType::T_GEO_INTERSECTS; + break; + } + case T_FUN_SYS_ST_CONTAINS : + case T_FUN_SYS_ST_COVERS : { + rel_type = common::ObDomainOpType::T_GEO_COVERS; + break; + } + case T_FUN_SYS_ST_DWITHIN : { + rel_type = common::ObDomainOpType::T_GEO_DWITHIN; + break; + } + case T_FUN_SYS_ST_WITHIN : { + rel_type = common::ObDomainOpType::T_GEO_COVEREDBY; break; } case T_FUN_SYS_ST_CROSSES : { - rel_type = common::ObGeoRelationType::T_INTERSECTS; + rel_type = common::ObDomainOpType::T_GEO_INTERSECTS; break; } case T_FUN_SYS_ST_OVERLAPS : { - rel_type = common::ObGeoRelationType::T_INTERSECTS; + rel_type = common::ObDomainOpType::T_GEO_INTERSECTS; break; } default: @@ -8968,7 +9604,7 @@ common::ObGeoRelationType ObQueryRange::get_geo_relation(ObItemType type) const int ObQueryRange::get_geo_intersects_keypart(uint32_t input_srid, const common::ObString &wkb_str, - const common::ObGeoRelationType op_type, + const common::ObDomainOpType op_type, ObKeyPart *out_key_part) { INIT_SUCC(ret); @@ -8992,9 +9628,9 @@ int ObQueryRange::get_geo_intersects_keypart(uint32_t input_srid, } else if (((input_srid == 0) || !(srs_item->is_geographical_srs())) && OB_FAIL(OTSRS_MGR->get_srs_bounds(input_srid, srs_item, 
srs_bound))) { LOG_WARN("failed to get srs item", K(ret)); - } else if (op_type == ObGeoRelationType::T_DWITHIN) { - distance = out_key_part->geo_keypart_->distance_.get_double(); - if (out_key_part->geo_keypart_->distance_.is_unknown() || std::isnan(distance)) { + } else if (op_type == ObDomainOpType::T_GEO_DWITHIN) { + distance = out_key_part->domain_keypart_->extra_param_.get_double(); + if (out_key_part->domain_keypart_->extra_param_.is_unknown() || std::isnan(distance)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid distance para", K(ret)); } else if (input_srid != 0 && srs_item->is_geographical_srs()) { @@ -9060,7 +9696,7 @@ int ObQueryRange::get_geo_intersects_keypart(uint32_t input_srid, ObObj val; val.set_uint64(cells_with_ancestors[i]); if (i == 0) { - if (OB_FAIL(get_geo_single_keypart(val, val, *out_key_part))) { + if (OB_FAIL(get_domain_equal_keypart(val, val, *out_key_part))) { LOG_WARN("get normal cmp keypart failed", K(ret)); } } else { @@ -9071,7 +9707,7 @@ int ObQueryRange::get_geo_intersects_keypart(uint32_t input_srid, } else { tmp->id_ = out_key_part->id_; tmp->pos_ = out_key_part->pos_; - if (OB_FAIL(get_geo_single_keypart(val, val, *tmp))) { + if (OB_FAIL(get_domain_equal_keypart(val, val, *tmp))) { LOG_WARN("get normal cmp keypart failed", K(ret)); } else { last->or_next_ = tmp; @@ -9098,7 +9734,7 @@ int ObQueryRange::get_geo_intersects_keypart(uint32_t input_srid, ObObj val_start, val_end; val_start.set_uint64(start_id); val_end.set_uint64(end_id); - if (OB_FAIL(get_geo_single_keypart(val_start, val_end, *tmp))) { + if (OB_FAIL(get_domain_equal_keypart(val_start, val_end, *tmp))) { LOG_WARN("get normal cmp keypart failed", K(ret)); } else { last->or_next_ = tmp; @@ -9118,7 +9754,7 @@ int ObQueryRange::get_geo_intersects_keypart(uint32_t input_srid, int ObQueryRange::get_geo_coveredby_keypart(uint32_t input_srid, const common::ObString &wkb_str, - const common::ObGeoRelationType op_type, + const common::ObDomainOpType op_type, ObKeyPart 
*out_key_part) { INIT_SUCC(ret); @@ -9198,7 +9834,7 @@ int ObQueryRange::get_geo_coveredby_keypart(uint32_t input_srid, val.set_uint64(cells[i]); cell_head->id_ = out_key_part->id_; cell_head->pos_ = out_key_part->pos_; - if (OB_FAIL(get_geo_single_keypart(val, val, *cell_head))) { + if (OB_FAIL(get_domain_equal_keypart(val, val, *cell_head))) { LOG_WARN("get normal cmp keypart failed", K(ret)); } else { cell_last = cell_head; @@ -9225,7 +9861,7 @@ int ObQueryRange::get_geo_coveredby_keypart(uint32_t input_srid, val.set_uint64(ancestors[i]); tmp->id_ = out_key_part->id_; tmp->pos_ = out_key_part->pos_; - if (OB_FAIL(get_geo_single_keypart(val, val, *tmp))) { + if (OB_FAIL(get_domain_equal_keypart(val, val, *tmp))) { LOG_WARN("get normal cmp keypart failed", K(ret)); } else { cell_last->or_next_ = tmp; @@ -9276,7 +9912,7 @@ int ObQueryRange::get_geo_coveredby_keypart(uint32_t input_srid, val.set_uint64(cells_cover_geo[i]); tmp->id_ = out_key_part->id_; tmp->pos_ = out_key_part->pos_; - if (OB_FAIL(get_geo_single_keypart(val, val, *tmp))) { + if (OB_FAIL(get_domain_equal_keypart(val, val, *tmp))) { LOG_WARN("get normal cmp keypart failed", K(ret)); } else { last->or_next_ = tmp; @@ -9323,7 +9959,7 @@ int ObQueryRange::get_geo_coveredby_keypart(uint32_t input_srid, return ret; } -int ObQueryRange::get_geo_range(const common::ObObj &wkb, const common::ObGeoRelationType op_type, +int ObQueryRange::get_geo_range(const common::ObObj &wkb, const common::ObDomainOpType op_type, ObKeyPart *out_key_part) { INIT_SUCC(ret); @@ -9352,14 +9988,14 @@ int ObQueryRange::get_geo_range(const common::ObObj &wkb, const common::ObGeoRel static_cast(column_info.srid_)); } else { switch (op_type) { - case ObGeoRelationType::T_INTERSECTS: - case ObGeoRelationType::T_COVERS: - case ObGeoRelationType::T_DWITHIN: + case ObDomainOpType::T_GEO_INTERSECTS: + case ObDomainOpType::T_GEO_COVERS: + case ObDomainOpType::T_GEO_DWITHIN: if (OB_FAIL(get_geo_intersects_keypart(input_srid, wkb_str, 
op_type, out_key_part))) { LOG_WARN("failed to get keypart from intersects_keypart", K(ret), K(op_type)); } break; - case ObGeoRelationType::T_COVEREDBY: + case ObDomainOpType::T_GEO_COVEREDBY: if (OB_FAIL(get_geo_coveredby_keypart(input_srid, wkb_str, op_type, out_key_part))) { LOG_WARN("failed to get keypart from intersects_keypart", K(ret), K(op_type)); } diff --git a/src/sql/rewrite/ob_query_range.h b/src/sql/rewrite/ob_query_range.h index 74a452d85d..9ce8911a0a 100644 --- a/src/sql/rewrite/ob_query_range.h +++ b/src/sql/rewrite/ob_query_range.h @@ -462,7 +462,8 @@ public: int is_get(bool &is_get) const; int is_get(int64_t column_count, bool &is_get) const; bool is_precise_get() const { return table_graph_.is_precise_get_; } - common::ObGeoRelationType get_geo_relation(ObItemType type) const; + common::ObDomainOpType get_geo_relation(ObItemType type) const; + common::ObDomainOpType get_domain_op_type(ObItemType type) const; const common::ObIArray &get_range_exprs() const { return range_exprs_; } const common::ObIArray &get_ss_range_exprs() const { return ss_range_exprs_; } int check_graph_type(ObKeyPart &key_part_head); @@ -485,6 +486,9 @@ public: static bool can_be_extract_range(ObItemType cmp_type, const ObExprResType &col_type, const ObExprCalcType &res_type, common::ObObjType data_type, bool &always_true); + static bool can_domain_be_extract_range(const ObDomainOpType &op_type, const ObExprResType &col_type, + const ObExprCalcType &res_type, common::ObObjType data_type, + bool &always_true); // that mean the query range contain non-standard range graph, // need copy from ObTableScan operator to physical operator context to extract query range @@ -585,16 +589,7 @@ private: int get_normal_cmp_keypart(ObItemType cmp_type, const common::ObObj &val, ObKeyPart &out_keypart) const; - int get_geo_single_keypart(const ObObj &val_start, const ObObj &val_end, ObKeyPart &out_keypart) const; - int get_geo_intersects_keypart(uint32_t input_srid, - const common::ObString 
&wkb, - const common::ObGeoRelationType op_type, - ObKeyPart *out_key_part); - int get_geo_coveredby_keypart(uint32_t input_srid, - const common::ObString &wkb, - const common::ObGeoRelationType op_type, - ObKeyPart *out_key_part); - int set_geo_keypart_whole_range(ObKeyPart &out_key_part); + int set_domain_normal_keypart(const ObObj &val_start, const ObObj &val_end, ObKeyPart &out_keypart) const; int get_row_key_part(const ObRawExpr *l_expr, const ObRawExpr *r_expr, ObItemType cmp_type, @@ -819,8 +814,92 @@ private: bool is_get_graph(int deepth, ObKeyPart *key_part); int get_like_range(const common::ObObj &pattern, const common::ObObj &escape, ObKeyPart &out_key_part, const ObDataTypeCastParams &dtc_params); - int get_geo_range(const common::ObObj &wkb, const common::ObGeoRelationType op_type, ObKeyPart *out_key_part); +// domain index interface functions + int pre_extract_domain_op(const ObOpRawExpr *domain_expr, + ObKeyPart *&out_key_part, + const ObDataTypeCastParams &dtc_params, + const bool is_single_op); + + // check if only domain op (use in optimizer when is true) + bool is_single_domain_op(const ObRawExpr *expr_root); + // following functions are expected to be used only by domain index: + // check param type, and make sure can be extract query range + int domain_index_check_can_be_extracted(const ObOpRawExpr *domain_expr, ObKeyPart *&out_key_part, bool& need_extract); + // init domain key part: domain_op, obj, and check if need final extract + int domain_index_init_key_part(const ObRawExpr *expr, ObKeyPart *&out_key_part, + ObDomainOpType & op_type, bool& need_extract); + // use for simple predicate, which connect by only one way + int doamain_index_get_simple_range(const ObDomainOpType op_type, ObKeyPart *&out_key_part, + ObExecContext *exec_ctx, const ObDataTypeCastParams &dtc_params, + bool is_single_op); + // use for simple predicate, get all keyparts that need to be connected + int doamain_index_get_keyparts(const ObDomainOpType op_type, ObIArray 
&key_parts, + ObKeyPart *&out_key_part, ObExecContext *exec_ctx, + const ObDataTypeCastParams &dtc_params, bool is_single_op); + // use for simple predicate, chose connect way: and/or + int doamain_index_connect_keyparts(const ObDomainOpType op_type, ObIArray &key_parts, + ObKeyPart *&out_key_part, ObExecContext *exec_ctx, + const ObDataTypeCastParams &dtc_params); + // use for simple predicate, connect key parts + int doamain_index_connect_and_or(ObIArray &key_parts, ObKeyPart *&out_key_part, + ObExecContext *exec_ctx, const ObDataTypeCastParams &dtc_params, + bool is_and_op); + int domain_index_set_equal_keyparts(const common::ObObj &const_param, + ObKeyPart *&out_key_part, + const ObDataTypeCastParams &dtc_params); + // for domain predicate that need third arg + int domain_get_extra_param(ObDomainOpType& op, const ObRawExpr *expr, const ObConstRawExpr *&extra_item); + int get_json_array_keyparts(ObIJsonBase* j_base, ObIArray &key_parts, + ObKeyPart *&out_key_part, ObExecContext *exec_ctx, + const ObDataTypeCastParams &dtc_params); + int get_json_array_in_keyparts(ObIJsonBase* j_base, ObKeyPart *&out_key_part, + ObExecContext *exec_ctx, const ObDataTypeCastParams &dtc_params); + // functions used for geo predicate + int get_geo_range(const common::ObObj &wkb, const ObDomainOpType op_type, ObKeyPart *out_key_part); int get_dwithin_item(const ObRawExpr *expr, const ObConstRawExpr *&extra_item); + int get_domain_equal_keypart(const ObObj &val_start, const ObObj &val_end, ObKeyPart &out_keypart) const; + int get_geo_intersects_keypart(uint32_t input_srid, + const common::ObString &wkb, + const ObDomainOpType op_type, + ObKeyPart *out_key_part); + int get_geo_coveredby_keypart(uint32_t input_srid, + const common::ObString &wkb, + const ObDomainOpType op_type, + ObKeyPart *out_key_part); + int set_geo_keypart_whole_range(ObKeyPart &out_key_part); + int overwrite_keypart(ObKeyPart* out_key_part, ObKeyPart* src_key_part); + int reset_domain_op_by_param_pos(const bool 
is_left_column, ObDomainOpType & op_type); + int get_domain_range(const common::ObObj &const_param, const common::ObDomainOpType op_type, + ObKeyPart *&out_key_part, ObExecContext *exec_ctx, + const ObDataTypeCastParams &dtc_params, bool is_single_op = false); + int get_simple_domain_range(const common::ObObj &const_param, const common::ObDomainOpType op_type, + ObKeyPart *&out_key_part, ObExecContext *exec_ctx, + const ObDataTypeCastParams &dtc_params, bool is_single_op); + int get_simple_domain_keyparts(const common::ObObj &const_param, const common::ObDomainOpType op_type, + ObIArray &key_parts, ObKeyPart *&out_key_part, + ObExecContext *exec_ctx, const ObDataTypeCastParams &dtc_params, + bool is_single_op); + int connect_domain_keyparts(const common::ObDomainOpType op_type, ObIArray &key_parts, + ObKeyPart *&out_key_part, ObExecContext *exec_ctx, + const ObDataTypeCastParams &dtc_params); + int connect_and_or_domain_keyparts(ObIArray &key_parts, ObKeyPart *&out_key_part, + ObExecContext *exec_ctx, const ObDataTypeCastParams &dtc_params, + bool is_and_op); + int get_member_of_keyparts(const common::ObObj &const_param, ObKeyPart *&out_key_part, const ObDataTypeCastParams &dtc_params); + int get_contain_or_overlaps_keyparts(const common::ObObj &const_param, const common::ObDomainOpType op_type, + ObIArray &key_parts, ObKeyPart *&out_key_part, + ObExecContext *exec_ctx, const ObDataTypeCastParams &dtc_params, + bool is_single_op); + int get_complex_domain_range(const common::ObObj &const_param, const common::ObDomainOpType op_type, + ObKeyPart *&out_key_part, const ObDataTypeCastParams &dtc_params); + int get_extra_param(const ObRawExpr *expr, const ObConstRawExpr *&extra_item); + int need_extract_domain_range(const ObOpRawExpr *domain_expr, ObKeyPart *&out_key_part, bool& need_extract); + int init_domain_key_part(const ObRawExpr *expr, ObKeyPart *&out_key_part, + ObDomainOpType & op_type, bool& need_extract);
+  // Returns true only when op_type lies strictly between T_INVALID and T_DOMAIN_OP_END.
+  // Both bounds must hold at once: joining them with "||" accepts every value,
+  // including T_INVALID and T_DOMAIN_OP_END themselves.
+  inline bool is_simple_domain_op(const common::ObDomainOpType op_type) { return op_type > common::ObDomainOpType::T_INVALID
+                                                                          && op_type < common::ObDomainOpType::T_DOMAIN_OP_END;}
+ bool is_single_op(const ObRawExpr *expr_root); + bool need_extra_param(ObDomainOpType & op_type); int get_like_const_range(const ObRawExpr *text, const ObRawExpr *pattern, const ObRawExpr *escape, @@ -909,6 +988,7 @@ private: static const int64_t MAX_RANGE_SIZE_OLD = 10000; static const int64_t MAX_RANGE_SIZE_NEW = 100000; static const int64_t MAX_NOT_IN_SIZE = 10; //do not extract range for not in row over this size + static const int64_t MAX_JSON_ARRAY_CHANGE_TO_OR_SIZE = 10; typedef common::ObObjStore KeyPartStore; private: ObRangeGraph table_graph_; diff --git a/src/sql/rewrite/ob_transform_pre_process.cpp b/src/sql/rewrite/ob_transform_pre_process.cpp index 8f795790d9..539eba4419 100644 --- a/src/sql/rewrite/ob_transform_pre_process.cpp +++ b/src/sql/rewrite/ob_transform_pre_process.cpp @@ -36,6 +36,7 @@ #include "sql/resolver/dml/ob_select_stmt.h" #include "sql/resolver/dml/ob_select_resolver.h" #include "sql/resolver/dml/ob_merge_stmt.h" +#include "sql/resolver/dml/ob_delete_stmt.h" #include "sql/resolver/dml/ob_merge_resolver.h" #include "sql/resolver/dml/ob_update_stmt.h" #include "sql/rewrite/ob_expand_aggregate_utils.h" @@ -317,6 +318,19 @@ int ObTransformPreProcess::transform_one_stmt(common::ObIArray LOG_TRACE("succeed to transform for last_insert_id.",K(is_happened), K(ret)); } } + if (OB_SUCC(ret)) { + if (lib::is_mysql_mode() && stmt->get_match_exprs().count() > 0 && + OB_FAIL(preserve_order_for_fulltext_search(stmt, is_happened))) { + LOG_WARN("failed to preserve order for fulltext search", K(ret)); + } else { + trans_happened |= is_happened; + LOG_TRACE("succeed to transform for preserve order for fulltext search",K(is_happened), K(ret)); + } + } + if (OB_SUCC(ret) && OB_FAIL(disable_complex_dml_for_fulltext_index(stmt))) { + LOG_WARN("disable complex dml for fulltext index", K(ret)); + // jinmao TODO: 
table scan 能吐出正确的 doc_id 后,可删除此限制 + } if (OB_SUCC(ret)) { LOG_DEBUG("transform pre process succ", K(*stmt)); if (OB_FAIL(stmt->formalize_stmt(ctx_->session_info_))) { @@ -9829,6 +9843,38 @@ int ObTransformPreProcess::check_can_transform_insert_only_merge_into(const ObMe return ret; } +// full-text index queries on a single base table are processed with order preservation. +// (Order is not preserved in multi-table join scenarios.) +int ObTransformPreProcess::preserve_order_for_fulltext_search(ObDMLStmt *stmt, bool& trans_happened) +{ + int ret = OB_SUCCESS; + trans_happened = false; + TableItem *table_item = NULL; + ObMatchFunRawExpr *match_expr = NULL; + if (OB_ISNULL(stmt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (stmt->get_table_items().count() != 1 || stmt->get_order_item_size() != 0) { + // do nothing + } else if (OB_ISNULL(table_item = stmt->get_table_item(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (!table_item->is_basic_table()) { + // do nothing + } else if (OB_FAIL(stmt->get_match_expr_on_table(table_item->table_id_, match_expr))) { + LOG_WARN("failed to get fulltext search expr on table", K(table_item->table_id_), K(ret)); + } else if (OB_ISNULL(match_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + OrderItem item(match_expr, default_desc_direction()); + if (OB_FAIL(stmt->add_order_item(item))) { + LOG_WARN("failed to add order item", K(ret), K(item)); + } + } + return ret; +} + int ObTransformPreProcess::preserve_order_for_pagination(ObDMLStmt *stmt, bool &trans_happened) { @@ -10071,5 +10117,74 @@ int ObTransformPreProcess::get_rowkey_for_single_table(ObSelectStmt* stmt, return ret; } +int ObTransformPreProcess::disable_complex_dml_for_fulltext_index(ObDMLStmt *stmt) +{ + int ret = OB_SUCCESS; + ObSEArray tables_to_check; + bool has_table_with_fulltext_index = false; + if (OB_ISNULL(stmt) || OB_ISNULL(ctx_) || 
OB_ISNULL(ctx_->schema_checker_) || + OB_ISNULL(ctx_->session_info_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (stmt->is_insert_stmt()) { + ObInsertStmt *insert_stmt = static_cast(stmt); + ObInsertTableInfo table_info = insert_stmt->get_insert_table_info(); + if (table_info.is_replace_ || table_info.assignments_.count() != 0) { + TableItem* table = stmt->get_table_item_by_id(table_info.table_id_); + if (OB_FAIL(tables_to_check.push_back(table))) { + LOG_WARN("failed to push back table", K(ret)); + } + } + } else if (stmt->is_delete_stmt() || stmt->is_update_stmt()) { + ObDelUpdStmt *del_upd_stmt = static_cast(stmt); + ObSEArray table_infos; + TableItem* table = NULL; + if (OB_FAIL(del_upd_stmt->get_dml_table_infos(table_infos))) { + LOG_WARN("failed to get dml table infos", K(ret)); + } else if (table_infos.count() == 1) { + if (OB_ISNULL(table_infos.at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_ISNULL(table = stmt->get_table_item_by_id(table_infos.at(0)->table_id_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (!table->is_generated_table() && !table->is_temp_table()) { + // do nothing + } else if (OB_FAIL(tables_to_check.push_back(table))) { + LOG_WARN("failed to push back table", K(ret)); + } + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < table_infos.count(); ++i) { + if (OB_ISNULL(table_infos.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_ISNULL(table = stmt->get_table_item_by_id(table_infos.at(i)->table_id_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(tables_to_check.push_back(table))) { + LOG_WARN("failed to push back table", K(ret)); + } + } + } + } + if (OB_FAIL(ret)) { + // do nothing + } else { + for (int64_t i = 0; OB_SUCC(ret) && !has_table_with_fulltext_index && i < tables_to_check.count(); ++i) { + if 
(OB_FAIL(ObTransformUtils::check_table_with_fulltext_recursively(tables_to_check.at(i), + ctx_->schema_checker_, + ctx_->session_info_, + has_table_with_fulltext_index))) { + LOG_WARN("failed to check table with fulltext recursively", K(ret)); + } else if (has_table_with_fulltext_index) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "complex dml operations on table with fulltext index"); + LOG_WARN("not supported complex dml operations on table with fulltext index", K(ret)); + } + } + } + return ret; +} + } // end namespace sql } // end namespace oceanbase diff --git a/src/sql/rewrite/ob_transform_pre_process.h b/src/sql/rewrite/ob_transform_pre_process.h index bd1bdb2eab..f4eac63380 100644 --- a/src/sql/rewrite/ob_transform_pre_process.h +++ b/src/sql/rewrite/ob_transform_pre_process.h @@ -634,6 +634,7 @@ struct DistinctObjMeta int check_exec_param_correlated(const ObRawExpr *expr, bool &is_correlated); int check_is_correlated_cte(ObSelectStmt *stmt, ObIArray &visited_cte, bool &is_correlated); int convert_join_preds_vector_to_scalar(JoinedTable &joined_table, bool &trans_happened); + int preserve_order_for_fulltext_search(ObDMLStmt *stmt, bool& trans_happened); int flatten_conditions(ObDMLStmt *stmt, bool &trans_happened); int recursive_flatten_join_conditions(ObDMLStmt *stmt, TableItem *table, bool &trans_happened); @@ -658,6 +659,7 @@ struct DistinctObjMeta int get_rowkey_for_single_table(ObSelectStmt* stmt, ObIArray &unique_keys, bool &is_valid); + int disable_complex_dml_for_fulltext_index(ObDMLStmt *stmt); private: DISALLOW_COPY_AND_ASSIGN(ObTransformPreProcess); }; diff --git a/src/sql/rewrite/ob_transform_utils.cpp b/src/sql/rewrite/ob_transform_utils.cpp index 55723bf512..9df249eeb5 100644 --- a/src/sql/rewrite/ob_transform_utils.cpp +++ b/src/sql/rewrite/ob_transform_utils.cpp @@ -3062,6 +3062,14 @@ int ObTransformUtils::get_simple_filter_column(const ObDMLStmt *stmt, col_exprs)))) { LOG_WARN("failed to get spatial filter column", 
K(ret)); } + } else if (expr->is_json_domain_expr()) { + ObRawExpr *json_expr = ObRawExprUtils::skip_inner_added_expr(expr); + if (OB_FAIL(SMART_CALL(get_simple_filter_column(stmt, + json_expr, + table_id, + col_exprs)))) { + LOG_WARN("failed to get spatial filter column", K(ret)); + } } break; } @@ -15034,6 +15042,54 @@ int ObTransformUtils::check_child_projection_validity(const ObSelectStmt *child_ return ret; } +int ObTransformUtils::check_fulltext_index_match_column(const ColumnReferenceSet &match_column_set, + const ObTableSchema *table_schema, + const ObTableSchema *inv_idx_schema, + bool &found_matched_index) +{ + int ret = OB_SUCCESS; + const ObColumnSchemaV2 *main_tbl_word_seg_col_schema = nullptr; + const ObColumnSchemaV2 *idx_tbl_word_seg_col_schema = nullptr; + + if (OB_ISNULL(inv_idx_schema) || OB_ISNULL(table_schema) || OB_UNLIKELY(!inv_idx_schema->is_fts_index())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected table / index schema", K(ret), KPC(table_schema), KPC(inv_idx_schema)); + } + + for (int64_t col_idx = 0; OB_SUCC(ret) && col_idx < inv_idx_schema->get_column_count(); ++col_idx) { + const ObColumnSchemaV2 *trav_col_schema = nullptr; + if (OB_ISNULL(trav_col_schema = inv_idx_schema->get_column_schema_by_idx(col_idx))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null col schema", K(ret), K(col_idx), KPC(inv_idx_schema)); + } else if (trav_col_schema->is_word_segment_column()) { + idx_tbl_word_seg_col_schema = trav_col_schema; + break; + } + } + + ObSEArray indexed_column_ids; + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(idx_tbl_word_seg_col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected word segment column not found", K(ret), KPC(inv_idx_schema)); + } else if (OB_ISNULL(main_tbl_word_seg_col_schema = + table_schema->get_column_schema(idx_tbl_word_seg_col_schema->get_column_id()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected word segment column on main table not found", K(ret)); + } else if 
(OB_FAIL(main_tbl_word_seg_col_schema->get_cascaded_column_ids(indexed_column_ids))) { + LOG_WARN("failed to get cascaded column ids", K(ret)); + } else if (indexed_column_ids.count() == match_column_set.num_members()) { + bool col_id_mismatch = false; + for (int64_t i = 0; !col_id_mismatch && i < indexed_column_ids.count(); ++i) { + if (!match_column_set.has_member(indexed_column_ids.at(i))) { + col_id_mismatch = true; + } + } + found_matched_index = !col_id_mismatch; + } + return ret; +} + int ObTransformUtils::add_aggr_winfun_expr(ObSelectStmt *stmt, ObRawExpr *expr) { @@ -15275,6 +15331,59 @@ bool ObTransformUtils::is_const_null(ObRawExpr &expr) return bret; } +int ObTransformUtils::check_table_with_fulltext_recursively(TableItem *table, + ObSchemaChecker *schema_checker, + ObSQLSessionInfo *session_info, + bool &has_fulltext_index) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(table) || OB_ISNULL(schema_checker) || OB_ISNULL(session_info)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (table->is_generated_table() || table->is_temp_table()) { + if (OB_ISNULL(table->ref_query_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + ObIArray &tables = table->ref_query_->get_table_items(); + for (int64_t i = 0; OB_SUCC(ret) && !has_fulltext_index && i < tables.count(); ++i) { + if (SMART_CALL(check_table_with_fulltext_recursively(tables.at(i), + schema_checker, + session_info, + has_fulltext_index))) { + LOG_WARN("failed to check table with fulltext recursively", K(ret)); + } + } + } + } else if (table->is_basic_table()) { + const ObTableSchema* table_schema = NULL; + if (OB_FAIL(schema_checker->get_table_schema(session_info->get_effective_tenant_id(), + table->ref_id_, + table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_FAIL(table_schema->check_has_fts_index(*schema_checker->get_schema_guard(), + has_fulltext_index))) { + LOG_WARN("failed to check has fts index", K(ret)); 
+ } + } else if (table->is_joined_table()) { + JoinedTable *joined_table = static_cast(table); + if (OB_FAIL(SMART_CALL(check_table_with_fulltext_recursively(joined_table->left_table_, + schema_checker, + session_info, + has_fulltext_index)))) { + LOG_WARN("failed to check left table", K(ret)); + } else if (has_fulltext_index) { + // do nothing + } else if (OB_FAIL(SMART_CALL(check_table_with_fulltext_recursively(joined_table->right_table_, + schema_checker, + session_info, + has_fulltext_index)))) { + LOG_WARN("failed to check right table", K(ret)); + } + } + return ret; +} + // if sql is only one row, is_full_group_by skipped checking orderby in resolver. bool ObTransformUtils::is_full_group_by(ObSelectStmt& stmt, ObSQLMode mode) { diff --git a/src/sql/rewrite/ob_transform_utils.h b/src/sql/rewrite/ob_transform_utils.h index a03e3e2247..0743c95611 100644 --- a/src/sql/rewrite/ob_transform_utils.h +++ b/src/sql/rewrite/ob_transform_utils.h @@ -1873,6 +1873,10 @@ public: static int check_child_projection_validity(const ObSelectStmt *child_stmt, ObRawExpr *expr, bool &is_valid); + static int check_fulltext_index_match_column(const ColumnReferenceSet &match_column_set, + const ObTableSchema *table_schema, + const ObTableSchema *inv_idx_schema, + bool &found_matched_index); static int is_winfunc_topn_filter(const ObIArray &winfunc_exprs, ObRawExpr *filter, bool &is_topn_filter, @@ -1884,6 +1888,10 @@ public: static bool is_const_null(ObRawExpr &expr); static bool is_full_group_by(ObSelectStmt& stmt, ObSQLMode mode); + static int check_table_with_fulltext_recursively(TableItem *table, + ObSchemaChecker *schema_checker, + ObSQLSessionInfo *session_info, + bool &has_fulltext_index); static int add_aggr_winfun_expr(ObSelectStmt *stmt, ObRawExpr *expr); static int expand_mview_table(ObTransformerCtx *ctx, ObDMLStmt *upper_stmt, TableItem *rt_mv_table); diff --git a/src/sql/rewrite/ob_transformer_impl.cpp b/src/sql/rewrite/ob_transformer_impl.cpp index 
9f2c616415..7fcc847b7a 100644 --- a/src/sql/rewrite/ob_transformer_impl.cpp +++ b/src/sql/rewrite/ob_transformer_impl.cpp @@ -424,7 +424,8 @@ int ObTransformerImpl::choose_rewrite_rules(ObDMLStmt *stmt, uint64_t &need_type LOG_WARN("failed to check stmt functions", K(ret)); } else { //TODO::unpivot open @xifeng - if (func.contain_unpivot_query_ || func.contain_enum_set_values_ || func.contain_geometry_values_) { + if (func.contain_unpivot_query_ || func.contain_enum_set_values_ || func.contain_geometry_values_ || + func.contain_fulltext_search_) { disable_list = ObTransformRule::ALL_TRANSFORM_RULES; } if (func.contain_sequence_) { @@ -516,6 +517,7 @@ int ObTransformerImpl::check_stmt_functions(const ObDMLStmt *stmt, StmtFunc &fun func.contain_sequence_ = func.contain_sequence_ || stmt->has_sequence(); func.contain_for_update_ = func.contain_for_update_ || stmt->has_for_update(); func.contain_unpivot_query_ = func.contain_unpivot_query_ || stmt->is_unpivot_select(); + func.contain_fulltext_search_ = func.contain_fulltext_search_ || (stmt->get_match_exprs().count() != 0); } for (int64_t i = 0; OB_SUCC(ret) && (!func.contain_enum_set_values_ || !func.contain_geometry_values_) @@ -726,8 +728,9 @@ int ObTransformerImpl::add_param_and_expr_constraints(ObExecContext &exec_ctx, } else if (OB_FAIL(append(query_ctx->all_equal_param_constraints_, trans_ctx.equal_param_constraints_))) { LOG_WARN("fail to append equal param constraints. 
", K(ret)); - } else if (OB_FAIL(query_ctx->all_expr_constraints_.assign(trans_ctx.expr_constraints_))) { - LOG_WARN("fail to assign expr constraints", K(ret)); + } else if (OB_FAIL(append(query_ctx->all_expr_constraints_, + trans_ctx.expr_constraints_))) { + LOG_WARN("fail to append expr constraints", K(ret)); } return ret; } diff --git a/src/sql/rewrite/ob_transformer_impl.h b/src/sql/rewrite/ob_transformer_impl.h index 2dc2b3374b..e74f91d5dc 100644 --- a/src/sql/rewrite/ob_transformer_impl.h +++ b/src/sql/rewrite/ob_transformer_impl.h @@ -118,7 +118,8 @@ public: contain_enum_set_values_(false), contain_geometry_values_(false), contain_link_table_(false), - contain_json_table_(false) + contain_json_table_(false), + contain_fulltext_search_(false) {} bool all_found() const { @@ -130,7 +131,8 @@ public: contain_enum_set_values_ && contain_geometry_values_ && contain_link_table_ && - contain_json_table_; + contain_json_table_ && + contain_fulltext_search_; } bool contain_hie_query_; @@ -142,6 +144,7 @@ public: bool contain_geometry_values_; bool contain_link_table_; bool contain_json_table_; + bool contain_fulltext_search_; }; static int check_stmt_functions(const ObDMLStmt *stmt, StmtFunc &func); int check_temp_table_functions(ObDMLStmt *stmt, StmtFunc &func); diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index bb2fa6984d..5cf2bf81c6 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -169,6 +169,15 @@ ob_set_subtarget(ob_storage ckpt slog_ckpt/ob_tablet_replay_create_handler.cpp ) +ob_set_subtarget(ob_storage fts + fts/ob_fts_plugin_mgr.cpp + fts/ob_fts_plugin_helper.cpp + fts/ob_fts_stop_word.cpp + fts/ob_ngram_ft_parser.cpp + fts/ob_whitespace_ft_parser.cpp + fts/ob_text_retrieval_iterator.cpp +) + ob_set_subtarget(ob_storage high_availability high_availability/ob_ls_migration.cpp high_availability/ob_physical_copy_task.cpp diff --git a/src/storage/compaction/ob_compaction_util.h 
b/src/storage/compaction/ob_compaction_util.h index b205304953..875a62df87 100644 --- a/src/storage/compaction/ob_compaction_util.h +++ b/src/storage/compaction/ob_compaction_util.h @@ -104,6 +104,9 @@ inline bool is_valid_merge_level(const ObMergeLevel &merge_level) } const char *merge_level_to_str(const ObMergeLevel &merge_level); +// open or close FTS index checksum verify +#define VERIFY_FTS_CHECKSUM true + } // namespace storage } // namespace oceanbase diff --git a/src/storage/ddl/ob_build_index_task.cpp b/src/storage/ddl/ob_build_index_task.cpp index 579b9d68d4..273948d9a7 100644 --- a/src/storage/ddl/ob_build_index_task.cpp +++ b/src/storage/ddl/ob_build_index_task.cpp @@ -540,7 +540,7 @@ int ObUniqueIndexChecker::check_unique_index(ObIDag *dag) LOG_WARN("fail to get log stream", K(ret), K(ls_id_)); } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, tablet_id_, tablet_handle_))) { LOG_WARN("fail to get tablet", K(ret), K(tablet_id_), K(tablet_handle_)); - } else if (index_schema_->is_domain_index()) { + } else if (index_schema_->is_fts_index()) { STORAGE_LOG(INFO, "do not need to check unique for domain index", "index_id", index_schema_->get_table_id()); } else { if (OB_FAIL(ret)) { diff --git a/src/storage/fts/ob_fts_buildin_parser_register.ipp b/src/storage/fts/ob_fts_buildin_parser_register.ipp new file mode 100644 index 0000000000..997e70cdc4 --- /dev/null +++ b/src/storage/fts/ob_fts_buildin_parser_register.ipp @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_FTS_BUILD_IN_PARSER_REGISTER_H_ +#define OB_FTS_BUILD_IN_PARSER_REGISTER_H_ + +#include "storage/fts/ob_whitespace_ft_parser.h" +#include "storage/fts/ob_ngram_ft_parser.h" + +///////////////////////////////////// Default fulltext parser ////////////////////////////////////////// + +OB_DECLARE_PLUGIN(whitespace_parser) +{ + oceanbase::lib::ObPluginType::OB_FT_PARSER_PLUGIN, // fulltext parser type + "space", // name + OB_PLUGIN_AUTHOR_OCEANBASE, // author + "This is a default space parser plugin.", // brief specification + 0x00001, // version + oceanbase::lib::ObPluginLicenseType::OB_MULAN_V2_LICENSE, // Mulan PubL v2 license + &oceanbase::storage::whitespace_parser, // default space parser plugin instance +}; + +OB_DECLARE_BUILDIN_PLUGIN_HANDLER(ObBuildInWhitespaceFTParser, whitespace_parser); + +///////////////////////////////////// Ngram fulltext parser ////////////////////////////////////////// + +OB_DECLARE_PLUGIN(ngram_parser) +{ + oceanbase::lib::ObPluginType::OB_FT_PARSER_PLUGIN, // fulltext parser type + "ngram", // name + OB_PLUGIN_AUTHOR_OCEANBASE, // author + "This is a ngram fulltext parser plugin.", // brief specification + 0x00001, // version + oceanbase::lib::ObPluginLicenseType::OB_MULAN_V2_LICENSE, // Mulan PubL v2 license + &oceanbase::storage::ngram_parser, // ngram parser plugin instance +}; + +OB_DECLARE_BUILDIN_PLUGIN_HANDLER(ObBuildInNgramFTParser, ngram_parser); + +#endif // OB_FTS_BUILD_IN_PARSER_REGISTER_H_ diff --git a/src/storage/fts/ob_fts_plugin_helper.cpp b/src/storage/fts/ob_fts_plugin_helper.cpp new file mode 100644 index 0000000000..eacf4bf12a --- /dev/null +++ b/src/storage/fts/ob_fts_plugin_helper.cpp @@ -0,0 +1,283 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX STORAGE_FTS + +#include "lib/alloc/alloc_assist.h" +#include "lib/utility/utility.h" +#include "lib/utility/ob_print_utils.h" +#include "share/rc/ob_tenant_base.h" +#include "storage/fts/ob_fts_stop_word.h" +#include "storage/fts/ob_fts_plugin_helper.h" + +namespace oceanbase +{ +namespace storage +{ +
+// The plugin_name comes from index table schema and consists of two parts: name and
+// version, e.g. default_parser.1, separated by dot.
+//
+// The input is copied into a local NUL-terminated buffer before tokenizing, so
+// plugin_name itself does not have to be NUL-terminated. Exactly two dot-separated
+// tokens are accepted: the first becomes the parser name, the second is converted with
+// ob_strtoll into the parser version.
+//
+// @param plugin_name  pointer to the "<name>.<version>" text; must not be null.
+// @param buf_len      number of bytes to read; must be in [0, OB_PLUGIN_NAME_LENGTH).
+// @return OB_SUCCESS, OB_INVALID_ARGUMENT for bad arguments or an invalid result,
+//         OB_ERR_UNEXPECTED for a malformed name, or the error of set_name / ob_strtoll.
+int ObFTParser::parse_from_str(const char *plugin_name, const int64_t buf_len)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(plugin_name)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("plugin name is nullptr", K(ret), KP(plugin_name));
+  } else if (OB_UNLIKELY(buf_len < 0)) {
+    // a negative length would reach MEMCPY and index name[] out of bounds
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("plugin name length is negative", K(ret), K(buf_len));
+  } else if (OB_UNLIKELY(buf_len >= OB_PLUGIN_NAME_LENGTH)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("plugin name is too long", K(ret), K(buf_len));
+  } else {
+    char name[OB_PLUGIN_NAME_LENGTH];
+    char *saveptr = nullptr;
+    char *token = nullptr;
+    char *end_ptr = nullptr;
+    MEMCPY(name, plugin_name, buf_len);
+    name[buf_len] = '\0';
+    if (OB_ISNULL(token = STRTOK_R(name, ".", &saveptr))) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected error, plugin name is illegal", K(ret), KCSTRING(name));
+    } else if (OB_FAIL(parser_name_.set_name(token))) {
+      LOG_WARN("fail to set parser name", K(ret), KCSTRING(token));
+    } else if (OB_ISNULL(token = STRTOK_R(nullptr, ".", &saveptr))) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected error, plugin name is illegal", K(ret), KCSTRING(name));
+    } else if (OB_FAIL(ob_strtoll(token, end_ptr, parser_version_))) {
+      LOG_WARN("failed to convert str to ll", K(ret), KCSTRING(token));
+    } else if (OB_NOT_NULL(token = STRTOK_R(nullptr, ".", &saveptr))) {
+      // a third token means the name holds more than "<name>.<version>"
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected error, plugin name is illegal", K(ret), KCSTRING(name));
+    } else if (OB_UNLIKELY(!is_valid())) {
+      ret = OB_INVALID_ARGUMENT;
+      LOG_WARN("plugin name isn't valid fulltext parser", K(ret), KCSTRING(plugin_name), KPC(this));
+    }
+  }
+  return ret;
+}
+ +// The fulltext parser name consists of two parts: name and version, e.g. default_parser.1, +// separated by dot. This function is designed to serialize them into cstring. +int ObFTParser::serialize_to_str(char *buf, const int64_t buf_len) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len < OB_PLUGIN_NAME_LENGTH)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(buf), K(buf_len)); + } else if (OB_UNLIKELY(!is_valid())) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("invalid fulltext parser doesn't support to serialize_to_str", K(ret), KPC(this)); + } else if (OB_FAIL(common::databuff_printf(buf, buf_len, pos, "%.*s.%ld", parser_name_.len(), parser_name_.str(), + parser_version_))) { + LOG_WARN("fail to printf", K(ret), K(buf_len), K(parser_name_), K(parser_version_)); + } + return ret; +} + +int ObFTParseHelper::get_fulltext_parser_desc( + const lib::ObIPluginHandler &handler, + lib::ObIFTParserDesc *&parser_desc) +{ + int ret = OB_SUCCESS; + int64_t version = 0; + int64_t size = 0; + lib::ObPlugin *plugin = nullptr; + lib::ObIPluginDesc *desc = nullptr; + if (OB_FAIL(handler.get_plugin_version(version))) { + LOG_WARN("fail to get plugin version", K(ret), K(handler)); + } else if (OB_FAIL(handler.get_plugin_size(size))) { + LOG_WARN("fail to get plugin size", K(ret), K(handler)); + } else if (OB_UNLIKELY(OB_PLUGIN_INTERFACE_VERSION != version || sizeof(lib::ObPlugin) != size)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("the plugin interface version or size is invalid", K(ret), K(version), K(size)); + } else if (OB_FAIL(handler.get_plugin(plugin))) { + 
LOG_WARN("fail to get plugin", K(ret), K(handler)); + } else if (OB_ISNULL(plugin)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, plugin is nullptr", K(ret), K(handler)); + } else if (OB_UNLIKELY(lib::ObPluginType::OB_FT_PARSER_PLUGIN != plugin->type_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("this plugin isn't a fulltext parser", K(ret), K(plugin->type_), K(handler)); + } else { + parser_desc = static_cast(plugin->desc_); + } + return ret; +} + +int ObFTParseHelper::segment( + const int64_t parser_version, + const lib::ObIFTParserDesc *parser_desc, + const ObCharsetInfo *cs, + const char *ft, + const int64_t ft_len, + common::ObIAllocator &allocator, + lib::ObFTParserParam::ObIAddWord &add_word) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(parser_version < 0 || nullptr == parser_desc || nullptr == cs || nullptr == ft || 0 >= ft_len)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(parser_version), KP(parser_desc), KP(cs), K(ft), K(ft_len)); + } else { + lib::ObFTParserParam param; + param.allocator_ = &allocator; + param.add_word_ = &add_word; + param.cs_ = cs; + param.fulltext_ = ft; + param.ft_length_ = ft_len; + param.parser_version_ = parser_version; + if (OB_FAIL(parser_desc->segment(¶m))) { + LOG_WARN("fail to segment", K(ret), K(param)); + } + } + return ret; +} + +ObFTParseHelper::ObFTParseHelper() + : plugin_param_(), + allocator_(nullptr), + parser_desc_(nullptr), + parser_name_(), + filter_stopword_(false), + is_inited_(false) +{ +} + +ObFTParseHelper::~ObFTParseHelper() +{ + reset(); +} + +int ObFTParseHelper::init( + common::ObIAllocator *allocator, + const common::ObString &plugin_name) +{ + int ret = OB_SUCCESS; + lib::ObIPluginHandler *parse_handler = nullptr; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("this fulltext parse helper has been initialized", K(ret), KP(parser_desc_), K(is_inited_)); + } else if (OB_ISNULL(allocator) || OB_UNLIKELY(plugin_name.empty())) { + ret = 
OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(allocator), K(plugin_name)); + } else if (OB_FAIL(parser_name_.parse_from_str(plugin_name.ptr(), plugin_name.length()))) { + LOG_WARN("fail to parse name from cstring", K(ret), K(plugin_name)); + } else if (OB_FAIL(OB_FT_PLUGIN_MGR.get_plugin_handler(parser_name_.get_parser_name(), parse_handler))) { + LOG_WARN("fail to open plugin handler", K(ret), K(plugin_name)); + } else if (OB_ISNULL(parse_handler)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, parse handler is nullptr", K(ret), KP(parse_handler)); + } else if (OB_FAIL(get_fulltext_parser_desc(*parse_handler, parser_desc_))) { + LOG_WARN("fail to get fulltext parser descriptor", K(ret), KPC(parse_handler)); + } else { + plugin_param_.desc_ = parser_desc_; + filter_stopword_ = need_stopword_list(parser_name_); + allocator_ = allocator; + is_inited_ = true; + } + if (OB_FAIL(ret) && OB_UNLIKELY(!is_inited_)) { + reset(); + } + return ret; +} + +void ObFTParseHelper::reset() +{ + parser_desc_ = nullptr; + plugin_param_.reset(); + allocator_ = nullptr; + filter_stopword_ = false; + is_inited_ = false; +} + +int ObFTParseHelper::segment( + const ObCollationType &type, + const char *fulltext, + const int64_t fulltext_len, + int64_t &doc_length, + common::ObIArray &words) const +{ + int ret = OB_SUCCESS; + const ObCharsetInfo *cs = nullptr; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("this fulltext parser helper hasn't been initialized", K(ret), K(is_inited_)); + } else if (OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator ptr is nullptr", K(ret), KP_(allocator), K_(is_inited)); + } else if (OB_UNLIKELY(CS_TYPE_INVALID == type || type >= CS_TYPE_EXTENDED_MARK)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(type)); + } else if (OB_ISNULL(cs = common::ObCharset::get_charset(type))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, charset info is nullptr", K(ret), 
K(type)); + } else { + words.reuse(); + lib::ObFTParserParam::ObIAddWord *add_word = nullptr; + if (OB_FAIL(alloc_add_word(type, words, add_word))) { + LOG_WARN("fail to allocate add word", K(ret), K(type)); + } else if (OB_FAIL(segment(parser_name_.get_parser_version(), parser_desc_, cs, fulltext, fulltext_len, *allocator_, + *add_word))) { + LOG_WARN("fail to segment fulltext", K(ret), K(parser_name_), KP(parser_desc_), KP(cs), KP(fulltext), + K(fulltext_len), KP(allocator_)); + } else { + doc_length = add_word->get_add_word_count(); + } + free_add_word(add_word); + } + LOG_DEBUG("ft parse segment", K(ret), K(type), K(ObString(fulltext_len, fulltext)), K(words)); + return ret; +} + +bool ObFTParseHelper::need_stopword_list(const ObFTParser &parser) +{ + share::ObPluginName name("space"); + return parser.get_parser_name() == name; +} + +int ObFTParseHelper::alloc_add_word( + const ObCollationType &type, + common::ObIArray &words, + lib::ObFTParserParam::ObIAddWord *&add_word) const +{ + int ret = OB_SUCCESS; + common::ObMemAttr mem_attr(MTL_ID(), "FTAddWord"); + void *buf = nullptr; + const int64_t buf_size = filter_stopword_ ? 
sizeof(ObStopWordAddWord) : sizeof(ObNoStopWordAddWord); + if (OB_NOT_NULL(add_word)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("add word isn't nullptr", K(ret), KPC(add_word)); + } else if (OB_ISNULL(buf = ob_malloc(buf_size, mem_attr))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory", K(ret), K(buf_size)); + } else if (filter_stopword_) { + add_word = new (buf) ObStopWordAddWord(type, *allocator_, words); + } else { + add_word = new (buf) ObNoStopWordAddWord(type, *allocator_, words); + } + if (OB_FAIL(ret) && OB_NOT_NULL(buf)) { + ob_free(buf); + buf = nullptr; + add_word = nullptr; + } + return ret; +} + +void ObFTParseHelper::free_add_word(lib::ObFTParserParam::ObIAddWord *&add_word) const +{ + if (OB_NOT_NULL(add_word)) { + add_word->~ObIAddWord(); + ob_free(static_cast(add_word)); + add_word = nullptr; + } +} + +} // end namespace storage +} // end namespace oceanbase diff --git a/src/storage/fts/ob_fts_plugin_helper.h b/src/storage/fts/ob_fts_plugin_helper.h new file mode 100644 index 0000000000..895c775c90 --- /dev/null +++ b/src/storage/fts/ob_fts_plugin_helper.h @@ -0,0 +1,132 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_FTS_PLUGIN_HELPER_H_ +#define OB_FTS_PLUGIN_HELPER_H_ + +#include "lib/ob_errno.h" +#include "lib/charset/ob_charset.h" +#include "lib/string/ob_string.h" +#include "share/ob_define.h" +#include "storage/fts/ob_fts_struct.h" +#include "storage/fts/ob_fts_plugin_mgr.h" + +namespace oceanbase +{ +namespace storage +{ + +class ObFTParser final +{ +public: + ObFTParser() : parser_name_(), parser_version_(-1) {} + ~ObFTParser() = default; + int parse_from_str(const char *plugin_name, const int64_t buf_len); + int serialize_to_str(char *buf, const int64_t buf_len); + OB_INLINE const share::ObPluginName &get_parser_name() const { return parser_name_; } + OB_INLINE int64_t get_parser_version() const { return parser_version_; } + OB_INLINE bool is_valid() const { return parser_name_.is_valid() && parser_version_ >= 0; } + OB_INLINE void set_name_and_version(const share::ObPluginName &name, const int64_t version) + { + parser_name_ = name; + parser_version_ = version; + } + OB_INLINE bool operator ==(const ObFTParser &other) const + { + bool is_equal = true; + if (this != &other) { + is_equal = parser_name_ == other.get_parser_name() && parser_version_ == other.parser_version_; + } + return is_equal; + } + OB_INLINE bool operator !=(const ObFTParser &other) const { return !(*this == other); } + TO_STRING_KV(K_(parser_name), K_(parser_version)); +private: + share::ObPluginName parser_name_; + int64_t parser_version_; +}; + +class ObFTParseHelper final +{ +public: + ObFTParseHelper(); + ~ObFTParseHelper(); + + /** + * initialize fulltext parse helper + * + * @param[in] allocator + * @param[in] parser_name, which consists of two parts name and version. + * e.g. 
default_parser.1 + * | | + * parse name paser version + * + * @return error code + */ + int init( + common::ObIAllocator *allocator, + const common::ObString &plugin_name); + /** + * Split document into multiple words + * + * @param[in] type, collation type for fulltext + * @param[in] fulltext + * @param[in] fulltext_len, length of the fulltext + * @param[out] doc_length, length of document by word count + * @param[out] words, word lists after segment + */ + int segment( + const common::ObCollationType &type, + const char *fulltext, + const int64_t fulltext_len, + int64_t &doc_length, + common::ObIArray &words) const; + const ObFTParser &get_parser_name() const { return parser_name_; } + void reset(); + + TO_STRING_KV(K_(plugin_param), KP_(allocator), KP_(parser_desc), K_(is_inited)); +private: + static int get_fulltext_parser_desc( + const lib::ObIPluginHandler &handler, + lib::ObIFTParserDesc *&parser_desc); + static int segment( + const int64_t parser_version, + const lib::ObIFTParserDesc *parser_desc, + const ObCharsetInfo *cs, + const char *fulltext, + const int64_t fulltext_len, + common::ObIAllocator &allocator, + lib::ObFTParserParam::ObIAddWord &add_word); + static bool need_stopword_list(const ObFTParser &parser); + + int alloc_add_word( + const ObCollationType &type, + common::ObIArray &words, + lib::ObFTParserParam::ObIAddWord *&add_word) const; + void free_add_word(lib::ObFTParserParam::ObIAddWord *&add_word) const; + +private: + lib::ObPluginParam plugin_param_; + common::ObIAllocator *allocator_; + lib::ObIFTParserDesc *parser_desc_; + ObFTParser parser_name_; + bool filter_stopword_; + bool is_inited_; + +private: + DISALLOW_COPY_AND_ASSIGN(ObFTParseHelper); +}; + +} // end namespace storage +} // end namespace oceanbase + +#endif // OB_FTS_PLUGIN_HELPER_H_ diff --git a/src/storage/fts/ob_fts_plugin_mgr.cpp b/src/storage/fts/ob_fts_plugin_mgr.cpp new file mode 100644 index 0000000000..9aca2e5f66 --- /dev/null +++ 
b/src/storage/fts/ob_fts_plugin_mgr.cpp @@ -0,0 +1,245 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX STORAGE_FTS + +#include "share/rc/ob_tenant_base.h" +#include "share/ob_force_print_log.h" +#include "storage/fts/ob_fts_plugin_mgr.h" +#include "storage/fts/ob_fts_stop_word.h" +#include "storage/fts/ob_fts_plugin_helper.h" + +#include "storage/fts/ob_fts_buildin_parser_register.ipp" + +namespace oceanbase +{ +namespace storage +{ + +ObTenantFTPluginMgr::ObTenantFTPluginMgr(const uint64_t tenant_id) + : handler_allocator_(tenant_id), + handler_map_(), + stopword_set_(), + stopword_type_(ObCollationType::CS_TYPE_INVALID), + tenant_id_(tenant_id), + is_inited_(false) +{ +} + +void ObTenantFTPluginMgr::destroy() +{ + FLOG_INFO("destroy ObTenantFTPluginMgr", KP(this)); + for(PluginHandlerMap::const_iterator iter = handler_map_.begin(); iter != handler_map_.end(); ++iter) { + oceanbase::lib::ObIPluginHandler *handler = iter->second; + handler_allocator_.free(handler); + } + stopword_set_.destroy(); + stopword_type_ = ObCollationType::CS_TYPE_INVALID; + handler_map_.destroy(); + handler_allocator_.reset(); + is_inited_ = false; +} + +int ObTenantFTPluginMgr::register_plugins() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(register_plugin())) { + LOG_WARN("fail to register default fulltext parser", K(ret)); + } else if (OB_FAIL(register_plugin())) { + LOG_WARN("fail to register ngram fulltext parser", K(ret)); + } + return ret; +} + +void 
ObTenantFTPluginMgr::unregister_plugins() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(unregister_plugin())) { + LOG_ERROR("fail to unregister default fulltext parser", K(ret)); + } else if (OB_FAIL(unregister_plugin())) { + LOG_ERROR("fail to unregister ngram fulltext parser", K(ret)); + } +} + +int ObTenantFTPluginMgr::mtl_new(ObTenantFTPluginMgr *&ft_parser_mgr) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = MTL_ID(); + ft_parser_mgr = OB_NEW(ObTenantFTPluginMgr, ObMemAttr(tenant_id, "FTParserMgr"), tenant_id); + if (OB_ISNULL(ft_parser_mgr)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc fulltext parser manager memory", K(ret), K(tenant_id)); + } + return ret; +} + +void ObTenantFTPluginMgr::mtl_destroy(ObTenantFTPluginMgr *&ft_parser_mgr) +{ + if (OB_ISNULL(ft_parser_mgr)) { + LOG_WARN_RET(OB_ERR_UNEXPECTED, "fulltext parser manager is nullptr", KP(ft_parser_mgr)); + } else { + OB_DELETE(ObTenantFTPluginMgr, oceanbase::ObModIds::OMT_TENANT, ft_parser_mgr); + ft_parser_mgr = nullptr; + } +} + +__attribute__((weak)) +ObTenantFTPluginMgr &ObTenantFTPluginMgr::get_ft_plugin_mgr() +{ + return (*(MTL(ObTenantFTPluginMgr *))); +} + +int ObTenantFTPluginMgr::init() +{ + int ret = OB_SUCCESS; + const lib::ObMemAttr mem_attr(tenant_id_, "FTPluginHandle"); + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("ObTenantFTParserMgr has been initialized", K(ret), K(is_inited_)); + } else if (OB_FAIL(handler_allocator_.init(lib::ObMallocAllocator::get_instance(), + OB_MALLOC_NORMAL_BLOCK_SIZE, + mem_attr))) { + LOG_WARN("fail to init tenant plugin handler allocator", K(ret)); + } else if (OB_FAIL(init_plugin_handler())) { + LOG_WARN("fail to init plugin handler", K(ret)); + } else if (OB_FAIL(init_and_set_stopword_list())) { + LOG_WARN("fail to init and set stopword list", K(ret)); + } else { + is_inited_ = true; + FLOG_INFO("succeed to initialize ObTenantFTPluginMgr", KP(this)); + } + + if (OB_UNLIKELY(!is_inited_)) { + destroy(); + } + 
return ret; +} + +int ObTenantFTPluginMgr::init_plugin_handler() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(handler_map_.create(DEFAULT_PLUGIN_BUCKET_NUM, "FTPluginMap", "FTPluginMap", tenant_id_))) { + LOG_WARN("fail to init plugin handlers map", K(ret)); + } else if (OB_FAIL(set_plugin_handler())) { + LOG_WARN("fail to set default fulltext parser", K(ret)); + } else if (OB_FAIL(set_plugin_handler())) { + LOG_WARN("fail to set ngram fulltext parser", K(ret)); + } + return ret; +} + +int ObTenantFTPluginMgr::init_and_set_stopword_list() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(stopword_set_.create(DEFAULT_STOPWORD_BUCKET_NUM, "StopWordSet", "StopWordSet", tenant_id_))) { + LOG_WARN("fail to create stop word set", K(ret)); + } else { + stopword_type_ = ObCollationType::CS_TYPE_UTF8MB4_GENERAL_CI; + const int64_t stopword_count = sizeof(ob_stop_word_list) / sizeof(ob_stop_word_list[0]); + for (int64_t i = 0; OB_SUCC(ret) && i < stopword_count; ++i) { + ObFTWord stopword(STRLEN(ob_stop_word_list[i]), ob_stop_word_list[i], stopword_type_); + if (OB_FAIL(stopword_set_.set_refactored(stopword))) { + LOG_WARN("fail to set stop word", K(ret), K(stopword)); + } + } + } + return ret; +} + +int ObTenantFTPluginMgr::get_plugin_handler( + const share::ObPluginName &name, + lib::ObIPluginHandler *&plugin_handler) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObTenantFTPluginMgr hasn't been initialized", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY(!name.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(name)); + } else if (OB_FAIL(handler_map_.get_refactored(name, plugin_handler))) { + LOG_WARN("fail to get plugin handler", K(ret), K(name)); + } + return ret; +} + +int ObTenantFTPluginMgr::get_ft_parser( + const share::ObPluginName &parser_name, + ObFTParser &parser) +{ + int ret = OB_SUCCESS; + int64_t version = 0; + lib::ObIPluginHandler *handler = nullptr; + lib::ObPlugin *plugin = nullptr; 
+ if (OB_FAIL(get_plugin_handler(parser_name, handler))) { + LOG_WARN("fail to get plugin handler", K(ret), K(parser_name)); + } else if (OB_ISNULL(handler)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, parser handler is nullptr", K(ret), K(parser_name), KP(handler)); + } else if (OB_FAIL(handler->get_plugin(plugin))) { + LOG_WARN("fail to get plugin", K(ret), K(parser_name), KPC(handler)); + } else if (OB_ISNULL(plugin)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, plugin is nullptr", K(ret), K(parser_name), KPC(handler)); + } else if (OB_UNLIKELY(lib::ObPluginType::OB_FT_PARSER_PLUGIN != plugin->type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, plugin type isn't fulltext parser", K(ret), K(parser_name), KPC(plugin)); + } else if (OB_UNLIKELY(plugin->version_ <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, plugin version is invalid", K(ret), K(parser_name), KPC(plugin)); + } else { + version = plugin->version_; + parser.set_name_and_version(parser_name, version); + } + return ret; +} + +int ObTenantFTPluginMgr::check_stopword( + const ObFTWord &word, + bool &is_stopword) +{ + int ret = OB_SUCCESS; + common::ObArenaAllocator allocator(lib::ObMemAttr(MTL_ID(), "ChkStopWord")); + common::ObString cmp_word_str; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObTenantFTPluginMgr hasn't been initialized", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY(word.empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("word is empty", K(ret), K(word)); + } else { + const bool conv_cs = word.get_collation_type() != stopword_type_; + if (conv_cs && OB_FAIL(common::ObCharset::charset_convert(allocator, word.get_word(), word.get_collation_type(), + stopword_type_, cmp_word_str))) { + LOG_WARN("fail to convert charset", K(ret), K(word), K(stopword_type_)); + } else { + ObFTWord cmp_word(cmp_word_str.length(), cmp_word_str.ptr(), stopword_type_); + ret = stopword_set_.exist_refactored(conv_cs ? 
cmp_word : word); + if (OB_HASH_NOT_EXIST == ret) { + is_stopword = false; + ret = OB_SUCCESS; + } else if (OB_HASH_EXIST == ret) { + is_stopword = true; + ret = OB_SUCCESS; + } else if (OB_SUCC(ret)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("the exist of hastset shouldn't return success", K(ret), K(word), K(conv_cs), K(cmp_word)); + } else { + LOG_WARN("fail to do exist", K(ret), K(word), K(conv_cs), K(cmp_word)); + } + } + } + return ret; +} + +} // end storage +} // end oceanbase diff --git a/src/storage/fts/ob_fts_plugin_mgr.h b/src/storage/fts/ob_fts_plugin_mgr.h new file mode 100644 index 0000000000..d081c3eb70 --- /dev/null +++ b/src/storage/fts/ob_fts_plugin_mgr.h @@ -0,0 +1,155 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_FTS_PLUGIN_MGR_H_ +#define OB_FTS_PLUGIN_MGR_H_ + +#include "lib/ob_plugin.h" +#include "lib/hash/ob_hashmap.h" +#include "lib/hash/ob_hashset.h" +#include "lib/allocator/ob_fifo_allocator.h" +#include "share/ob_define.h" +#include "share/ob_plugin_helper.h" +#include "storage/fts/ob_fts_struct.h" + +namespace oceanbase +{ +namespace storage +{ + +class ObFTParser; + +class ObTenantFTPluginMgr final +{ +public: + static int register_plugins(); + static void unregister_plugins(); + + static int mtl_new(ObTenantFTPluginMgr *&ft_parser_mgr); + static void mtl_destroy(ObTenantFTPluginMgr *&ft_parser_mgr); + static ObTenantFTPluginMgr &get_ft_plugin_mgr(); + ~ObTenantFTPluginMgr() { destroy(); } + + int init(); + void destroy(); + + int get_plugin_handler( + const share::ObPluginName &name, + lib::ObIPluginHandler *&plugin_handler); + int get_ft_parser( + const share::ObPluginName &parser_name, + ObFTParser &parser); + int check_stopword( + const ObFTWord &word, + bool &is_stopword); + + TO_STRING_KV(K_(tenant_id), K_(is_inited), "plugin count", handler_map_.size(), + "stopword count", stopword_set_.size()); + +private: + static const int64_t DEFAULT_PLUGIN_BUCKET_NUM = 53L; + static const int64_t DEFAULT_STOPWORD_BUCKET_NUM = 37L; + typedef common::hash::ObHashMap PluginHandlerMap; + typedef common::hash::ObHashSet StopWordSet; + +private: + explicit ObTenantFTPluginMgr(const uint64_t tenant_id); + int init_plugin_handler(); + int init_and_set_stopword_list(); + template + int set_plugin_handler(); + template + static int register_plugin(); + template + static int unregister_plugin(); + +private: + common::ObFIFOAllocator handler_allocator_; + PluginHandlerMap handler_map_; + StopWordSet stopword_set_; + ObCollationType stopword_type_; + uint64_t tenant_id_; + bool is_inited_; + + DISALLOW_COPY_AND_ASSIGN(ObTenantFTPluginMgr); +}; + +template +int ObTenantFTPluginMgr::set_plugin_handler() +{ + int ret = OB_SUCCESS; + share::ObPluginName plugin_name; 
+ PluginHandler *plugin_handler = nullptr; + void *buf = nullptr; + if (OB_ISNULL(buf = handler_allocator_.alloc(sizeof(PluginHandler)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + STORAGE_FTS_LOG(WARN, "fail to allocate plugin handler memory", K(ret)); + } else { + lib::ObPlugin *plugin = nullptr; + plugin_handler = new (buf) PluginHandler(); + if (OB_FAIL(plugin_handler->get_plugin(plugin))) { + STORAGE_FTS_LOG(WARN, "fail to get plugin", K(ret), KPC(plugin_handler)); + } else if (OB_FAIL(plugin_name.set_name(plugin->name_))) { + STORAGE_FTS_LOG(WARN, "fail to set name", K(ret), KPC(plugin)); + } else if (OB_FAIL(handler_map_.set_refactored(plugin_name, + static_cast(plugin_handler)))) { + STORAGE_FTS_LOG(WARN, "fail to set plugin handler", K(ret), K(plugin_name)); + } + } + if (OB_FAIL(ret) && OB_NOT_NULL(buf)) { + if (OB_NOT_NULL(plugin_handler)) { + plugin_handler->~PluginHandler(); + plugin_handler = nullptr; + } + handler_allocator_.free(buf); + buf = nullptr; + } + return ret; +} + +template +int ObTenantFTPluginMgr::register_plugin() +{ + int ret = OB_SUCCESS; + PluginHandler plugin_handler; + lib::ObPlugin *plugin = nullptr; + lib::ObPluginParam plugin_param; + if (OB_FAIL(plugin_handler.get_plugin(plugin))) { + STORAGE_FTS_LOG(WARN, "fail to get plugin", K(ret), K(plugin_handler)); + } else if (FALSE_IT(plugin_param.desc_ = plugin->desc_)) { + } else if (OB_FAIL(plugin->desc_->init(&plugin_param))) { + STORAGE_FTS_LOG(WARN, "fail to init plugin descriptor", K(ret), K(plugin_param)); + } + return ret; +} + +template +int ObTenantFTPluginMgr::unregister_plugin() +{ + int ret = OB_SUCCESS; + PluginHandler plugin_handler; + lib::ObPlugin *plugin = nullptr; + lib::ObPluginParam plugin_param; + if (OB_FAIL(plugin_handler.get_plugin(plugin))) { + STORAGE_FTS_LOG(WARN, "fail to get plugin", K(ret), K(plugin_handler)); + } else if (FALSE_IT(plugin_param.desc_ = plugin->desc_)) { + } else if (OB_FAIL(plugin->desc_->deinit(&plugin_param))) { + STORAGE_FTS_LOG(WARN, 
"fail to deinit plugin descriptor", K(ret)); + } + return ret; +} + +#define OB_FT_PLUGIN_MGR ObTenantFTPluginMgr::get_ft_plugin_mgr() + +} // end storage +} // end oceanbase +#endif // OB_FTS_PLUGIN_MGR_H_ diff --git a/src/storage/fts/ob_fts_stop_word.cpp b/src/storage/fts/ob_fts_stop_word.cpp new file mode 100644 index 0000000000..d57df7fc0d --- /dev/null +++ b/src/storage/fts/ob_fts_stop_word.cpp @@ -0,0 +1,120 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX STORAGE_FTS + +#include "storage/fts/ob_fts_stop_word.h" +#include "storage/fts/ob_fts_plugin_mgr.h" + +namespace oceanbase +{ +namespace storage +{ + +ObNoStopWordAddWord::ObNoStopWordAddWord( + const ObCollationType &type, + common::ObIAllocator &allocator, + common::ObIArray &word) + : collation_type_(type), + allocator_(allocator), + words_(word), + word_count_(0) +{ +} + +int ObNoStopWordAddWord::operator()( + lib::ObFTParserParam *param, + const char *word, + const int64_t word_len) +{ + int ret = OB_SUCCESS; + char *w_buf = nullptr; + if (OB_ISNULL(param) || OB_ISNULL(word) || OB_UNLIKELY(0 >= word_len)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(param), KP(word), K(word_len)); + } else if (OB_ISNULL(w_buf = static_cast(allocator_.alloc(word_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory for fulltext word", K(ret), K(word_len)); + } else { + MEMCPY(w_buf, word, word_len); + ObFTWord ft_word(word_len, w_buf, collation_type_); + if 
(OB_FAIL(words_.push_back(ft_word))) { + LOG_WARN("fail to push word into words array", K(ret), K(ft_word)); + } else { + ++word_count_; + } + } + if (OB_FAIL(ret)) { + if (OB_NOT_NULL(w_buf)) { + allocator_.free(w_buf); + w_buf = nullptr; + } + } + LOG_DEBUG("add word", K(ret), KPC(param), KP(word), K(word_len)); + return ret; +} + +ObStopWordAddWord::ObStopWordAddWord( + const ObCollationType &type, + common::ObIAllocator &allocator, + common::ObIArray &word) + : collation_type_(type), + allocator_(allocator), + words_(word), + non_stopword_count_(0), + stopword_count_(0) +{ +} + +int ObStopWordAddWord::operator()( + lib::ObFTParserParam *param, + const char *word, + const int64_t word_len) +{ + int ret = OB_SUCCESS; + bool is_stopword = false; + ObFTWord ft_word(word_len, word, collation_type_); + if (OB_ISNULL(param) || OB_ISNULL(word) || OB_UNLIKELY(0 >= word_len)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(param), KP(word), K(word_len)); + } else if (OB_FAIL(OB_FT_PLUGIN_MGR.check_stopword(ft_word, is_stopword))) { + LOG_WARN("fail to check stopword", K(ret)); + } else if (is_stopword) { + // the word is stop word, just skip it. 
+ ++stopword_count_; + } else { + char *w_buf = nullptr; + if (OB_ISNULL(w_buf = static_cast(allocator_.alloc(word_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory for fulltext word", K(ret), K(word_len)); + } else { + MEMCPY(w_buf, word, word_len); + ObFTWord non_stopword_ft_word(word_len, w_buf, collation_type_); + if (OB_FAIL(words_.push_back(non_stopword_ft_word))) { + LOG_WARN("fail to push word into words array", K(ret), K(non_stopword_ft_word)); + } else { + ++non_stopword_count_; + } + } + if (OB_FAIL(ret)) { + if (OB_NOT_NULL(w_buf)) { + allocator_.free(w_buf); + w_buf = nullptr; + } + } + } + LOG_DEBUG("add word", K(ret), KPC(param), KP(word), K(word_len), K(is_stopword)); + return ret; +} + +} // end namespace storage +} // end namespace oceanbase diff --git a/src/storage/fts/ob_fts_stop_word.h b/src/storage/fts/ob_fts_stop_word.h new file mode 100644 index 0000000000..44f5cba164 --- /dev/null +++ b/src/storage/fts/ob_fts_stop_word.h @@ -0,0 +1,114 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_FTS_STOP_WORD_H_ +#define OB_FTS_STOP_WORD_H_ + +#include "lib/ob_plugin.h" +#include "lib/container/ob_iarray.h" +#include "storage/fts/ob_fts_struct.h" + +namespace oceanbase +{ +namespace storage +{ + +#define FTS_STOP_WORD_MAX_LENGTH 10 + +static const char ob_stop_word_list[][FTS_STOP_WORD_MAX_LENGTH] = { + "a", + "about", + "an", + "are", + "as", + "at", + "be", + "by", + "com", + "de", + "en", + "for", + "from", + "how", + "i", + "in", + "is", + "it", + "la", + "of", + "on", + "or", + "that", + "the", + "this", + "to", + "was", + "what", + "when", + "where", + "who", + "will", + "with", + "und", + "the", + "www" +}; + +class ObNoStopWordAddWord final : public lib::ObFTParserParam::ObIAddWord +{ +public: + ObNoStopWordAddWord( + const ObCollationType &type, + common::ObIAllocator &allocator, + common::ObIArray &word); + virtual ~ObNoStopWordAddWord() = default; + virtual int operator()( + lib::ObFTParserParam *param, + const char *word, + const int64_t word_len) override; + virtual int64_t get_add_word_count() const override { return word_count_; } + VIRTUAL_TO_STRING_KV(K_(collation_type), K_(word_count), K_(words)); +private: + OB_INLINE common::ObIArray &get_words() { return words_; } +private: + ObCollationType collation_type_; + common::ObIAllocator &allocator_; + common::ObIArray &words_; + int64_t word_count_; +}; + +class ObStopWordAddWord final : public lib::ObFTParserParam::ObIAddWord +{ +public: + ObStopWordAddWord( + const ObCollationType &type, + common::ObIAllocator &allocator, + common::ObIArray &word); + virtual ~ObStopWordAddWord() = default; + virtual int operator()( + lib::ObFTParserParam *param, + const char *word, + const int64_t word_len) override; + virtual int64_t get_add_word_count() const { return non_stopword_count_; } + VIRTUAL_TO_STRING_KV(K_(collation_type), K_(non_stopword_count), K_(stopword_count), K_(words)); +private: + ObCollationType collation_type_; + common::ObIAllocator &allocator_; + common::ObIArray 
&words_; + int64_t non_stopword_count_; + int64_t stopword_count_; +}; + +} // end namespace storage +} // end namespace oceanbase + +#endif // OB_FTS_STOP_WORD_H_ diff --git a/src/storage/fts/ob_fts_struct.h b/src/storage/fts/ob_fts_struct.h new file mode 100644 index 0000000000..c912103f92 --- /dev/null +++ b/src/storage/fts/ob_fts_struct.h @@ -0,0 +1,82 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OB_FTS_STRUCT_H_ +#define OB_FTS_STRUCT_H_ + +#include "lib/charset/ob_charset.h" + +namespace oceanbase +{ +namespace storage +{ + +class ObFTWord final +{ +public: + ObFTWord() : word_(), type_(ObCollationType::CS_TYPE_INVALID) {} + ObFTWord(const int64_t length, const char *ptr, const ObCollationType &type) : word_(length, ptr), type_(type) {} + ~ObFTWord() = default; + + OB_INLINE const common::ObString &get_word() const { return word_; } + OB_INLINE const ObCollationType &get_collation_type() const { return type_; } + OB_INLINE int hash(uint64_t &hash_val) const + { + hash_val = ObCharset::hash(type_, word_); + return common::OB_SUCCESS; + } + OB_INLINE uint64_t hash() const { return word_.hash(); } + OB_INLINE bool empty() const { return word_.empty(); } + + OB_INLINE bool operator ==(const ObFTWord &other) const + { + bool is_equal = false; + if (other.type_ == type_) { + is_equal = 0 == ObCharset::strcmp(type_, word_, other.word_); + } + return is_equal; + } + OB_INLINE bool operator !=(const ObFTWord &other) const { return !(other == *this); } + + TO_STRING_KV(K_(type), 
K_(word)); +private: + common::ObString word_; + ObCollationType type_; +}; + +class ObFTWordCount final +{ +public: + ObFTWordCount() : ft_word_(), word_cnt_(0) {} + ~ObFTWordCount() = default; + OB_INLINE int hash(uint64_t &hash_val) const + { + hash_val = hash(); + return common::OB_SUCCESS; + } + OB_INLINE uint64_t hash() const + { + int64_t hash_value = ft_word_.hash(); + hash_value = common::murmurhash(&word_cnt_, sizeof(word_cnt_), hash_value); + return hash_value; + } + OB_INLINE bool is_valid() const { return !ft_word_.empty() && word_cnt_ > 1; } + TO_STRING_KV(K_(ft_word), K_(word_cnt)); +public: + ObFTWord ft_word_; + int64_t word_cnt_; +}; + +} // end namespace storage +} // end namespace oceanbase + +#endif// OB_FTS_STRUCT_H_ diff --git a/src/storage/fts/ob_ngram_ft_parser.cpp b/src/storage/fts/ob_ngram_ft_parser.cpp new file mode 100644 index 0000000000..135ddc4668 --- /dev/null +++ b/src/storage/fts/ob_ngram_ft_parser.cpp @@ -0,0 +1,122 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE_FTS + +#include "lib/string/ob_string.h" +#include "ob_ngram_ft_parser.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace storage +{ + +#define true_word_char(ctype, character) ((ctype) & (_MY_U | _MY_L | _MY_NMR) || (character) == '_') + +/*static*/ int ObNgramFTParser::segment( + lib::ObFTParserParam *param, + const char *fulltext, + const int64_t ft_len) +{ + int ret = OB_SUCCESS; + int64_t c_nums = 0; + const char *start = fulltext; + const char *next = start; + const char *end = start + ft_len; + if (OB_ISNULL(param) || OB_ISNULL(fulltext) || OB_UNLIKELY(ft_len <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(param), KP(fulltext), K(ft_len)); + } else { + const ObCharsetInfo *cs = param->cs_; + while (OB_SUCC(ret) && next < end) { + const int64_t c_len = ob_mbcharlen_ptr(cs, next, end); + if (next + c_len > end || 0 == c_len) { // if char is invalid, just skip the rest of doc. + break; + } else { + int ctype; + cs->cset->ctype(cs, &ctype, (uchar *)next, (uchar *)end); + if (1 == c_len && (' ' == *next || !true_word_char(ctype, *next))) { + start = next + 1; + next = start; + c_nums = 0; + continue; + } + next += c_len; + ++c_nums; + } + if (NGRAM_TOKEN_SIZE == c_nums) { + if (OB_FAIL(add_word(param, start, next - start))) { + LOG_WARN("fail to add word", K(ret), KP(param), KP(start), KP(next)); + } else { + start += ob_mbcharlen_ptr(cs, start, end); + c_nums = NGRAM_TOKEN_SIZE - 1; + } + } + } + } + return ret; +} + +/*static*/ int ObNgramFTParser::add_word( + lib::ObFTParserParam *param, + const char *word, + int64_t word_len) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(param) + || OB_ISNULL(word) + || OB_UNLIKELY(0 >= word_len)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(param), KP(word), K(word_len)); + } else if (OB_FAIL(param->add_word(param, word, word_len))) { + LOG_WARN("fail to add word", K(ret), KPC(param), 
K(ObString(word_len, word))); + } + return ret; +} + +ObNgramFTParserDesc::ObNgramFTParserDesc() + : is_inited_(false) +{ +} + +int ObNgramFTParserDesc::init(lib::ObPluginParam *param) +{ + is_inited_ = true; + return OB_SUCCESS; +} + +int ObNgramFTParserDesc::deinit(lib::ObPluginParam *param) +{ + reset(); + return OB_SUCCESS; +} + +int ObNgramFTParserDesc::segment(lib::ObFTParserParam *param) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ngram ft parser desc hasn't be initialized", K(ret), K(is_inited_)); + } else if (OB_ISNULL(param) || OB_ISNULL(param->fulltext_) || OB_UNLIKELY(!param->is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(param)); + } else if (OB_FAIL(ObNgramFTParser::segment(param, param->fulltext_, param->ft_length_))) { + LOG_WARN("fail to segment words for fulltext by ngram", K(ret), KPC(param), + K(param->fulltext_), K(param->ft_length_)); + } + return ret; +} + +} // end namespace storage +} // end namespace oceanbase diff --git a/src/storage/fts/ob_ngram_ft_parser.h b/src/storage/fts/ob_ngram_ft_parser.h new file mode 100644 index 0000000000..64522b27b4 --- /dev/null +++ b/src/storage/fts/ob_ngram_ft_parser.h @@ -0,0 +1,63 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#ifndef OB_NGRAM_FT_PARSER_H_
+#define OB_NGRAM_FT_PARSER_H_
+
+#include "lib/ob_plugin.h"
+#include "lib/utility/ob_macro_utils.h"
+#include "lib/utility/ob_print_utils.h"
+
+namespace oceanbase
+{
+namespace storage
+{
+
+// Stateless n-gram tokenizer: all functionality is exposed through static members.
+class ObNgramFTParser final
+{
+public:
+  // Number of characters per emitted token.
+  static const int64_t NGRAM_TOKEN_SIZE = 2; // TODO: @jinzhu, please apply one system variable later, and keep the same as mysql.
+public:
+  ObNgramFTParser() = default;
+  ~ObNgramFTParser() = default;
+  // Segments `fulltext` (`ft_len` bytes) and reports every token through `param`.
+  static int segment(
+      lib::ObFTParserParam *param,
+      const char *fulltext,
+      const int64_t ft_len);
+private:
+  // Forwards one token to the add_word callback carried by `param`.
+  static int add_word(
+      lib::ObFTParserParam *param,
+      const char *word,
+      int64_t word_len);
+private:
+  DISABLE_COPY_ASSIGN(ObNgramFTParser);
+};
+
+// Plugin descriptor that exposes the n-gram parser through lib::ObIFTParserDesc.
+class ObNgramFTParserDesc final : public lib::ObIFTParserDesc
+{
+public:
+  ObNgramFTParserDesc();
+  virtual ~ObNgramFTParserDesc() = default;
+  virtual int init(lib::ObPluginParam *param) override;
+  virtual int deinit(lib::ObPluginParam *param) override;
+  // Fails with OB_NOT_INIT until init() has been called.
+  virtual int segment(lib::ObFTParserParam *param) const override;
+  OB_INLINE void reset() { is_inited_ = false; }
+private:
+  bool is_inited_;
+};
+
+// NOTE(review): a namespace-scope `static` object defined in a header has internal
+// linkage, so every translation unit including this header gets its own instance
+// -- confirm this is intended.
+static ObNgramFTParserDesc ngram_parser;
+
+} // end namespace storage
+} // end namespace oceanbase
+
+#endif // OB_NGRAM_FT_PARSER_H_
diff --git a/src/storage/fts/ob_text_retrieval_iterator.cpp b/src/storage/fts/ob_text_retrieval_iterator.cpp
new file mode 100644
index 0000000000..46edc7d8cc
--- /dev/null
+++ b/src/storage/fts/ob_text_retrieval_iterator.cpp
@@ -0,0 +1,683 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX STORAGE_FTS
+
+#include "sql/engine/expr/ob_expr_bm25.h"
+#include "sql/das/ob_text_retrieval_op.h"
+#include "storage/fts/ob_text_retrieval_iterator.h"
+#include "storage/tx_storage/ob_access_service.h"
+
+namespace oceanbase
+{
+namespace storage
+{
+
+// Accessors of ObTokenRetrievalParam. The ones that go through ir_ctdef_ assert
+// that it has been set (see set_param()) before dereferencing it.
+
+bool ObTokenRetrievalParam::need_relevance() const
+{
+  OB_ASSERT(nullptr != ir_ctdef_);
+  return ir_ctdef_->need_calc_relevance();
+}
+const share::ObLSID &ObTokenRetrievalParam::get_ls_id() const
+{
+  return ls_id_;
+}
+
+const sql::ObDASIRScanCtDef *ObTokenRetrievalParam::get_ir_ctdef() const
+{
+  return ir_ctdef_;
+}
+
+sql::ObDASIRScanRtDef *ObTokenRetrievalParam::get_ir_rtdef()
+{
+  return ir_rtdef_;
+}
+const sql::ObDASScanCtDef *ObTokenRetrievalParam::get_inv_idx_scan_ctdef() const
+{
+  OB_ASSERT(nullptr != ir_ctdef_);
+  return ir_ctdef_->get_inv_idx_scan_ctdef();
+}
+
+const sql::ObDASScanCtDef *ObTokenRetrievalParam::get_inv_idx_agg_ctdef() const
+{
+  OB_ASSERT(nullptr != ir_ctdef_);
+  return ir_ctdef_->get_inv_idx_agg_ctdef();
+}
+
+const sql::ObDASScanCtDef *ObTokenRetrievalParam::get_fwd_idx_agg_ctdef() const
+{
+  OB_ASSERT(nullptr != ir_ctdef_);
+  return ir_ctdef_->get_fwd_idx_agg_ctdef();
+}
+
+const sql::ObDASScanCtDef *ObTokenRetrievalParam::get_doc_id_idx_agg_ctdef() const
+{
+  OB_ASSERT(nullptr != ir_ctdef_);
+  return ir_ctdef_->get_doc_id_idx_agg_ctdef();
+}
+
+const common::ObTabletID &ObTokenRetrievalParam::get_inv_idx_tablet_id() const
+{
+  return inv_idx_tablet_id_;
+}
+const common::ObTabletID &ObTokenRetrievalParam::get_fwd_idx_tablet_id() const
+{
+  return fwd_idx_tablet_id_;
+}
+
+const common::ObTabletID &ObTokenRetrievalParam::get_doc_id_idx_tablet_id() const
+{
+  return doc_id_idx_tablet_id_;
+}
+
+ObTextRetrievalIterator::ObTextRetrievalIterator()
+  : ObNewRowIterator(),
+    mem_context_(nullptr),
+    retrieval_param_(nullptr),
+    tx_desc_(nullptr),
+    snapshot_(nullptr),
+    inv_idx_scan_param_(),
+    inv_idx_agg_param_(),
+    fwd_idx_scan_param_(),
+    calc_exprs_(),
+    inverted_idx_iter_(nullptr),
+    forward_idx_iter_(nullptr),
+    fwd_range_objs_(nullptr),
+    doc_token_cnt_expr_(nullptr),
+    need_fwd_idx_agg_(false),
+    need_inv_idx_agg_(false),
+    inv_idx_agg_evaluated_(false),
+    is_inited_(false)
+{
+}
+
+ObTextRetrievalIterator::~ObTextRetrievalIterator()
+{
+  reset();
+}
+
+/**
+ * Prepare the iterator for retrieving the documents that contain `query_token`.
+ *
+ * Stores the (non-owned) parameter / transaction pointers, creates the private
+ * memory context on first use and builds the inverted-index scan parameters,
+ * the forward-index scan parameters (only when the ctdef has a forward
+ * aggregation) and the list of row-wise calc expressions.
+ *
+ * @return OB_INIT_TWICE when already initialized, OB_INVALID_ARGUMENT when the
+ *         transaction descriptor / snapshot or the ir ctdef / rtdef of
+ *         `retrieval_param` is missing, otherwise the first error of the
+ *         initialization steps.
+ */
+int ObTextRetrievalIterator::init(
+    ObTokenRetrievalParam &retrieval_param,
+    const ObString &query_token,
+    transaction::ObTxDesc *tx_desc,
+    transaction::ObTxReadSnapshot *snapshot)
+{
+  int ret = OB_SUCCESS;
+  if (IS_INIT) {
+    ret = OB_INIT_TWICE;
+    LOG_WARN("double initialization", K(ret), KPC(this));
+  } else if (OB_UNLIKELY(nullptr == tx_desc || nullptr == snapshot || !tx_desc->is_valid())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), KPC(tx_desc), KPC(snapshot));
+  } else if (OB_ISNULL(retrieval_param.get_ir_ctdef()) || OB_ISNULL(retrieval_param.get_ir_rtdef())) {
+    // both definitions are dereferenced below and by the scan-param initializers
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid retrieval param, ir ctdef or rtdef is null", K(ret), K(retrieval_param));
+  } else {
+    retrieval_param_ = &retrieval_param;
+    tx_desc_ = tx_desc;
+    snapshot_ = snapshot;
+    need_fwd_idx_agg_ = retrieval_param.get_ir_ctdef()->has_fwd_agg_;
+    need_inv_idx_agg_ = retrieval_param.need_relevance();
+
+    if (OB_ISNULL(mem_context_)) {
+      lib::ContextParam param;
+      param.set_mem_attr(MTL_ID(), "TextIRIter", ObCtxIds::DEFAULT_CTX_ID);
+      if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) {
+        LOG_WARN("failed to create text retrieval iterator memory context", K(ret));
+      }
+    }
+
+    if (FAILEDx(init_inv_idx_scan_param(query_token))) {
+      LOG_WARN("failed to init inverted index scan param", K(ret), K_(inv_idx_scan_param), K_(inv_idx_agg_param));
+    } else if (need_fwd_idx_agg_ && OB_FAIL(init_fwd_idx_scan_param())) {
+      LOG_WARN("failed to init forward index scan param", K(ret),
K_(fwd_idx_scan_param));
+    } else if (OB_FAIL(init_calc_exprs())) {
+      LOG_WARN("failed to init row-wise calc exprs", K(ret));
+    } else {
+      is_inited_ = true;
+    }
+  }
+  return ret;
+}
+
+// Release everything acquired by init() / get_next_row(): hands both scan
+// iterators back to the access service, destroys the private memory context and
+// clears all non-owned pointers and flags. Also called from the destructor.
+void ObTextRetrievalIterator::reset()
+{
+  int ret = OB_SUCCESS;
+  ObAccessService *tsc_service = MTL(ObAccessService *);
+  if (nullptr == tsc_service) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_ERROR("failed to get access service when reset text retrieval iterator", K(ret));
+  } else {
+    // a failed revert is only logged; the pointer is dropped in either case
+    if (nullptr != inverted_idx_iter_) {
+      if (OB_FAIL(tsc_service->revert_scan_iter(inverted_idx_iter_))) {
+        LOG_ERROR("failed to revert inverted index iter", K(ret));
+      }
+      inverted_idx_iter_ = nullptr;
+    }
+    if (nullptr != forward_idx_iter_) {
+      if (OB_FAIL(tsc_service->revert_scan_iter(forward_idx_iter_))) {
+        LOG_ERROR("failed to revert forward index iter", K(ret));
+      }
+      forward_idx_iter_ = nullptr;
+    }
+  }
+  inv_idx_scan_param_.need_switch_param_ = false;
+  inv_idx_scan_param_.destroy_schema_guard();
+  inv_idx_agg_param_.need_switch_param_ = false;
+  inv_idx_agg_param_.destroy_schema_guard();
+  fwd_idx_scan_param_.need_switch_param_ = false;
+  fwd_idx_scan_param_.destroy_schema_guard();
+  calc_exprs_.reset();
+
+  if (nullptr != mem_context_) {
+    mem_context_->reset_remain_one_page();
+    DESTROY_CONTEXT(mem_context_);
+    mem_context_ = nullptr;
+  }
+  // fwd_range_objs_ was allocated from mem_context_, so it is only forgotten here
+  fwd_range_objs_ = nullptr;
+  doc_token_cnt_expr_ = nullptr;
+  retrieval_param_ = nullptr;
+  tx_desc_ = nullptr;
+  snapshot_ = nullptr;
+  need_fwd_idx_agg_ = false;
+  need_inv_idx_agg_ = false;
+  inv_idx_agg_evaluated_ = false;
+  is_inited_ = false;
+}
+
+// Row-object interface is not supported by this iterator.
+int ObTextRetrievalIterator::get_next_row(ObNewRow *&row)
+{
+  UNUSED(row);
+  return OB_NOT_IMPLEMENT;
+}
+
+// Advance to the next document of the inverted index scan and evaluate the
+// relevance expression for it.
+//
+// On the first call the document-count aggregation is executed, its iterator is
+// reverted and the actual inverted index scan is opened on the same member.
+// Returns OB_ITER_END when the aggregation or the scan has no more rows.
+//
+// NOTE(review): do_doc_cnt_agg() is invoked regardless of need_inv_idx_agg_,
+// while inv_idx_agg_param_ is only initialized when need_inv_idx_agg_ is true
+// (see init_inv_idx_scan_param) -- confirm relevance is always required here.
+int ObTextRetrievalIterator::get_next_row()
+{
+  int ret = OB_SUCCESS;
+  ObAccessService *tsc_service = MTL(ObAccessService *);
+  if (IS_NOT_INIT) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("retrieval iterator not inited", K(ret));
+  } else if (!inv_idx_agg_evaluated_) {
+    if (OB_FAIL(do_doc_cnt_agg())) {
+      if (OB_UNLIKELY(OB_ITER_END != ret)) {
+        LOG_WARN("Fail to do document count aggregation", K(ret), K_(inv_idx_agg_param));
+      }
+    } else if (OB_FAIL(tsc_service->revert_scan_iter(inverted_idx_iter_))) {
+      LOG_WARN("Fail to revert inverted index scan iterator after count aggregation", K(ret));
+    } else if (FALSE_IT(inverted_idx_iter_ = nullptr)) {
+    } else if (OB_FAIL(tsc_service->table_scan(inv_idx_scan_param_, inverted_idx_iter_))) {
+      LOG_WARN("failed to init inverted index scan iterator", K(ret));
+    } else {
+      inv_idx_agg_evaluated_ = true;
+    }
+  }
+
+  if (OB_FAIL(ret)) {
+  } else if (OB_FAIL(get_next_single_row(inv_idx_scan_param_.op_->is_vectorized(), inverted_idx_iter_))) {
+    if (OB_UNLIKELY(OB_ITER_END != ret)) {
+      LOG_WARN("failed to get next row from inverted index", K(ret), K_(inv_idx_scan_param), KPC_(inverted_idx_iter));
+    }
+  } else {
+    LOG_DEBUG("get one invert index scan row", "row",
+        ROWEXPR2STR(*retrieval_param_->get_ir_rtdef()->get_inv_idx_scan_rtdef()->eval_ctx_,
+        *inv_idx_scan_param_.output_exprs_));
+    // calc exprs must be re-evaluated for every row
+    clear_row_wise_evaluated_flag();
+    if (OB_FAIL(get_next_doc_token_cnt(need_fwd_idx_agg_))) {
+      LOG_WARN("failed to get next doc token count", K(ret));
+    } else if (OB_FAIL(project_relevance_expr())) {
+      LOG_WARN("failed to evaluate simarity expr", K(ret));
+    }
+  }
+  return ret;
+}
+
+// Batch interface is not supported by this iterator.
+int ObTextRetrievalIterator::get_next_rows(int64_t &count, int64_t capacity)
+{
+  UNUSEDx(count, capacity);
+  return OB_NOT_IMPLEMENT;
+}
+
+// Not implemented yet.
+int ObTextRetrievalIterator::get_curr_iter_row(
+    const sql::ExprFixedArray *&curr_row,
+    sql::ObEvalCtx *&curr_eval_ctx)
+{
+  UNUSEDx(curr_row, curr_eval_ctx);
+  return OB_NOT_IMPLEMENT;
+}
+
+// Not implemented yet.
+int ObTextRetrievalIterator::get_curr_doc_id()
+{
+  return OB_NOT_IMPLEMENT;
+}
+
+// Not implemented yet.
+int ObTextRetrievalIterator::forward_to_doc(const ObDocId &doc_id)
+{
+  UNUSED(doc_id);
+  return OB_NOT_IMPLEMENT;
+}
+
+// Build the scan parameter of the inverted index for `query_token` and, when
+// need_inv_idx_agg_ is set, the parameter of the document-count aggregation
+// over the same key range.
+int ObTextRetrievalIterator::init_inv_idx_scan_param(const ObString &query_token)
+{
+  int ret = OB_SUCCESS;
+  ObNewRange
inv_idx_scan_range;
+  if (OB_FAIL(gen_inv_idx_scan_range(query_token, inv_idx_scan_range))) {
+    LOG_WARN("failed to generate inverted index scan range", K(ret), K(query_token));
+  } else if (OB_FAIL(init_base_idx_scan_param(
+      retrieval_param_->get_ls_id(),
+      retrieval_param_->get_inv_idx_tablet_id(),
+      retrieval_param_->get_inv_idx_scan_ctdef(),
+      retrieval_param_->get_ir_rtdef()->get_inv_idx_scan_rtdef(),
+      tx_desc_,
+      snapshot_,
+      inv_idx_scan_param_))) {
+    LOG_WARN("fail to init inverted index scan param", K(ret), KPC_(retrieval_param));
+  } else if (OB_FAIL(inv_idx_scan_param_.key_ranges_.push_back(inv_idx_scan_range))) {
+    LOG_WARN("failed to append scan range", K(ret));
+  }
+
+  // the aggregation scans the same tablet and key range as the plain scan
+  if (OB_SUCC(ret) && need_inv_idx_agg_) {
+    if (OB_FAIL(init_base_idx_scan_param(
+        retrieval_param_->get_ls_id(),
+        retrieval_param_->get_inv_idx_tablet_id(),
+        retrieval_param_->get_inv_idx_agg_ctdef(),
+        retrieval_param_->get_ir_rtdef()->get_inv_idx_agg_rtdef(),
+        tx_desc_,
+        snapshot_,
+        inv_idx_agg_param_))) {
+      LOG_WARN("fail to init inverted index count aggregate param", K(ret), KPC_(retrieval_param));
+    } else if (OB_FAIL(inv_idx_agg_param_.key_ranges_.push_back(inv_idx_scan_range))) {
+      LOG_WARN("failed to append scan range", K(ret));
+    } else {
+      // NOTE(review): the template argument of static_cast appears to be missing
+      // here (possibly lost together with the angle brackets) -- confirm against upstream.
+      if (OB_UNLIKELY(!static_cast(
+          retrieval_param_->get_inv_idx_agg_ctdef()->pd_expr_spec_.pd_storage_flag_).is_aggregate_pushdown())) {
+        ret = OB_NOT_IMPLEMENT;
+        LOG_ERROR("not pushdown aggregate not supported", K(ret), K_(retrieval_param));
+      }
+    }
+  }
+
+  return ret;
+}
+
+// Build the scan parameter of the forward index aggregation. Does nothing when
+// the retrieval parameter reports that no relevance is needed.
+int ObTextRetrievalIterator::init_fwd_idx_scan_param()
+{
+  int ret = OB_SUCCESS;
+
+  if (!retrieval_param_->need_relevance()) {
+  } else if (OB_FAIL(init_base_idx_scan_param(
+      retrieval_param_->get_ls_id(),
+      retrieval_param_->get_fwd_idx_tablet_id(),
+      retrieval_param_->get_fwd_idx_agg_ctdef(),
+      retrieval_param_->get_ir_rtdef()->get_fwd_idx_agg_rtdef(),
+      tx_desc_,
+      snapshot_,
+      fwd_idx_scan_param_))) {
+    LOG_WARN("Fail to init foward index scan param", K(ret), KPC_(retrieval_param));
+  }
+  return ret;
+}
+
+// Fill `scan_param` for a range scan (is_get_ = false) of `tablet_id` in `ls_id`
+// from the compile-time (`ctdef`) and run-time (`rtdef`) scan definitions.
+// Key ranges are not set here; callers append them afterwards.
+// Returns OB_INVALID_ARGUMENT when ctdef / rtdef is null and OB_ERR_UNEXPECTED
+// when `snapshot` is null; a null `tx_desc` only resets the transaction id.
+int ObTextRetrievalIterator::init_base_idx_scan_param(
+    const share::ObLSID &ls_id,
+    const common::ObTabletID &tablet_id,
+    const sql::ObDASScanCtDef *ctdef,
+    sql::ObDASScanRtDef *rtdef,
+    transaction::ObTxDesc *tx_desc,
+    transaction::ObTxReadSnapshot *snapshot,
+    ObTableScanParam &scan_param)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), KPC(ctdef), KPC(rtdef), K(ls_id), K(tablet_id));
+  } else {
+    uint64_t tenant_id = MTL_ID();
+    scan_param.tenant_id_ = tenant_id;
+    scan_param.key_ranges_.set_attr(ObMemAttr(tenant_id, "ScanParamKR"));
+    scan_param.ss_key_ranges_.set_attr(ObMemAttr(tenant_id, "ScanParamSSKR"));
+    scan_param.tx_lock_timeout_ = rtdef->tx_lock_timeout_;
+    scan_param.index_id_ = ctdef->ref_table_id_;
+    scan_param.is_get_ = false; // scan
+    scan_param.is_for_foreign_check_ = false;
+    scan_param.timeout_ = rtdef->timeout_ts_;
+    scan_param.scan_flag_ = rtdef->scan_flag_;
+    scan_param.reserved_cell_count_ = ctdef->access_column_ids_.count();
+    scan_param.allocator_ = &rtdef->stmt_allocator_;
+    scan_param.scan_allocator_ = &rtdef->scan_allocator_;
+    scan_param.sql_mode_ = rtdef->sql_mode_;
+    scan_param.frozen_version_ = rtdef->frozen_version_;
+    scan_param.force_refresh_lc_ = rtdef->force_refresh_lc_;
+    scan_param.output_exprs_ = &(ctdef->pd_expr_spec_.access_exprs_);
+    scan_param.calc_exprs_ = &(ctdef->pd_expr_spec_.calc_exprs_);
+    scan_param.aggregate_exprs_ = &(ctdef->pd_expr_spec_.pd_storage_aggregate_output_);
+    scan_param.table_param_ = &(ctdef->table_param_);
+    scan_param.op_ = rtdef->p_pd_expr_op_;
+    scan_param.row2exprs_projector_ = rtdef->p_row2exprs_projector_;
+    scan_param.schema_version_ = ctdef->schema_version_;
+    scan_param.tenant_schema_version_ = rtdef->tenant_schema_version_;
+    scan_param.limit_param_ = rtdef->limit_param_;
+    scan_param.need_scn_ = rtdef->need_scn_;
+
scan_param.pd_storage_flag_ = ctdef->pd_expr_spec_.pd_storage_flag_.pd_flag_;
+    scan_param.fb_snapshot_ = rtdef->fb_snapshot_;
+    scan_param.fb_read_tx_uncommitted_ = rtdef->fb_read_tx_uncommitted_;
+    scan_param.ls_id_ = ls_id;
+    scan_param.tablet_id_ = tablet_id;
+    if (!ctdef->pd_expr_spec_.pushdown_filters_.empty()) {
+      scan_param.op_filters_ = &ctdef->pd_expr_spec_.pushdown_filters_;
+    }
+    // NOTE(review): rtdef->p_pd_expr_op_ is dereferenced without a null check -- confirm callers always set it.
+    scan_param.pd_storage_filters_ = rtdef->p_pd_expr_op_->pd_storage_filters_;
+    if (OB_NOT_NULL(tx_desc)) {
+      scan_param.tx_id_ = tx_desc->get_tx_id();
+    } else {
+      scan_param.tx_id_.reset();
+    }
+
+    if (OB_NOT_NULL(snapshot)) {
+      scan_param.snapshot_ = *snapshot;
+    } else {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_ERROR("null snapshot", K(ret), KP(snapshot));
+    }
+
+    if (FAILEDx(scan_param.column_ids_.assign(ctdef->access_column_ids_))) {
+      LOG_WARN("failed to init column ids", K(ret));
+    }
+  }
+  return ret;
+}
+
+// Open a scan with inv_idx_agg_param_ on inverted_idx_iter_ and fetch the single
+// aggregated row from it. OB_SNAPSHOT_DISCARDED with a valid flashback snapshot
+// is translated to OB_INVALID_QUERY_TIMESTAMP; OB_TRY_LOCK_ROW_CONFLICT is
+// returned without logging.
+int ObTextRetrievalIterator::do_doc_cnt_agg()
+{
+  int ret = OB_SUCCESS;
+  ObAccessService *tsc_service = MTL(ObAccessService *);
+  if (OB_ISNULL(tsc_service)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("failed to get table access service", K(ret));
+  } else if (OB_FAIL(tsc_service->table_scan(inv_idx_agg_param_, inverted_idx_iter_))) {
+    if (OB_SNAPSHOT_DISCARDED == ret && inv_idx_agg_param_.fb_snapshot_.is_valid()) {
+      ret = OB_INVALID_QUERY_TIMESTAMP;
+    } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) {
+      LOG_WARN("failed to do table scan for document count aggregation", K(ret));
+    }
+  } else {
+    // NOTE(review): the template argument of static_cast appears to be missing
+    // here (possibly lost together with the angle brackets) -- confirm against upstream.
+    if (OB_UNLIKELY(!static_cast(inv_idx_agg_param_.pd_storage_flag_).is_aggregate_pushdown())) {
+      ret = OB_NOT_SUPPORTED;
+      LOG_ERROR("aggregate without pushdown not supported", K(ret));
+    } else if (OB_FAIL(get_next_single_row(inv_idx_agg_param_.op_->is_vectorized(), inverted_idx_iter_))) {
+      if (OB_UNLIKELY(OB_ITER_END != ret)) {
+        LOG_WARN("failed to get aggregated row from iter", K(ret));
+      }
+    }
+  }
+  return ret;
+}
+
+// Provide the token count of the current document: either by aggregating over
+// the forward index (use_fwd_idx_agg) or by copying the document length column
+// of the inverted index row.
+int ObTextRetrievalIterator::get_next_doc_token_cnt(const bool use_fwd_idx_agg)
+{
+  int ret = OB_SUCCESS;
+  if (use_fwd_idx_agg) {
+    common::ObDocId cur_doc_id;
+    // token_cnt is filled by do_token_cnt_agg() but not read afterwards in this function
+    int64_t token_cnt = 0;
+    if (OB_FAIL(get_inv_idx_scan_doc_id(cur_doc_id))) {
+      LOG_WARN("failed to get current doc id", K(ret));
+    } else if (OB_FAIL(do_token_cnt_agg(cur_doc_id, token_cnt))) {
+      LOG_WARN("failed to do token count agg on fwd index", K(ret));
+    }
+  } else {
+    if (OB_FAIL(fill_token_cnt_with_doc_len())) {
+      LOG_WARN("failed to fill token cnt with document length", K(ret));
+    }
+  }
+  return ret;
+}
+
+// Decode the doc id of the current inverted index row from the datum of the
+// inv_scan_doc_id_col_ expression.
+int ObTextRetrievalIterator::get_inv_idx_scan_doc_id(ObDocId &doc_id)
+{
+  int ret = OB_SUCCESS;
+  sql::ObExpr *doc_id_expr = retrieval_param_->get_ir_ctdef()->inv_scan_doc_id_col_;
+  sql::ObEvalCtx *eval_ctx = retrieval_param_->get_ir_rtdef()->get_inv_idx_scan_rtdef()->eval_ctx_;
+  ObDatum &doc_id_datum = doc_id_expr->locate_expr_datum(*eval_ctx);
+  if (OB_FAIL(doc_id.from_string(doc_id_datum.get_string()))) {
+    LOG_WARN("failed to get ObDocId from datum", K(ret));
+  }
+
+  return ret;
+}
+
+// Scan the forward index for `doc_id` with a pushed-down aggregation and return
+// the aggregated value in `token_count`. The first call opens the forward index
+// iterator, later calls reuse and rescan it with the new key range.
+int ObTextRetrievalIterator::do_token_cnt_agg(const ObDocId &doc_id, int64_t &token_count)
+{
+  int ret = OB_SUCCESS;
+
+  token_count = 0;
+  ObNewRange scan_range;
+  if (OB_FAIL(reuse_fwd_idx_iter())) {
+    LOG_WARN("failed to reuse forward index iterator", K(ret));
+  } else if (OB_UNLIKELY(!fwd_idx_scan_param_.key_ranges_.empty())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected non empty forward index scan range", K(ret));
+  } else if (OB_FAIL(gen_fwd_idx_scan_range(doc_id, scan_range))) {
+    LOG_WARN("failed to generate forward index scan range", K(ret));
+  } else if (OB_FAIL(fwd_idx_scan_param_.key_ranges_.push_back(scan_range))) {
+    LOG_WARN("failed to add forward index scan range", K(ret), K(scan_range));
+  }
+
+  if (OB_SUCC(ret)) {
+    ObAccessService *tsc_service = MTL(ObAccessService *);
+    if (OB_ISNULL(tsc_service)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("failed to get table access service", K(ret));
+    } else if (nullptr == forward_idx_iter_) {
+      if (OB_FAIL(tsc_service->table_scan(fwd_idx_scan_param_, forward_idx_iter_))) {
+        if (OB_SNAPSHOT_DISCARDED == ret && fwd_idx_scan_param_.fb_snapshot_.is_valid()) {
+          ret = OB_INVALID_QUERY_TIMESTAMP;
+        } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) {
+          LOG_WARN("failed to init forward index scan iterator", K(ret), K_(fwd_idx_scan_param));
+        }
+      }
+    } else {
+      // switch the scan parameter only when the tablet recorded in it differs from the requested one
+      const ObTabletID &storage_tablet_id = fwd_idx_scan_param_.tablet_id_;
+      const bool need_switch_param =
+          storage_tablet_id.is_valid() && storage_tablet_id != retrieval_param_->get_fwd_idx_tablet_id();
+      fwd_idx_scan_param_.need_switch_param_ = need_switch_param;
+      if (OB_FAIL(tsc_service->reuse_scan_iter(need_switch_param, forward_idx_iter_))) {
+        LOG_WARN("failed to reuse scan iter", K(ret));
+      } else if (OB_FAIL(tsc_service->table_rescan(fwd_idx_scan_param_, forward_idx_iter_))) {
+        LOG_WARN("failed to rescan forward index table", K(ret), K_(fwd_idx_scan_param));
+      }
+    }
+
+    if (OB_SUCC(ret)) {
+      // NOTE(review): the template argument of static_cast appears to be missing
+      // here (possibly lost together with the angle brackets) -- confirm against upstream.
+      if (!static_cast(
+          retrieval_param_->get_fwd_idx_agg_ctdef()->pd_expr_spec_.pd_storage_flag_).is_aggregate_pushdown()) {
+        ret = OB_NOT_IMPLEMENT;
+        LOG_ERROR("aggregate without pushdown not implemented", K(ret));
+      } else {
+        if (OB_FAIL(forward_idx_iter_->get_next_row())) {
+          LOG_WARN("failed to get next row from forward index iterator", K(ret));
+        } else {
+          // the count is read from the first pushed-down aggregate output expression
+          const sql::ObExpr *agg_expr = retrieval_param_->get_fwd_idx_agg_ctdef()->pd_expr_spec_.pd_storage_aggregate_output_.at(0);
+          sql::ObEvalCtx *eval_ctx = retrieval_param_->get_ir_rtdef()->get_fwd_idx_agg_rtdef()->eval_ctx_;
+          const ObDatum &word_cnt_datum = agg_expr->locate_expr_datum(*eval_ctx);
+          token_count = word_cnt_datum.get_int();
+          LOG_DEBUG("retrieval iterator get token cnt for doc", K(ret), K(doc_id), K(token_count));
+        }
+      }
+    }
+  }
+
+  return ret;
+}
+
+// Evaluate the document length column of the inverted index row and write it
+// into the datum of doc_token_cnt_expr_.
+int ObTextRetrievalIterator::fill_token_cnt_with_doc_len()
+{
+  int ret = OB_SUCCESS;
+  const sql::ObExpr *agg_expr = doc_token_cnt_expr_;
+  const sql::ObExpr *doc_length_expr =
retrieval_param_->get_ir_ctdef()->inv_scan_doc_length_col_;
+  sql::ObEvalCtx *eval_ctx = retrieval_param_->get_ir_rtdef()->eval_ctx_;
+  ObDatum *doc_length_datum = nullptr;
+  // the target expression must exist and be of decimal-int type
+  if (OB_ISNULL(agg_expr) || OB_ISNULL(doc_length_expr) || OB_ISNULL(eval_ctx)
+      || OB_UNLIKELY(agg_expr->datum_meta_.get_type() != ObDecimalIntType)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected null expr", K(ret), KPC(agg_expr), KP(doc_length_expr), KP(eval_ctx));
+  } else if (OB_FAIL(doc_length_expr->eval(*eval_ctx, doc_length_datum))) {
+    LOG_WARN("failed to evaluate document length expr", K(ret));
+  } else {
+    ObDatum &agg_datum = agg_expr->locate_datum_for_write(*eval_ctx);
+    agg_datum.set_decimal_int(doc_length_datum->get_uint());
+  }
+  return ret;
+}
+
+// Evaluate the relevance expression of the ir ctdef for the current row.
+// Returns OB_INVALID_ARGUMENT when no relevance expression is configured.
+int ObTextRetrievalIterator::project_relevance_expr()
+{
+  int ret = OB_SUCCESS;
+  const sql::ObDASIRScanRtDef *ir_rtdef = retrieval_param_->get_ir_rtdef();
+  sql::ObExpr *relevance_expr = retrieval_param_->get_ir_ctdef()->relevance_expr_;
+  ObDatum *relevance_datum = nullptr;
+  if (OB_ISNULL(relevance_expr)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid relevance expr", K(ret));
+  } else if (OB_FAIL(relevance_expr->eval(*ir_rtdef->eval_ctx_, relevance_datum))) {
+    LOG_WARN("failed to evaluate relevance", K(ret));
+  }
+  return ret;
+}
+
+// Clear the key ranges of the forward index scan parameter once an iterator
+// exists, so that the next document's range can be appended. Always succeeds.
+int ObTextRetrievalIterator::reuse_fwd_idx_iter()
+{
+  int ret = OB_SUCCESS;
+  if (nullptr != forward_idx_iter_) {
+    fwd_idx_scan_param_.key_ranges_.reuse();
+  }
+  return ret;
+}
+
+// Build the inclusive inverted-index key range [(token, MIN), (token, MAX)] for
+// `query_token`. The rowkey objects are allocated from the iterator's memory
+// context and stay valid until reset().
+int ObTextRetrievalIterator::gen_inv_idx_scan_range(const ObString &query_token, ObNewRange &scan_range)
+{
+  int ret = OB_SUCCESS;
+  void *buf = nullptr;
+  ObObj *obj_ptr = nullptr;
+  common::ObArenaAllocator &ctx_alloc = mem_context_->get_arena_allocator();
+  constexpr int64_t obj_cnt = INV_IDX_ROWKEY_COL_CNT * 2;
+  ObObj tmp_obj;
+  tmp_obj.set_string(ObVarcharType, query_token);
+  // We need to ensure collation type / level between query text and token column is compatible
+  tmp_obj.set_meta_type(retrieval_param_->get_ir_ctdef()->search_text_->obj_meta_);
+
+  if (OB_ISNULL(buf = ctx_alloc.alloc(sizeof(ObObj) * obj_cnt))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("failed to allocate memory for rowkey obj", K(ret));
+  } else if (OB_ISNULL(obj_ptr = new (buf) ObObj[obj_cnt])) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr", K(ret));
+  } else if (OB_FAIL(ob_write_obj(ctx_alloc, tmp_obj, obj_ptr[0]))) {
+    LOG_WARN("failed to write obj", K(ret));
+  } else if (OB_FAIL(ob_write_obj(ctx_alloc, tmp_obj, obj_ptr[2]))) {
+    LOG_WARN("failed to write obj", K(ret));
+  } else {
+    // objs [0, 1] form the start key, objs [2, 3] the end key
+    obj_ptr[1].set_min_value();
+    obj_ptr[3].set_max_value();
+    scan_range.table_id_ = retrieval_param_->get_inv_idx_scan_ctdef()->ref_table_id_;
+    scan_range.start_key_.assign(obj_ptr, INV_IDX_ROWKEY_COL_CNT);
+    scan_range.end_key_.assign(&obj_ptr[2], INV_IDX_ROWKEY_COL_CNT);
+    scan_range.border_flag_.set_inclusive_start();
+    scan_range.border_flag_.set_inclusive_end();
+  }
+  return ret;
+}
+
+// Build the inclusive forward-index key range [(doc_id, MIN), (doc_id, MAX)].
+// The rowkey objects are allocated once and overwritten on every call.
+int ObTextRetrievalIterator::gen_fwd_idx_scan_range(const ObDocId &doc_id, ObNewRange &scan_range)
+{
+  int ret = OB_SUCCESS;
+  if (nullptr == fwd_range_objs_) {
+    void *buf = nullptr;
+    common::ObArenaAllocator &ctx_alloc = mem_context_->get_arena_allocator();
+    constexpr int64_t obj_cnt = FWD_IDX_ROWKEY_COL_CNT * 2;
+    if (OB_ISNULL(buf = ctx_alloc.alloc(sizeof(ObObj) * obj_cnt))) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("failed to allocate memory for rowkey obj", K(ret));
+    } else if (OB_ISNULL(fwd_range_objs_ = new (buf) ObObj[obj_cnt])) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("allocate memory failed", K(ret));
+    }
+  }
+  if (OB_SUCC(ret)) {
+    fwd_range_objs_[0].set_varbinary(doc_id.get_string());
+    fwd_range_objs_[1].set_min_value();
+    fwd_range_objs_[2].set_varbinary(doc_id.get_string());
+    fwd_range_objs_[3].set_max_value();
+    scan_range.table_id_ = retrieval_param_->get_fwd_idx_agg_ctdef()->ref_table_id_;
+    scan_range.start_key_.assign(fwd_range_objs_, FWD_IDX_ROWKEY_COL_CNT);
+    scan_range.end_key_.assign(&fwd_range_objs_[2], FWD_IDX_ROWKEY_COL_CNT);
+    scan_range.border_flag_.set_inclusive_start();
+    scan_range.border_flag_.set_inclusive_end();
+  }
+  return ret;
+}
+
+// Collect the expressions whose evaluated flag has to be cleared for every row
+// (the relevance expression and its cast arguments) and locate the expression
+// that receives the per-document token count (doc_token_cnt_expr_).
+// Does nothing when the ctdef does not need relevance.
+int ObTextRetrievalIterator::init_calc_exprs()
+{
+  int ret = OB_SUCCESS;
+  if (retrieval_param_->get_ir_ctdef()->need_calc_relevance()) {
+    sql::ObExpr *relevance_expr = retrieval_param_->get_ir_ctdef()->relevance_expr_;
+    if (OB_ISNULL(relevance_expr)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected null relevance expr", K(ret));
+    } else if (OB_FAIL(calc_exprs_.push_back(relevance_expr))) {
+      LOG_WARN("failed to append relevance expr", K(ret));
+    }
+    for (int64_t i = 0; OB_SUCC(ret) && i < relevance_expr->arg_cnt_; ++i) {
+      sql::ObExpr *arg_expr = relevance_expr->args_[i];
+      if (OB_ISNULL(arg_expr)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected null arg expr", K(ret));
+      } else if (T_FUN_SYS_CAST == arg_expr->type_) {
+        // cast expr is evaluated with relevance expr
+        if (OB_FAIL(calc_exprs_.push_back(arg_expr))) {
+          LOG_WARN("failed to append cast expr", K(ret));
+        }
+      }
+    }
+
+    if (OB_SUCC(ret)) {
+      const int64_t doc_token_cnt_idx = sql::ObExprBM25::DOC_TOKEN_CNT_PARAM_IDX;
+      if (OB_UNLIKELY(doc_token_cnt_idx >= relevance_expr->arg_cnt_)) {
+        // never index args_ beyond the number of arguments the expression really has
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected relevance expr arg count", K(ret), K(doc_token_cnt_idx), K(relevance_expr->arg_cnt_));
+      } else {
+        sql::ObExpr *doc_token_cnt_param_expr = relevance_expr->args_[doc_token_cnt_idx];
+        if (T_FUN_SYS_CAST == doc_token_cnt_param_expr->type_) {
+          // look through the implicit cast to the underlying aggregate
+          doc_token_cnt_param_expr = doc_token_cnt_param_expr->args_[0];
+        }
+        if (OB_ISNULL(doc_token_cnt_param_expr)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("unexpected null doc token cnt expr", K(ret));
+        } else if (OB_UNLIKELY(doc_token_cnt_param_expr->type_ != T_FUN_SUM)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("unexpected doc token cnt expr type", K(ret), KPC(doc_token_cnt_param_expr));
+        } else {
+          doc_token_cnt_expr_ = doc_token_cnt_param_expr;
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+// Mark every expression in calc_exprs_ as not evaluated for the current row /
+// batch index so that it is recomputed on the next eval().
+void ObTextRetrievalIterator::clear_row_wise_evaluated_flag()
+{
+  sql::ObEvalCtx *eval_ctx = retrieval_param_->get_ir_rtdef()->eval_ctx_;
+  for (int64_t i = 0; i < calc_exprs_.count(); ++i) {
+    sql::ObExpr *expr = calc_exprs_.at(i);
+    if (expr->is_batch_result()) {
+      expr->get_evaluated_flags(*eval_ctx).unset(eval_ctx->get_batch_idx());
+    } else {
+      expr->get_eval_info(*eval_ctx).clear_evaluated_flag();
+    }
+  }
+}
+
+} // end storage
+} // end oceanbase
diff --git a/src/storage/fts/ob_text_retrieval_iterator.h b/src/storage/fts/ob_text_retrieval_iterator.h
new file mode 100644
index 0000000000..2d3cd88dc3
--- /dev/null
+++ b/src/storage/fts/ob_text_retrieval_iterator.h
@@ -0,0 +1,175 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OB_TEXT_RETRIEVAL_ITERATOR_H_
+#define OB_TEXT_RETRIEVAL_ITERATOR_H_
+
+#include "common/row/ob_row_iterator.h"
+#include "storage/access/ob_dml_param.h"
+
+
+namespace oceanbase
+{
+namespace sql
+{
+struct ObDASScanCtDef;
+struct ObDASScanRtDef;
+struct ObDASIRScanCtDef;
+struct ObDASIRScanRtDef;
+}
+namespace storage
+{
+
+// Bundle of everything one token retrieval needs: log stream, the tablets of
+// the inverted / forward / doc-id indexes and the (non-owned) compile-time and
+// run-time definitions of the IR scan.
+struct ObTokenRetrievalParam
+{
+public:
+  ObTokenRetrievalParam()
+    : app_avg_tablet_doc_token_cnt_(0),
+      ls_id_(),
+      inv_idx_tablet_id_(),
+      fwd_idx_tablet_id_(),
+      doc_id_idx_tablet_id_(),
+      ir_ctdef_(nullptr),
+      ir_rtdef_(nullptr)
+  {}
+  ~ObTokenRetrievalParam() {}
+
+  bool need_relevance() const;
+  const share::ObLSID &get_ls_id() const;
+  const sql::ObDASIRScanCtDef *get_ir_ctdef() const;
+  sql::ObDASIRScanRtDef *get_ir_rtdef();
+  const sql::ObDASScanCtDef *get_inv_idx_scan_ctdef() const;
+  const sql::ObDASScanCtDef *get_inv_idx_agg_ctdef() const;
+  const sql::ObDASScanCtDef *get_fwd_idx_agg_ctdef() const;
+  const sql::ObDASScanCtDef *get_doc_id_idx_agg_ctdef() const;
+  const common::ObTabletID &get_inv_idx_tablet_id() const;
+  const common::ObTabletID &get_fwd_idx_tablet_id() const;
+  const common::ObTabletID &get_doc_id_idx_tablet_id() const;
+  // Set all members at once. `ir_ctdef` / `ir_rtdef` are borrowed, not owned.
+  // `approx_avg_token_cnt` is stored in app_avg_tablet_doc_token_cnt_.
+  inline void set_param(
+      const share::ObLSID &ls_id,
+      const ObTabletID &inv_idx_tablet_id,
+      const ObTabletID &fwd_idx_tablet_id,
+      const ObTabletID &doc_id_idx_tablet_id,
+      const sql::ObDASIRScanCtDef *ir_ctdef,
+      sql::ObDASIRScanRtDef *ir_rtdef,
+      const int64_t approx_avg_token_cnt = 0)
+  {
+    // previously the argument was accepted but silently dropped
+    app_avg_tablet_doc_token_cnt_ = approx_avg_token_cnt;
+    ls_id_ = ls_id;
+    inv_idx_tablet_id_ = inv_idx_tablet_id;
+    fwd_idx_tablet_id_ = fwd_idx_tablet_id;
+    doc_id_idx_tablet_id_ = doc_id_idx_tablet_id;
+    ir_ctdef_ = ir_ctdef;
+    ir_rtdef_ = ir_rtdef;
+  }
+
+  TO_STRING_KV(K_(app_avg_tablet_doc_token_cnt),
+      K_(ls_id),
+      K_(inv_idx_tablet_id),
+      K_(fwd_idx_tablet_id),
+      K_(doc_id_idx_tablet_id));
+private:
+  int64_t app_avg_tablet_doc_token_cnt_; // TODO: use app avg tablet doc token cnt to calc bm25 idf
+  share::ObLSID ls_id_;
+  common::ObTabletID inv_idx_tablet_id_;
+  common::ObTabletID fwd_idx_tablet_id_;
+  common::ObTabletID doc_id_idx_tablet_id_;
+  const sql::ObDASIRScanCtDef *ir_ctdef_;
+  sql::ObDASIRScanRtDef *ir_rtdef_;
+};
+
+// Single token retrieval iter
+// Iterates over the inverted index rows of one query token and evaluates the
+// relevance expression for every document. Only the expression-based
+// get_next_row() is implemented; the other row interfaces return OB_NOT_IMPLEMENT.
+class ObTextRetrievalIterator : public common::ObNewRowIterator
+{
+public:
+  ObTextRetrievalIterator();
+  virtual ~ObTextRetrievalIterator();
+
+  // All pointer / reference arguments are borrowed and must outlive the iterator
+  // (or its next reset()).
+  int init(
+      ObTokenRetrievalParam &retrieval_param,
+      const ObString &query_token,
+      transaction::ObTxDesc *tx_desc,
+      transaction::ObTxReadSnapshot *snapshot);
+
+  virtual int get_next_row(ObNewRow *&row) override;
+  virtual int get_next_row() override;
+  virtual int get_next_rows(int64_t &count, int64_t capacity) override;
+  virtual void reset() override;
+
+  int get_curr_iter_row(const sql::ExprFixedArray *&curr_row, sql::ObEvalCtx *&curr_eval_ctx);
+
+  int get_curr_doc_id();
+  int forward_to_doc(const common::ObDocId &doc_id); // TODO: impl this primitive for skipping scan in conjunctive processing
+
+  TO_STRING_KV(KPC_(retrieval_param), K_(is_inited));
+private:
+  int init_inv_idx_scan_param(const ObString &query_token);
+  int init_fwd_idx_scan_param();
+  static int init_base_idx_scan_param(
+      const share::ObLSID &ls_id,
+      const common::ObTabletID &tablet_id,
+      const sql::ObDASScanCtDef *ctdef,
+      sql::ObDASScanRtDef *rtdef,
+      transaction::ObTxDesc *tx_desc,
+      transaction::ObTxReadSnapshot *snapshot,
+      ObTableScanParam &scan_param);
+  int get_next_doc_token_cnt(const bool use_fwd_idx_agg);
+  int do_doc_cnt_agg();
+  int do_token_cnt_agg(const ObDocId &doc_id, int64_t &token_count);
+  int get_inv_idx_scan_doc_id(ObDocId &doc_id);
+  int fill_token_cnt_with_doc_len();
+  int project_relevance_expr();
+  int reuse_fwd_idx_iter();
+  int gen_inv_idx_scan_range(const ObString &query_token, ObNewRange &scan_range);
+  int gen_fwd_idx_scan_range(const ObDocId &doc_id, ObNewRange &scan_range);
+  inline bool need_calc_relevance() { return true; } // TODO: reduce tsc ops if no need to calc relevance
+  int init_calc_exprs();
+  void clear_row_wise_evaluated_flag();
+
+  // TODO: delete this after enable standard vectorized execution
+  // Fetches exactly one row from `iter`: through get_next_rows() with a capacity
+  // of 1 when the scan is vectorized, through get_next_row() otherwise.
+  inline int get_next_single_row(const bool is_vectorized, ObNewRowIterator *iter)
+  {
+    int ret = OB_SUCCESS;
+    if (is_vectorized) {
+      int64_t scan_row_cnt = 0;
+      ret = iter->get_next_rows(scan_row_cnt, 1);
+    } else {
+      ret = iter->get_next_row();
+    }
+    return ret;
+  }
+private:
+  static const int64_t FWD_IDX_ROWKEY_COL_CNT = 2;
+  static const int64_t INV_IDX_ROWKEY_COL_CNT = 2;
+private:
+  lib::MemoryContext mem_context_;
+  ObTokenRetrievalParam *retrieval_param_;
+  transaction::ObTxDesc *tx_desc_;
+  transaction::ObTxReadSnapshot *snapshot_;
+  ObTableScanParam inv_idx_scan_param_;
+  ObTableScanParam inv_idx_agg_param_;
+  ObTableScanParam fwd_idx_scan_param_;
+  // NOTE(review): the template arguments of ObSEArray appear to be missing here
+  // (possibly lost together with the angle brackets) -- confirm against upstream.
+  common::ObSEArray calc_exprs_;
+  common::ObNewRowIterator *inverted_idx_iter_;
+  common::ObNewRowIterator *forward_idx_iter_;
+  ObObj *fwd_range_objs_;
+  sql::ObExpr *doc_token_cnt_expr_;
+  bool need_fwd_idx_agg_;
+  bool need_inv_idx_agg_;
+  bool inv_idx_agg_evaluated_;
+  bool is_inited_;
+};
+
+}
+}
+
+
+#endif //OB_TEXT_RETRIEVAL_ITERATOR_H_
diff --git a/src/storage/fts/ob_whitespace_ft_parser.cpp b/src/storage/fts/ob_whitespace_ft_parser.cpp
new file mode 100644
index 0000000000..a2776290d5
--- /dev/null
+++ b/src/storage/fts/ob_whitespace_ft_parser.cpp
@@ -0,0 +1,177 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */ + +#define USING_LOG_PREFIX STORAGE_FTS + +#include "lib/string/ob_string.h" +#include "storage/fts/ob_whitespace_ft_parser.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace storage +{ + +/*static*/ int ObSpaceFTParser::segment( + lib::ObFTParserParam *param, + const char *ft, + const int64_t ft_len) +{ + int ret = OB_SUCCESS; + ObDatum doc; + doc.set_string(ft, ft_len); + ObSpaceFTParser parser; + share::ObITokenStream *token_stream = nullptr; + if (OB_ISNULL(param) || OB_ISNULL(param->cs_) || OB_ISNULL(ft) || OB_UNLIKELY(0 >= ft_len)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(param), KP(ft), K(ft_len)); + } else if (OB_FAIL(parser.init(param))) { + LOG_WARN("fail to initialize space parser", K(ret), KPC(param)); + } else if (FALSE_IT(doc.set_string(ft, ft_len))) { + } else if (OB_FAIL(parser.segment(doc, token_stream))) { + LOG_WARN("fail to segment fulltext by parser", K(ret), KP(ft), K(ft_len)); + } else if (OB_ISNULL(token_stream)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("token stream is nullptr", K(ret), KP(token_stream)); + } else { + ObDatum token; + int64_t token_freq = 0; + while (OB_SUCC(ret)) { + if (OB_FAIL(token_stream->get_next(token, token_freq))) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to get next token", K(ret), KPC(token_stream)); + } + } else if (OB_FAIL(add_word(param, param->allocator_, token.ptr_, token.len_))) { + LOG_WARN("fail to add word", K(ret), K(token), KPC(param)); + } + } + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + } + return ret; +} + +/*static*/ int ObSpaceFTParser::add_word( + lib::ObFTParserParam *param, + common::ObIAllocator *allocator, + const char *word, + int64_t word_len) +{ + int ret = OB_SUCCESS; + char *buf = nullptr; + if (OB_ISNULL(param) + || OB_ISNULL(allocator) + || OB_ISNULL(word) + || OB_UNLIKELY(0 >= word_len)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(param), KP(allocator), KP(word), K(word_len)); 
+ } else if (OB_ISNULL(buf = static_cast(allocator->alloc(word_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate word memory", K(ret), K(word_len)); + } else if (FALSE_IT(MEMCPY(buf, word, word_len))) { + } else if (OB_FAIL(param->add_word(param, buf, word_len))) { + LOG_WARN("fail to add word", K(ret), KPC(param), K(ObString(word_len, buf)), K(ObString(word_len, word))); + } else { + LOG_DEBUG("succeed to add word", K(ObString(word_len, word))); + } + return ret; +} + +int ObSpaceFTParser::init(lib::ObFTParserParam *param) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret), K(is_inited_)); + } else if (OB_ISNULL(param) || OB_UNLIKELY(!param->is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("param is nullptr", K(ret), KPC(param)); + } else if (OB_UNLIKELY(UINT32_MAX < param->ft_length_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("too large document, english analyzer hasn't be supported", K(ret), K(param->ft_length_)); + } else { + analysis_ctx_.cs_ = param->cs_; + analysis_ctx_.filter_stopword_ = false; + analysis_ctx_.need_grouping_ = false; + if (OB_FAIL(english_analyzer_.init(analysis_ctx_, *param->allocator_))) { + LOG_WARN("fail to init english analyzer", K(ret), KPC(param), K(analysis_ctx_)); + } else { + is_inited_ = true; + } + } + if (OB_FAIL(ret) && OB_UNLIKELY(!is_inited_)) { + reset(); + } + return ret; +} + +int ObSpaceFTParser::segment( + const common::ObDatum &doc, + share::ObITokenStream *&token_stream) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(doc.ptr_) || OB_UNLIKELY(0 >= doc.len_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(doc.ptr_), K(doc.len_)); + } else if (OB_UNLIKELY(UINT32_MAX < doc.len_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("too large document, english analyzer hasn't be supported", K(ret), K(doc.len_)); + } else if (OB_FAIL(english_analyzer_.analyze(doc, token_stream))) { + LOG_WARN("fail to analyze document", 
K(ret), K(english_analyzer_), KP(doc.ptr_), K(doc.len_)); + } + return ret; +} + +void ObSpaceFTParser::reset() +{ + analysis_ctx_.reset(); + english_analyzer_.reset(); + is_inited_ = false; +} + +ObWhiteSpaceFTParserDesc::ObWhiteSpaceFTParserDesc() + : is_inited_(false) +{ +} + +int ObWhiteSpaceFTParserDesc::init(lib::ObPluginParam *param) +{ + is_inited_ = true; + return OB_SUCCESS; +} + +int ObWhiteSpaceFTParserDesc::deinit(lib::ObPluginParam *param) +{ + reset(); + return OB_SUCCESS; +} + +int ObWhiteSpaceFTParserDesc::segment(lib::ObFTParserParam *param) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("default ft parser desc hasn't be initialized", K(ret), K(is_inited_)); + } else if (OB_ISNULL(param) || OB_ISNULL(param->fulltext_) || OB_UNLIKELY(!param->is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(param)); + } else if (OB_FAIL(ObSpaceFTParser::segment(param, param->fulltext_, param->ft_length_))) { + LOG_WARN("fail to segment words for fulltext by spaces", K(ret), KPC(param), + K(param->fulltext_), K(param->ft_length_)); + } + return ret; +} + +} // end namespace storage +} // end namespace oceanbase diff --git a/src/storage/fts/ob_whitespace_ft_parser.h b/src/storage/fts/ob_whitespace_ft_parser.h new file mode 100644 index 0000000000..5dff4464da --- /dev/null +++ b/src/storage/fts/ob_whitespace_ft_parser.h @@ -0,0 +1,80 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_DEFAULT_FT_PARSER_H_ +#define OB_DEFAULT_FT_PARSER_H_ + +#include "lib/ob_plugin.h" +#include "lib/utility/ob_macro_utils.h" +#include "lib/utility/ob_print_utils.h" +#include "share/text_analysis/ob_text_analyzer.h" + +namespace oceanbase +{ +namespace storage +{ + +class ObSpaceFTParser final +{ +public: + static int segment( + lib::ObFTParserParam *param, + const char *fulltext, + const int64_t ft_len); + +private: + ObSpaceFTParser() + : analysis_ctx_(), + english_analyzer_(), + is_inited_(false) + {} + ~ObSpaceFTParser() = default; + + static int add_word( + lib::ObFTParserParam *param, + common::ObIAllocator *allocator, + const char *word, + int64_t word_len); + int init(lib::ObFTParserParam *param); + void reset(); + int segment( + const common::ObDatum &doc, + share::ObITokenStream *&token_stream); + TO_STRING_KV(K_(analysis_ctx), K_(english_analyzer), K_(is_inited)); + +private: + share::ObTextAnalysisCtx analysis_ctx_; + share::ObEnglishTextAnalyzer english_analyzer_; + bool is_inited_; + + DISALLOW_COPY_AND_ASSIGN(ObSpaceFTParser); +}; + +class ObWhiteSpaceFTParserDesc final : public lib::ObIFTParserDesc +{ +public: + ObWhiteSpaceFTParserDesc(); + virtual ~ObWhiteSpaceFTParserDesc() = default; + virtual int init(lib::ObPluginParam *param) override; + virtual int deinit(lib::ObPluginParam *param) override; + virtual int segment(lib::ObFTParserParam *param) const override; + OB_INLINE void reset() { is_inited_ = false; } +private: + bool is_inited_; +}; + +static ObWhiteSpaceFTParserDesc whitespace_parser; + +} // end namespace storage +} // end namespace oceanbase + +#endif // OB_DEFAULT_FT_PARSER_H_ diff --git a/src/storage/ls/ob_ls_tablet_service.cpp b/src/storage/ls/ob_ls_tablet_service.cpp index 67bc09b476..bbfd375391 100644 --- a/src/storage/ls/ob_ls_tablet_service.cpp +++ b/src/storage/ls/ob_ls_tablet_service.cpp @@ -4218,7 +4218,7 @@ int ObLSTabletService::insert_lob_tablet_row( LOG_WARN("[STORAGE_LOB]failed to get lob manager 
handle.", K(ret)); } else if (row.row_val_.count_ != col_cnt) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("[STORAGE_LOB]column count invalid", K(ret), K(col_cnt), K(row.row_val_.count_)); + LOG_WARN("[STORAGE_LOB]column count invalid", K(ret), K(col_cnt), K(row.row_val_.count_), KPC(run_ctx.col_descs_)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < col_cnt; ++i) { const ObColDesc &column = run_ctx.col_descs_->at(i); diff --git a/src/storage/meta_mem/ob_tablet_map_key.cpp b/src/storage/meta_mem/ob_tablet_map_key.cpp index 85e7eb8e4d..9ef8648166 100644 --- a/src/storage/meta_mem/ob_tablet_map_key.cpp +++ b/src/storage/meta_mem/ob_tablet_map_key.cpp @@ -56,4 +56,4 @@ uint64_t ObTabletMapKey::hash() const return hash_val; } } // namespace storage -} // namespace oceanbase \ No newline at end of file +} // namespace oceanbase diff --git a/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp b/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp index f50a248024..fa5c68c562 100644 --- a/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp +++ b/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp @@ -224,9 +224,7 @@ int ObTenantMetaMemMgr::mtl_new(ObTenantMetaMemMgr *&meta_mem_mgr) int ObTenantMetaMemMgr::init() { int ret = OB_SUCCESS; - lib::ObMemAttr mem_attr(tenant_id_, "MetaAllocator", ObCtxIds::META_OBJ_CTX_ID); - lib::ObMemAttr map_attr(tenant_id_, "TabletMap"); - lib::ObMemAttr other_attr(tenant_id_, "T3MOtherMem"); + const lib::ObMemAttr map_attr(tenant_id_, "TabletMap"); const int64_t mem_limit = 4 * 1024 * 1024 * 1024LL; const int64_t bucket_num = cal_adaptive_bucket_num(); const int64_t pin_set_bucket_num = common::hash::cal_next_prime(DEFAULT_BUCKET_NUM); diff --git a/src/storage/ob_relative_table.cpp b/src/storage/ob_relative_table.cpp index 265d5b69c4..9c93628ad9 100644 --- a/src/storage/ob_relative_table.cpp +++ b/src/storage/ob_relative_table.cpp @@ -483,6 +483,11 @@ bool ObRelativeTable::is_spatial_index() const return schema_param_->is_spatial_index(); } +bool 
ObRelativeTable::is_fts_index() const +{ + return schema_param_->is_fts_index(); +} + int ObRelativeTable::check_rowkey_in_column_ids( const common::ObIArray &column_ids, const bool has_other_column) const diff --git a/src/storage/ob_relative_table.h b/src/storage/ob_relative_table.h index b7fed7b201..6f884c9f3f 100644 --- a/src/storage/ob_relative_table.h +++ b/src/storage/ob_relative_table.h @@ -91,6 +91,7 @@ public: bool is_unique_index() const; bool is_domain_index() const; bool is_spatial_index() const; + bool is_fts_index() const; int check_rowkey_in_column_ids(const common::ObIArray &column_ids, const bool has_other_column) const; int build_index_row(const common::ObNewRow &table_row, diff --git a/src/storage/ob_storage_schema.h b/src/storage/ob_storage_schema.h index be074ccea0..f0c1273546 100644 --- a/src/storage/ob_storage_schema.h +++ b/src/storage/ob_storage_schema.h @@ -219,6 +219,7 @@ public: } inline bool is_materialized_view() const { return share::schema::ObTableSchema::is_materialized_view(table_type_); } inline bool is_mlog_table() const { return share::schema::ObTableSchema::is_mlog_table(table_type_); } + inline bool is_fts_index() const { return share::schema::is_fts_index(index_type_); } virtual inline bool is_global_index_table() const override { return share::schema::ObSimpleTableSchemaV2::is_global_index_table(index_type_); } virtual inline int64_t get_block_size() const override { return block_size_; } diff --git a/src/storage/tablet/ob_tablet_create_mds_helper.cpp b/src/storage/tablet/ob_tablet_create_mds_helper.cpp index dd76f0175a..2cd0e60cc5 100644 --- a/src/storage/tablet/ob_tablet_create_mds_helper.cpp +++ b/src/storage/tablet/ob_tablet_create_mds_helper.cpp @@ -621,6 +621,10 @@ bool ObTabletCreateMdsHelper::check_need_create_empty_major_sstable( bool need_create_empty_major_sstable = true; if (DATA_VERSION_4_3_0_0 <= create_tablet_extra_info.tenant_data_version_) { need_create_empty_major_sstable = 
create_tablet_extra_info.need_create_empty_major_; + // TODO: @jinzhu, please remove me later, after hanxuan implement fts ddl task for post-creating index. + if (create_tablet_schema.is_fts_index() && !create_tablet_schema.can_read_index()) { + need_create_empty_major_sstable = true; + } } else { need_create_empty_major_sstable = !(create_tablet_schema.is_user_hidden_table() || (create_tablet_schema.is_index_table() && !create_tablet_schema.can_read_index())); diff --git a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result index 9c92edf247..81979ae7a4 100644 --- a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result +++ b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_filter_mysql.result @@ -83,7 +83,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |355 | ================================================= Outputs & filters: ------------------------------------- @@ -125,17 +125,17 @@ Optimization Info: ------------------------------------- t: table_rows:8 - physical_range_rows:12 - logical_range_rows:12 - index_back_rows:3 + physical_range_rows:8 + logical_range_rows:8 + index_back_rows:2 output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -145,7 +145,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |355 | 
================================================= Outputs & filters: ------------------------------------- @@ -187,17 +187,17 @@ Optimization Info: ------------------------------------- t: table_rows:8 - physical_range_rows:12 - logical_range_rows:12 - index_back_rows:3 + physical_range_rows:8 + logical_range_rows:8 + index_back_rows:2 output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -207,7 +207,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |355 | ================================================= Outputs & filters: ------------------------------------- @@ -249,17 +249,17 @@ Optimization Info: ------------------------------------- t: table_rows:8 - physical_range_rows:12 - logical_range_rows:12 - index_back_rows:3 + physical_range_rows:8 + logical_range_rows:8 + index_back_rows:2 output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -269,7 +269,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |879 | +|0 |TABLE FULL SCAN|t(idx)|1 |363 | ================================================= Outputs & filters: ------------------------------------- @@ -312,17 +312,17 @@ Optimization Info: ------------------------------------- t: table_rows:8 - physical_range_rows:26 - logical_range_rows:26 - index_back_rows:6 - 
output_rows:1 + physical_range_rows:8 + logical_range_rows:8 + index_back_rows:2 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -332,7 +332,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |355 | ================================================= Outputs & filters: ------------------------------------- @@ -374,17 +374,17 @@ Optimization Info: ------------------------------------- t: table_rows:8 - physical_range_rows:12 - logical_range_rows:12 - index_back_rows:3 + physical_range_rows:8 + logical_range_rows:8 + index_back_rows:2 output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -394,7 +394,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |493 | +|0 |TABLE FULL SCAN|t(idx)|1 |359 | ================================================= Outputs & filters: ------------------------------------- @@ -437,17 +437,17 @@ Optimization Info: ------------------------------------- t: table_rows:8 - physical_range_rows:12 - logical_range_rows:12 - index_back_rows:3 + physical_range_rows:8 + logical_range_rows:8 + index_back_rows:2 output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic 
sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -457,7 +457,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |799 | +|0 |TABLE FULL SCAN|t(idx)|1 |351 | ================================================= Outputs & filters: ------------------------------------- @@ -500,17 +500,17 @@ Optimization Info: ------------------------------------- t: table_rows:8 - physical_range_rows:24 - logical_range_rows:24 - index_back_rows:6 - output_rows:1 + physical_range_rows:8 + logical_range_rows:8 + index_back_rows:2 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -520,7 +520,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |355 | ================================================= Outputs & filters: ------------------------------------- @@ -562,17 +562,17 @@ Optimization Info: ------------------------------------- t: table_rows:8 - physical_range_rows:12 - logical_range_rows:12 - index_back_rows:3 + physical_range_rows:8 + logical_range_rows:8 + index_back_rows:2 output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -641,7 +641,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- 
-|0 |TABLE FULL SCAN|t(idx)|3 |185 | +|0 |TABLE FULL SCAN|t(idx)|2 |166 | ================================================= Outputs & filters: ------------------------------------- @@ -683,17 +683,17 @@ Optimization Info: ------------------------------------- t: table_rows:8 - physical_range_rows:12 - logical_range_rows:12 - index_back_rows:5 - output_rows:2 + physical_range_rows:8 + logical_range_rows:8 + index_back_rows:3 + output_rows:1 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -703,7 +703,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |453 | +|0 |TABLE FULL SCAN|t(idx)|1 |338 | ================================================= Outputs & filters: ------------------------------------- @@ -745,8 +745,8 @@ Optimization Info: ------------------------------------- t: table_rows:8 - physical_range_rows:12 - logical_range_rows:12 + physical_range_rows:8 + logical_range_rows:8 index_back_rows:0 output_rows:0 table_dop:1 @@ -754,8 +754,8 @@ Optimization Info: avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -972,7 +972,7 @@ Query Plan =========================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------- -|0 |TABLE FULL SCAN|geo_table2(geom)|1 |305 | +|0 |TABLE FULL SCAN|geo_table2(geom)|1 |187 | =========================================================== Outputs & filters: ------------------------------------- @@ -1016,17 +1016,17 @@ 
Optimization Info: ------------------------------------- geo_table2: table_rows:2 - physical_range_rows:6 - logical_range_rows:6 - index_back_rows:1 + physical_range_rows:2 + logical_range_rows:2 + index_back_rows:0 output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[geom, geo_table2] pruned_index_name:[geo_table2] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1042,7 +1042,7 @@ Query Plan =========================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ----------------------------------------------------------- -|0 |TABLE FULL SCAN|geo_table2(geom)|1 |305 | +|0 |TABLE FULL SCAN|geo_table2(geom)|1 |187 | =========================================================== Outputs & filters: ------------------------------------- @@ -1086,17 +1086,17 @@ Optimization Info: ------------------------------------- geo_table2: table_rows:2 - physical_range_rows:6 - logical_range_rows:6 - index_back_rows:1 + physical_range_rows:2 + logical_range_rows:2 + index_back_rows:0 output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[geom, geo_table2] pruned_index_name:[geo_table2] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1127,8 +1127,8 @@ Query Plan ============================================================ |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------------------ -|0 |SORT | |1 |305 | -|1 |└─TABLE FULL SCAN|geo_table(geom)|1 |305 | +|0 |SORT | |1 |299 | +|1 |└─TABLE FULL SCAN|geo_table(geom)|1 |299 | ============================================================ Outputs & filters: ------------------------------------- @@ -1181,8 +1181,8 @@ Optimization Info: avaiable_index_name:[geom, geo_table] 
pruned_index_name:[geo_table] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1348,7 +1348,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -1390,17 +1390,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1410,7 +1410,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -1452,17 +1452,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1472,7 +1472,7 
@@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -1514,17 +1514,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1534,7 +1534,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|3 |1128 | +|0 |TABLE FULL SCAN|t(idx)|1 |540 | ================================================= Outputs & filters: ------------------------------------- @@ -1578,17 +1578,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:35 - logical_range_rows:35 - index_back_rows:8 - output_rows:2 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1598,7 +1598,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 
|523 | ================================================= Outputs & filters: ------------------------------------- @@ -1640,17 +1640,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1660,7 +1660,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |1031 | +|0 |TABLE FULL SCAN|t(idx)|1 |527 | ================================================= Outputs & filters: ------------------------------------- @@ -1703,17 +1703,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:32 - logical_range_rows:32 - index_back_rows:8 - output_rows:2 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1723,7 +1723,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |295 | +|0 |TABLE FULL SCAN|t(idx)|1 |519 | ================================================= Outputs & filters: ------------------------------------- @@ -1766,17 +1766,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - 
physical_range_rows:6 - logical_range_rows:6 - index_back_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1786,7 +1786,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -1828,17 +1828,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -1960,7 +1960,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -2002,17 +2002,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] 
pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2022,7 +2022,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -2064,17 +2064,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2084,7 +2084,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -2126,17 +2126,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2146,7 +2146,7 @@ Query 
Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|4 |1526 | +|0 |TABLE FULL SCAN|t(idx)|1 |531 | ================================================= Outputs & filters: ------------------------------------- @@ -2189,17 +2189,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:49 - logical_range_rows:49 - index_back_rows:12 - output_rows:3 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2209,7 +2209,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -2251,17 +2251,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2271,7 +2271,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |1031 | +|0 |TABLE FULL SCAN|t(idx)|1 |527 | 
================================================= Outputs & filters: ------------------------------------- @@ -2314,17 +2314,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:32 - logical_range_rows:32 - index_back_rows:8 - output_rows:2 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2334,7 +2334,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|3 |1387 | +|0 |TABLE FULL SCAN|t(idx)|1 |519 | ================================================= Outputs & filters: ------------------------------------- @@ -2377,17 +2377,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:45 - logical_range_rows:45 - index_back_rows:11 - output_rows:2 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2397,7 +2397,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -2439,17 +2439,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - 
physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2459,7 +2459,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -2501,17 +2501,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2521,7 +2521,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -2563,17 +2563,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, 
t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2583,7 +2583,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -2625,17 +2625,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2645,7 +2645,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|3 |1128 | +|0 |TABLE FULL SCAN|t(idx)|1 |540 | ================================================= Outputs & filters: ------------------------------------- @@ -2689,17 +2689,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:35 - logical_range_rows:35 - index_back_rows:8 - output_rows:2 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2709,7 +2709,7 @@ 
Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -2751,17 +2751,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2771,7 +2771,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |1031 | +|0 |TABLE FULL SCAN|t(idx)|1 |527 | ================================================= Outputs & filters: ------------------------------------- @@ -2814,17 +2814,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:32 - logical_range_rows:32 - index_back_rows:8 - output_rows:2 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2834,7 +2834,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |295 | +|0 |TABLE FULL SCAN|t(idx)|1 |519 
| ================================================= Outputs & filters: ------------------------------------- @@ -2877,17 +2877,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:6 - logical_range_rows:6 - index_back_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -2897,7 +2897,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|2 |999 | +|0 |TABLE FULL SCAN|t(idx)|1 |523 | ================================================= Outputs & filters: ------------------------------------- @@ -2939,17 +2939,17 @@ Optimization Info: ------------------------------------- t: table_rows:14 - physical_range_rows:31 - logical_range_rows:31 - index_back_rows:7 - output_rows:1 + physical_range_rows:14 + logical_range_rows:14 + index_back_rows:3 + output_rows:0 table_dop:1 dop_method:Table DOP avaiable_index_name:[idx, t] pruned_index_name:[t] stats version:0 - dynamic sampling level:0 - estimation method:[DEFAULT, STORAGE] + dynamic sampling level:1 + estimation method:[DYNAMIC SAMPLING BASIC] Plan Type: LOCAL Note: @@ -3201,7 +3201,7 @@ Query Plan ======================================================================================== |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ---------------------------------------------------------------------------------------- -|0 |TABLE FULL SCAN|spatial_point_in_line(index_spatial_line_geo)|1 |324 | +|0 |TABLE FULL SCAN|spatial_point_in_line(index_spatial_line_geo)|1 |279 | ======================================================================================== 
Outputs & filters: ------------------------------------- diff --git a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result index f36c28fba3..732037e7eb 100644 --- a/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result +++ b/tools/deploy/mysql_test/test_suite/geometry/r/mysql/geometry_index2_mysql.result @@ -540,7 +540,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -565,7 +565,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -590,7 +590,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -615,7 +615,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -640,7 +640,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| 
------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -665,7 +665,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -690,7 +690,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -715,7 +715,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -740,7 +740,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |304 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -765,7 +765,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |304 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: 
------------------------------------- @@ -790,7 +790,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |304 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -815,7 +815,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |304 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -840,7 +840,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -865,7 +865,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |478 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -947,7 +947,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -972,7 +972,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE 
FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -997,7 +997,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -1022,7 +1022,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -1047,7 +1047,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -1072,7 +1072,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -1097,7 +1097,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -1122,7 +1122,7 @@ Query Plan 
================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -1147,7 +1147,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -1172,7 +1172,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -1197,7 +1197,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -1222,7 +1222,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |218 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- @@ -1247,7 +1247,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |159 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 
| ================================================= Outputs & filters: ------------------------------------- @@ -1272,7 +1272,7 @@ Query Plan ================================================= |ID|OPERATOR |NAME |EST.ROWS|EST.TIME(us)| ------------------------------------------------- -|0 |TABLE FULL SCAN|t(idx)|1 |159 | +|0 |TABLE FULL SCAN|t(idx)|1 |299 | ================================================= Outputs & filters: ------------------------------------- diff --git a/tools/deploy/mysql_test/test_suite/information_schema/r/mysql/information_schema_desc.result b/tools/deploy/mysql_test/test_suite/information_schema/r/mysql/information_schema_desc.result index 5a8ba8356f..371cb3bec5 100644 --- a/tools/deploy/mysql_test/test_suite/information_schema/r/mysql/information_schema_desc.result +++ b/tools/deploy/mysql_test/test_suite/information_schema/r/mysql/information_schema_desc.result @@ -272,7 +272,7 @@ SESSION_VARIABLES CREATE TABLE `SESSION_VARIABLES` ( ) DEFAULT CHARSET = utf8mb4 COMPRESSION = 'none' REPLICA_NUM = 1 BLOCK_SIZE = 16384 USE_BLOOM_FILTER = FALSE TABLET_SIZE = 134217728 PCTFREE = 10 show create table statistics; View Create View character_set_client collation_connection -STATISTICS CREATE VIEW `STATISTICS` AS SELECT CAST('def' AS CHAR(512)) AS TABLE_CATALOG, V.TABLE_SCHEMA collate utf8mb4_name_case AS TABLE_SCHEMA, V.TABLE_NAME collate utf8mb4_name_case AS TABLE_NAME, CAST(V.NON_UNIQUE AS SIGNED) AS NON_UNIQUE, V.INDEX_SCHEMA collate utf8mb4_name_case AS INDEX_SCHEMA, V.INDEX_NAME collate utf8mb4_name_case AS INDEX_NAME, CAST(V.SEQ_IN_INDEX AS UNSIGNED) AS SEQ_IN_INDEX, V.COLUMN_NAME AS COLUMN_NAME, CAST('A' AS CHAR(1)) AS COLLATION, CAST(NULL AS SIGNED) AS CARDINALITY, CAST(V.SUB_PART AS SIGNED) AS SUB_PART, CAST(NULL AS CHAR(10)) AS PACKED, CAST(V.NULLABLE AS CHAR(3)) AS NULLABLE, CAST(V.INDEX_TYPE AS CHAR(16)) AS INDEX_TYPE, CAST(V.COMMENT AS CHAR(16)) AS COMMENT, CAST(V.INDEX_COMMENT AS CHAR(1024)) AS INDEX_COMMENT, CAST(V.IS_VISIBLE AS 
CHAR(3)) AS IS_VISIBLE, V.EXPRESSION AS EXPRESSION FROM (SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, CASE WHEN i.index_type IN (2,4,8) THEN 0 ELSE 1 END AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, substr(i.table_name, 7 + instr(substr(i.table_name, 7), '_')) AS INDEX_NAME, c.index_position AS SEQ_IN_INDEX, CASE WHEN d_col.column_name IS NOT NULL THEN d_col.column_name ELSE c.column_name END AS COLUMN_NAME, CASE WHEN d_col.column_name IS NOT NULL THEN c.data_length ELSE NULL END AS SUB_PART, CASE WHEN c.nullable = 1 THEN 'YES' ELSE '' END AS NULLABLE, CASE WHEN i.index_using_type = 0 THEN 'BTREE' ELSE (CASE WHEN i.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END)END AS INDEX_TYPE, CASE i.index_status WHEN 2 THEN 'VALID' WHEN 3 THEN 'CHECKING' WHEN 4 THEN 'INELEGIBLE' WHEN 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS COMMENT, i.comment AS INDEX_COMMENT, CASE WHEN (i.index_attributes_set & 1) THEN 'NO' ELSE 'YES' END AS IS_VISIBLE, d_col2.cur_default_value_v2 AS EXPRESSION FROM oceanbase.__all_table i JOIN oceanbase.__all_table t ON i.data_table_id=t.table_id AND i.tenant_id = t.tenant_id AND i.database_id = t.database_id AND i.table_type = 5 AND i.table_mode >> 12 & 15 in (0,1) AND t.table_type in (0,3) JOIN oceanbase.__all_column c ON i.table_id=c.table_id AND i.tenant_id = c.tenant_id AND c.index_position > 0 JOIN oceanbase.__all_database db ON i.tenant_id = db.tenant_id AND i.database_id = db.database_id AND db.in_recyclebin = 0 AND db.database_name != '__recyclebin' LEFT JOIN oceanbase.__all_column d_col ON i.data_table_id = d_col.table_id AND i.tenant_id = d_col.tenant_id AND (case when (c.is_hidden = 1 and substr(c.column_name, 1, 8) = '__substr') then substr(c.column_name, 8 + instr(substr(c.column_name, 8), '_')) else 0 end) = d_col.column_id LEFT JOIN oceanbase.__all_column d_col2 ON i.data_table_id = d_col2.table_id AND i.tenant_id = d_col2.tenant_id AND c.column_id = d_col2.column_id AND d_col2.cur_default_value_v2 is not null AND 
d_col2.is_hidden = 1 AND (d_col2.column_flags & (0x1 << 0) = 1 or d_col2.column_flags & (0x1 << 1) = 1) AND substr(d_col2.column_name, 1, 6) = 'SYS_NC' UNION ALL SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, 0 AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, 'PRIMARY' AS INDEX_NAME, c.rowkey_position AS SEQ_IN_INDEX, c.column_name AS COLUMN_NAME, NULL AS SUB_PART, '' AS NULLABLE, CASE WHEN t.index_using_type = 0 THEN 'BTREE' ELSE ( CASE WHEN t.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END) END AS INDEX_TYPE, 'VALID' AS COMMENT, t.comment AS INDEX_COMMENT, 'YES' AS IS_VISIBLE, NULL AS EXPRESSION FROM oceanbase.__all_table t JOIN oceanbase.__all_column c ON t.table_id=c.table_id AND t.tenant_id = c.tenant_id AND c.rowkey_position > 0 AND c.is_hidden = 0 AND t.table_type in (0,3) JOIN oceanbase.__all_database db ON t.tenant_id = db.tenant_id AND t.database_id = db.database_id AND db.in_recyclebin = 0 AND db.database_name != '__recyclebin' UNION ALL SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, CASE WHEN i.index_type IN (2,4,8) THEN 0 ELSE 1 END AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, substr(i.table_name, 7 + instr(substr(i.table_name, 7), '_')) AS INDEX_NAME, c.index_position AS SEQ_IN_INDEX, CASE WHEN d_col.column_name IS NOT NULL THEN d_col.column_name ELSE c.column_name END AS COLUMN_NAME, CASE WHEN d_col.column_name IS NOT NULL THEN c.data_length ELSE NULL END AS SUB_PART, CASE WHEN c.nullable = 1 THEN 'YES' ELSE '' END AS NULLABLE, CASE WHEN i.index_using_type = 0 THEN 'BTREE' ELSE (CASE WHEN i.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END)END AS INDEX_TYPE, CASE i.index_status WHEN 2 THEN 'VALID' WHEN 3 THEN 'CHECKING' WHEN 4 THEN 'INELEGIBLE' WHEN 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS COMMENT, i.comment AS INDEX_COMMENT, CASE WHEN (i.index_attributes_set & 1) THEN 'NO' ELSE 'YES' END AS IS_VISIBLE, d_col2.cur_default_value_v2 AS EXPRESSION FROM oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE i JOIN 
oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE t ON i.data_table_id=t.table_id AND i.tenant_id = t.tenant_id AND i.database_id = t.database_id AND i.table_type = 5 AND t.table_type in (0,3) AND t.tenant_id = EFFECTIVE_TENANT_ID() JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE c ON i.table_id=c.table_id AND i.tenant_id = c.tenant_id AND c.index_position > 0 JOIN oceanbase.__all_database db ON i.database_id = db.database_id LEFT JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE d_col ON i.data_table_id = d_col.table_id AND i.tenant_id = d_col.tenant_id AND (case when (c.is_hidden = 1 and substr(c.column_name, 1, 8) = '__substr') then substr(c.column_name, 8 + instr(substr(c.column_name, 8), '_')) else 0 end) = d_col.column_id LEFT JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE d_col2 ON i.data_table_id = d_col2.table_id AND i.tenant_id = d_col2.tenant_id AND c.column_id = d_col2.column_id AND d_col2.cur_default_value_v2 is not null AND d_col2.is_hidden = 1 AND (d_col2.column_flags & (0x1 << 0) = 1 or d_col2.column_flags & (0x1 << 1) = 1) AND substr(d_col2.column_name, 1, 6) = 'SYS_NC' UNION ALL SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, 0 AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, 'PRIMARY' AS INDEX_NAME, c.rowkey_position AS SEQ_IN_INDEX, c.column_name AS COLUMN_NAME, NULL AS SUB_PART, '' AS NULLABLE, CASE WHEN t.index_using_type = 0 THEN 'BTREE' ELSE ( CASE WHEN t.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END) END AS INDEX_TYPE, 'VALID' AS COMMENT, t.comment AS INDEX_COMMENT, 'YES' AS IS_VISIBLE, NULL AS EXPRESSION FROM oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE t JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE c ON t.table_id=c.table_id AND t.tenant_id = c.tenant_id AND t.tenant_id = EFFECTIVE_TENANT_ID() AND c.rowkey_position > 0 AND c.is_hidden = 0 AND t.table_type in (0,3) JOIN oceanbase.__all_database db ON t.database_id = db.database_id)V utf8mb4 utf8mb4_general_ci +STATISTICS CREATE VIEW `STATISTICS` AS SELECT CAST('def' AS CHAR(512)) AS 
TABLE_CATALOG, V.TABLE_SCHEMA collate utf8mb4_name_case AS TABLE_SCHEMA, V.TABLE_NAME collate utf8mb4_name_case AS TABLE_NAME, CAST(V.NON_UNIQUE AS SIGNED) AS NON_UNIQUE, V.INDEX_SCHEMA collate utf8mb4_name_case AS INDEX_SCHEMA, V.INDEX_NAME collate utf8mb4_name_case AS INDEX_NAME, CAST(V.SEQ_IN_INDEX AS UNSIGNED) AS SEQ_IN_INDEX, V.COLUMN_NAME AS COLUMN_NAME, CAST('A' AS CHAR(1)) AS COLLATION, CAST(NULL AS SIGNED) AS CARDINALITY, CAST(V.SUB_PART AS SIGNED) AS SUB_PART, CAST(NULL AS CHAR(10)) AS PACKED, CAST(V.NULLABLE AS CHAR(3)) AS NULLABLE, CAST(V.INDEX_TYPE AS CHAR(16)) AS INDEX_TYPE, CAST(V.COMMENT AS CHAR(16)) AS COMMENT, CAST(V.INDEX_COMMENT AS CHAR(1024)) AS INDEX_COMMENT, CAST(V.IS_VISIBLE AS CHAR(3)) AS IS_VISIBLE, V.EXPRESSION AS EXPRESSION FROM (SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, CASE WHEN i.index_type IN (2,4,8) THEN 0 ELSE 1 END AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, substr(i.table_name, 7 + instr(substr(i.table_name, 7), '_')) AS INDEX_NAME, c.index_position AS SEQ_IN_INDEX, CASE WHEN d_col.column_name IS NOT NULL THEN d_col.column_name ELSE c.column_name END AS COLUMN_NAME, CASE WHEN d_col.column_name IS NOT NULL THEN c.data_length ELSE NULL END AS SUB_PART, CASE WHEN c.nullable = 1 THEN 'YES' ELSE '' END AS NULLABLE, CASE WHEN i.index_type in (15, 18, 21) THEN 'FULLTEXT' WHEN i.index_using_type = 0 THEN 'BTREE' WHEN i.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END AS INDEX_TYPE, CASE i.index_status WHEN 2 THEN 'VALID' WHEN 3 THEN 'CHECKING' WHEN 4 THEN 'INELEGIBLE' WHEN 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS COMMENT, i.comment AS INDEX_COMMENT, CASE WHEN (i.index_attributes_set & 1) THEN 'NO' ELSE 'YES' END AS IS_VISIBLE, d_col2.cur_default_value_v2 AS EXPRESSION FROM oceanbase.__all_table i JOIN oceanbase.__all_table t ON i.data_table_id=t.table_id AND i.tenant_id = t.tenant_id AND i.database_id = t.database_id AND i.table_type = 5 AND i.index_type NOT IN (13, 14, 16, 17, 19, 20, 22) AND i.table_mode 
>> 12 & 15 in (0,1) AND t.table_type in (0,3) JOIN oceanbase.__all_column c ON i.table_id=c.table_id AND i.tenant_id = c.tenant_id AND c.index_position > 0 JOIN oceanbase.__all_database db ON i.tenant_id = db.tenant_id AND i.database_id = db.database_id AND db.in_recyclebin = 0 AND db.database_name != '__recyclebin' LEFT JOIN oceanbase.__all_column d_col ON i.data_table_id = d_col.table_id AND i.tenant_id = d_col.tenant_id AND (case when (c.is_hidden = 1 and substr(c.column_name, 1, 8) = '__substr') then substr(c.column_name, 8 + instr(substr(c.column_name, 8), '_')) else 0 end) = d_col.column_id LEFT JOIN oceanbase.__all_column d_col2 ON i.data_table_id = d_col2.table_id AND i.tenant_id = d_col2.tenant_id AND c.column_id = d_col2.column_id AND d_col2.cur_default_value_v2 is not null AND d_col2.is_hidden = 1 AND (d_col2.column_flags & (0x1 << 0) = 1 or d_col2.column_flags & (0x1 << 1) = 1) AND substr(d_col2.column_name, 1, 6) = 'SYS_NC' UNION ALL SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, 0 AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, 'PRIMARY' AS INDEX_NAME, c.rowkey_position AS SEQ_IN_INDEX, c.column_name AS COLUMN_NAME, NULL AS SUB_PART, '' AS NULLABLE, CASE WHEN t.index_using_type = 0 THEN 'BTREE' ELSE ( CASE WHEN t.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END) END AS INDEX_TYPE, 'VALID' AS COMMENT, t.comment AS INDEX_COMMENT, 'YES' AS IS_VISIBLE, NULL AS EXPRESSION FROM oceanbase.__all_table t JOIN oceanbase.__all_column c ON t.table_id=c.table_id AND t.tenant_id = c.tenant_id AND c.rowkey_position > 0 AND c.is_hidden = 0 AND t.table_type in (0,3) JOIN oceanbase.__all_database db ON t.tenant_id = db.tenant_id AND t.database_id = db.database_id AND db.in_recyclebin = 0 AND db.database_name != '__recyclebin' UNION ALL SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, CASE WHEN i.index_type IN (2,4,8) THEN 0 ELSE 1 END AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, substr(i.table_name, 7 + 
instr(substr(i.table_name, 7), '_')) AS INDEX_NAME, c.index_position AS SEQ_IN_INDEX, CASE WHEN d_col.column_name IS NOT NULL THEN d_col.column_name ELSE c.column_name END AS COLUMN_NAME, CASE WHEN d_col.column_name IS NOT NULL THEN c.data_length ELSE NULL END AS SUB_PART, CASE WHEN c.nullable = 1 THEN 'YES' ELSE '' END AS NULLABLE, CASE WHEN i.index_type in (15, 18, 21) THEN 'FULLTEXT' WHEN i.index_using_type = 0 THEN 'BTREE' WHEN i.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END AS INDEX_TYPE, CASE i.index_status WHEN 2 THEN 'VALID' WHEN 3 THEN 'CHECKING' WHEN 4 THEN 'INELEGIBLE' WHEN 5 THEN 'ERROR' ELSE 'UNUSABLE' END AS COMMENT, i.comment AS INDEX_COMMENT, CASE WHEN (i.index_attributes_set & 1) THEN 'NO' ELSE 'YES' END AS IS_VISIBLE, d_col2.cur_default_value_v2 AS EXPRESSION FROM oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE i JOIN oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE t ON i.data_table_id=t.table_id AND i.tenant_id = t.tenant_id AND i.database_id = t.database_id AND i.table_type = 5 AND i.index_type NOT IN (13, 14, 16, 17, 19, 20, 22) AND t.table_type in (0,3) AND t.tenant_id = EFFECTIVE_TENANT_ID() JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE c ON i.table_id=c.table_id AND i.tenant_id = c.tenant_id AND c.index_position > 0 JOIN oceanbase.__all_database db ON i.database_id = db.database_id LEFT JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE d_col ON i.data_table_id = d_col.table_id AND i.tenant_id = d_col.tenant_id AND (case when (c.is_hidden = 1 and substr(c.column_name, 1, 8) = '__substr') then substr(c.column_name, 8 + instr(substr(c.column_name, 8), '_')) else 0 end) = d_col.column_id LEFT JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE d_col2 ON i.data_table_id = d_col2.table_id AND i.tenant_id = d_col2.tenant_id AND c.column_id = d_col2.column_id AND d_col2.cur_default_value_v2 is not null AND d_col2.is_hidden = 1 AND (d_col2.column_flags & (0x1 << 0) = 1 or d_col2.column_flags & (0x1 << 1) = 1) AND substr(d_col2.column_name, 1, 6) = 'SYS_NC' UNION ALL 
SELECT db.database_name AS TABLE_SCHEMA, t.table_name AS TABLE_NAME, 0 AS NON_UNIQUE, db.database_name AS INDEX_SCHEMA, 'PRIMARY' AS INDEX_NAME, c.rowkey_position AS SEQ_IN_INDEX, c.column_name AS COLUMN_NAME, NULL AS SUB_PART, '' AS NULLABLE, CASE WHEN t.index_using_type = 0 THEN 'BTREE' ELSE ( CASE WHEN t.index_using_type = 1 THEN 'HASH' ELSE 'UNKOWN' END) END AS INDEX_TYPE, 'VALID' AS COMMENT, t.comment AS INDEX_COMMENT, 'YES' AS IS_VISIBLE, NULL AS EXPRESSION FROM oceanbase.__ALL_VIRTUAL_CORE_ALL_TABLE t JOIN oceanbase.__ALL_VIRTUAL_CORE_COLUMN_TABLE c ON t.table_id=c.table_id AND t.tenant_id = c.tenant_id AND t.tenant_id = EFFECTIVE_TENANT_ID() AND c.rowkey_position > 0 AND c.is_hidden = 0 AND t.table_type in (0,3) JOIN oceanbase.__all_database db ON t.database_id = db.database_id)V utf8mb4 utf8mb4_general_ci show create table table_constraints; View Create View character_set_client collation_connection TABLE_CONSTRAINTS CREATE VIEW `TABLE_CONSTRAINTS` AS SELECT CAST('def' AS CHAR(64)) AS CONSTRAINT_CATALOG, CAST(d.database_name AS CHAR(128)) collate utf8mb4_name_case AS CONSTRAINT_SCHEMA, CAST('PRIMARY' AS CHAR(256)) AS CONSTRAINT_NAME, CAST(d.database_name AS CHAR(128)) collate utf8mb4_name_case AS TABLE_SCHEMA, CAST(t.table_name AS CHAR(256)) collate utf8mb4_name_case AS TABLE_NAME, CAST('PRIMARY KEY' AS CHAR(11)) AS CONSTRAINT_TYPE, CAST('YES' AS CHAR(3)) AS ENFORCED FROM oceanbase.__all_database d JOIN oceanbase.__all_table t ON d.database_id = t.database_id WHERE (d.database_id = 201003 OR d.database_id > 500000) AND d.in_recyclebin = 0 AND t.table_type = 3 AND t.table_mode >> 16 & 1 = 0 AND t.table_mode >> 12 & 15 in (0,1) union all SELECT CAST('def' AS CHAR(64)) AS CONSTRAINT_CATALOG, CAST(d.database_name AS CHAR(128)) collate utf8mb4_name_case AS CONSTRAINT_SCHEMA, CAST(SUBSTR(it.table_name, 7 + INSTR(SUBSTR(it.table_name, 7), '_')) AS CHAR(256)) AS CONSTRAINT_NAME, CAST(d.database_name AS CHAR(128)) collate utf8mb4_name_case AS TABLE_SCHEMA, 
CAST(ut.table_name AS CHAR(256)) collate utf8mb4_name_case AS TABLE_NAME, CAST('UNIQUE' AS CHAR(11)) AS CONSTRAINT_TYPE, CAST('YES' AS CHAR(3)) AS ENFORCED FROM oceanbase.__all_database d JOIN oceanbase.__all_table it ON d.database_id = it.database_id JOIN oceanbase.__all_table ut ON it.data_table_id = ut.table_id WHERE d.database_id > 500000 AND d.in_recyclebin = 0 AND it.table_type = 5 AND it.index_type IN (2, 4, 8) union all SELECT CAST('def' AS CHAR(64)) AS CONSTRAINT_CATALOG, CAST(d.database_name AS CHAR(128)) collate utf8mb4_name_case AS CONSTRAINT_SCHEMA, CAST(c.constraint_name AS CHAR(256)) AS CONSTRAINT_NAME, CAST(d.database_name AS CHAR(128)) collate utf8mb4_name_case AS TABLE_SCHEMA, CAST(t.table_name AS CHAR(256)) collate utf8mb4_name_case AS TABLE_NAME, CAST('CHECK' AS CHAR(11)) AS CONSTRAINT_TYPE, CAST(CASE WHEN c.enable_flag = 1 THEN 'YES' ELSE 'NO' END AS CHAR(3)) AS ENFORCED FROM oceanbase.__all_database d JOIN oceanbase.__all_table t ON d.database_id = t.database_id JOIN oceanbase.__all_constraint c ON t.table_id = c.table_id WHERE d.database_id > 500000 AND d.in_recyclebin = 0 AND t.table_type = 3 AND c.constraint_type = 3 union all SELECT CAST('def' AS CHAR(64)) AS CONSTRAINT_CATALOG, CAST(f.constraint_schema AS CHAR(128)) collate utf8mb4_name_case AS CONSTRAINT_SCHEMA, CAST(f.constraint_name AS CHAR(256)) AS CONSTRAINT_NAME, CAST(f.constraint_schema AS CHAR(128)) collate utf8mb4_name_case AS TABLE_SCHEMA, CAST(f.table_name AS CHAR(256)) collate utf8mb4_name_case AS TABLE_NAME, CAST('FOREIGN KEY' AS CHAR(11)) AS CONSTRAINT_TYPE, CAST('YES' AS CHAR(3)) AS ENFORCED FROM information_schema.REFERENTIAL_CONSTRAINTS f utf8mb4 utf8mb4_general_ci diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result index fed39f5008..f23c528e29 100644 --- 
a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result @@ -282,6 +282,7 @@ _delay_resource_recycle_after_correctness_issue _enable_active_txn_transfer _enable_adaptive_compaction _enable_adaptive_merge_schedule +_enable_add_fulltext_index_to_existing_table _enable_backtrace_function _enable_balance_kill_transaction _enable_block_file_punch_hole diff --git a/tools/ob_admin/dumpsst/ob_admin_dumpsst_executor.cpp b/tools/ob_admin/dumpsst/ob_admin_dumpsst_executor.cpp index 19dde33cc7..5b54961aaf 100644 --- a/tools/ob_admin/dumpsst/ob_admin_dumpsst_executor.cpp +++ b/tools/ob_admin/dumpsst/ob_admin_dumpsst_executor.cpp @@ -345,7 +345,7 @@ int ObAdminDumpsstExecutor::dump_macro_block(const ObDumpMacroBlockContext ¯ } macro_handle.reset(); - STORAGE_LOG(INFO, "finish dump macro block", K(macro_block_context)); + STORAGE_LOG(INFO, "finish dump macro block", K(common_header), K(macro_block_context)); return ret; } diff --git a/unittest/share/CMakeLists.txt b/unittest/share/CMakeLists.txt index c51946fd72..b66b070ac8 100644 --- a/unittest/share/CMakeLists.txt +++ b/unittest/share/CMakeLists.txt @@ -76,6 +76,8 @@ ob_unittest(test_json_path) ob_unittest(test_json_schema) ob_unittest(test_json_tree) +ob_unittest(test_text_analyzer text_analysis/test_text_analyzer.cpp) + if(OB_BUILD_CLOSE_MODULES) ob_unittest(test_xml_bin) ob_unittest(test_xml_parser) diff --git a/unittest/share/test_json_tree.cpp b/unittest/share/test_json_tree.cpp index 487448c130..303ab5ac3d 100644 --- a/unittest/share/test_json_tree.cpp +++ b/unittest/share/test_json_tree.cpp @@ -2305,6 +2305,43 @@ TEST_F(TestJsonTree, test_big_json) } +TEST_F(TestJsonTree, test_parse_big_json) +{ + common::ObArenaAllocator allocator(ObModIds::TEST); + ObJsonBuffer j_buf(&allocator); + ASSERT_EQ(j_buf.reserve(1024 * 1024), 0); + + const char* j_string = "{\"nested_level0\": 
{\"key_0\":\"rFHOjQsWNRJ1fnbGP8dqmEYXk9ZpIw\", \"nested_level1\": {\"key_0\": false, \"key_1\": {\"key1\": [null, null], \"key2\": [{\"key1\": null, \"key2\": null, \"key3\": [{\"key1\": null, \"key2\": 21}, null]}, {\"key1\": {\"key1\": \"GwbmUpJ6RMigaVrfXqAWsQYbus409K\", \"key2\": [[{\"key1\": null}]], \"key3\": \"kuaI4mKt2W9bLAZ0R8zhH5yioGBN3V\"}, \"key2\": {\"key1\": {\"key1\": null}, \"key2\": null, \"key3\": [null, \"vID3PZfurRUT5LMFkG8QtOoWpK1JhH\", {\"key1\": \"KXFkMT35aDyGEj4mSxAU8YLqw9VspO\"}]}}], \"key3\": \"UoLf0FwEsQnGVeKy4SMdkqmslctDiY\"}, \"key_2\": {\"key1\": false, \"key2\": [null], \"key3\": \"GYxPF1X5kuby0SEwBO6aoWTss92McU\"}, \"key_3\": \"Xd01h4umROkcqHw23s8jCAyGpxD9MY\", \"key_4\": true, \"nested_level2\": {\"key_0\": null, \"key_1\": false, \"key_2\": 59, \"key_3\": [{\"key1\": 17, \"key2\": 54}], \"key_4\": {\"key1\": {\"key1\": null, \"key2\": 4, \"key3\": 82}, \"key2\": [\"el16BGbz2TESkOdJ7Zf8hW4Kw0RoV5\"], \"key3\": 19}, \"nested_level3\": {\"key_0\": [null], \"key_1\": {\"key1\": \"idKvulYwSAORrILPJWyMZXzq4tbU31\", \"key2\": [null, false, {\"key1\": {\"key1\": \"UvbyeVAJZuLwdPkqQ409Rss1XtaIN2\"}, \"key2\": \"G2RSg5jhwnXZOuPT17ovErQUtpbx43\"}]}, \"key_2\": \"MFvr5t8WHJXuBK7LVn6OfTbEkxYS2z\", \"key_3\": null, \"key_4\": false, \"nested_level4\": {\"key_0\": {\"key1\": 92, \"key2\": 85, \"key3\": \"rVXPL9us6CTuhwkc0DNHmGy1fWdJnU\"}, \"key_1\": {\"key1\": {\"key1\": {\"key1\": null, \"key2\": \"VE7s2KA5lvIT6t0Zhc4gx3UXRauquj\"}, \"key2\": 75, \"key3\": {\"key1\": {\"key1\": null, \"key2\": {\"key1\": 100, \"key2\": null}, \"key3\": null}, \"key2\": \"ngfA0JpjzRLGlxdIVhsmkawZTsy8XY\", \"key3\": [\"HFk4PXzxRGNMof7wgAWsJervbnyO5j\", \"v3xmXpZ6Ag0cKSs8ua19JbGEQd5Mit\"]}}, \"key2\": [{\"key1\": true, \"key2\": \"l9qfbRybMXaFkOWAPGJumwv3cUI6Y8\", \"key3\": 89}], \"key3\": true}, \"key_2\": [{\"key1\": {\"key1\": 26, \"key2\": [[[[{\"key1\": {\"key1\": null, \"key2\": null}}, null, false]], 11], {\"key1\": [false, 
\"8AR2jptqZEYMWDsr65PxfbVozQC3Xn\"]}, false], \"key3\": null}, \"key2\": true, \"key3\": true}, \"MaLcwBsy0W1nRtQbNFoi92CkvuK3md\"], \"nested_level5\": {\"key_0\": \"gxhDrz6nR2FZOUIyAjuuqt749XNMHs\", \"key_1\": true, \"nested_level6\": {\"key_0\": {\"key1\": [true], \"key2\": \"VgZsqym3j65eLJw9Crxf8WBIhRpbiT\"}, \"key_1\": [\"mVbEYeovLnj2ktWRXIusxZcHPb9rFB\"], \"key_2\": \"lXuouzj4svAx5Q6qBMyFerZEOV8YUp\", \"key_3\": null, \"nested_level7\": {\"key_0\": [true], \"key_1\": false, \"key_2\": [\"Oo93mYLJR1MHKZ7DbFVjbPaCh05lxf\"], \"nested_level8\": {\"key_0\": 72, \"key_1\": {\"key1\": \"aJzG0c5uR6wMhBpEPZKyNrlxDSI9jk\", \"key2\": [null, [[{\"key1\": 100}, {\"key1\": {\"key1\": [{\"key1\": 68, \"key2\": 34, \"key3\": {\"key1\": \"dX57msTtWEKZw0Pvi9zUaApsuRjS3e\"}}, true, null]}, \"key2\": null, \"key3\": [null, 39, \"8Wdcbl0wNkK4DBCqsJ7oajHI3V6Qus\"]}], {\"key1\": null, \"key2\": \"D0rIQwbL1PqEj8OypscdR3fNuVFslh\"}], [\"iS6BDbbXqnmTAJsLCs2v7y9aoPeuYU\"]]}, \"key_2\": [6, {\"key1\": 33, \"key2\": [\"ESoDuF8srvJqlkCu6tLWh52PBXyZpV\", \"m7NXHuhTjo8bigLWwnd01ZbOQVMzSv\"]}, \"9zPJN74yxCipGF6e2RqDWdtHB1bnKT\"], \"nested_level9\": {\"key_0\": {\"key1\": null, \"key2\": {\"key1\": [null, \"sFuZ57K4hUmTbJnYeoGVsquXSdIa8L\"]}, \"key3\": {\"key1\": false, \"key2\": 83, \"key3\": false}}, \"key_1\": false, \"nested_level10\": {\"key_0\": null, \"key_1\": true, \"key_2\": {\"key1\": null, \"key2\": null, \"key3\": false}, \"key_3\": {\"key1\": true}, \"key_4\": null, \"nested_level11\": {\"key_0\": [{\"key1\": {\"key1\": [{\"key1\": null}], \"key2\": true}}, [65]], \"key_1\": false, \"key_2\": \"34Esmiq7vl8Jy6TsnpfrWBkMtLZCcF\", \"key_3\": null, \"nested_level12\": {\"key_0\": \"l87CXV3fdbKPAJyvLZchr2uNezopOR\", \"key_1\": 7, \"nested_level13\": {\"key_0\": true, \"key_1\": null, \"key_2\": \"uqr9EYIc36kjWdPKXpOw2lf4MZmJSB\", \"nested_level14\": {\"key_0\": 78, \"key_1\": {\"key1\": false}, \"key_2\": null, \"nested_level15\": {\"key_0\": 79, \"key_1\": [null, {\"key1\": null, 
\"key2\": null, \"key3\": null}, \"6vRfNuGgiV02WL4Z5b1uyBdFo7HsJa\"], \"key_2\": null, \"nested_level16\": {\"key_0\": {\"key1\": null}, \"key_1\": null, \"key_2\": false, \"key_3\": false, \"nested_level17\": {\"key_0\": false, \"nested_level18\": {\"key_0\": null, \"key_1\": [null], \"key_2\": \"5fnC7lsItcugeZmuszXa1iQ6P32yOV\", \"nested_level19\": {\"key_0\": false, \"key_1\": 32, \"key_2\": {\"key1\": \"QIG7vZ9EhHR1uPr8ojebaTcOSYC3xK\", \"key2\": 31, \"key3\": 85}, \"key_3\": 76, \"nested_level20\": {\"key_0\": true, \"key_1\": [null], \"key_2\": {\"key1\": null, \"key2\": {\"key1\": false, \"key2\": false}}, \"key_3\": \"NGneWkhSHCY6yMl0xEBgtjs8aOIufi\", \"key_4\": true, \"nested_level21\": {\"key_0\": null, \"key_1\": {\"key1\": {\"key1\": {\"key1\": false, \"key2\": {\"key1\": 70, \"key2\": \"SbuP0nAVFimGDo9ONLqZQJbRt5kY3x\", \"key3\": null}}}, \"key2\": true, \"key3\": null}, \"key_2\": \"V9j3GruP6iYLAw7SFhEsymW2C1vQbB\", \"key_3\": true, \"nested_level22\": {\"key_0\": false, \"key_1\": [11, null, 40], \"key_2\": 32, \"nested_level23\": {\"key_0\": 43, \"key_1\": null, \"nested_level24\": {\"key_0\": [43, null, null], \"key_1\": 65, \"key_2\": [null, {\"key1\": null}], \"key_3\": null, \"nested_level25\": {\"key_0\": [{\"key1\": {\"key1\": [61], \"key2\": {\"key1\": [true], \"key2\": [null, 8]}}}, false], \"key_1\": {\"key1\": 45, \"key2\": {\"key1\": \"Y0fHj3yOuMFsti5neU6mkJXNrZguWb\", \"key2\": \"SzMi1P9Es0Dl8xAf73CqdkjIpKowuF\"}}, \"key_2\": 70, \"key_3\": [{\"key1\": \"csI9w0HunOjW7e32zvXgAMxo1thaGr\", \"key2\": null}, \"kByJSs7hUzqW6lp5iFuAP98rsctwVe\", {\"key1\": \"b9JY5BloueFMShLRgI6zGaZEfdtOpr\"}], \"key_4\": \"jSmlXdyoOGraMeVnNRT2sYzPqikBU6\", \"nested_level26\": {\"key_0\": [10], \"key_1\": 83, \"nested_level27\": {\"key_0\": false, \"key_1\": null, \"key_2\": 48, \"key_3\": null, \"key_4\": {\"key1\": false, \"key2\": null}, \"nested_level28\": {\"key_0\": null, \"key_1\": [\"aoMdfwsI2VB1jT9txYspDbzlGiq0Jg\"], \"key_2\": {\"key1\": null, 
\"key2\": \"1eb3XdFrbs7TJlhNsf2O5uAH8Qau0Y\", \"key3\": [\"KS0mbf4pqGjz9dLNAb5iolMkYJVuDs\", 53]}, \"key_3\": \"gRGcAbpKt6fPhuEOIbz7L5dweT2FuD\", \"nested_level29\": {\"key_0\": false, \"key_1\": null, \"key_2\": [true, 82], \"key_3\": \"sn2dwUzkPCbRS7fBWM8jGVtTa1iLe3\", \"key_4\": null, \"nested_level30\": {\"key_0\": 15, \"key_1\": {\"key1\": {\"key1\": {\"key1\": \"c6fJXenbCsgijM9WPpqATxEQhzYwI2\"}, \"key2\": 51, \"key3\": [true]}, \"key2\": \"wEqZtAk6XODMBGCTflgzj20FhuJ5NQ\", \"key3\": [true, {\"key1\": [33, {\"key1\": [52, \"gOCESk4bU1w7Z8KXhB6pedFzTsJMY3\", null]}, null]}]}, \"key_2\": \"oPEVR32FhwbUrYLO60bsgti7ljvDpc\", \"key_3\": [11], \"nested_level31\": {\"key_0\": 26, \"key_1\": false, \"key_2\": false, \"key_3\": [{\"key1\": 78, \"key2\": true, \"key3\": null}, false, [null, false, {\"key1\": {\"key1\": false}, \"key2\": {\"key1\": null, \"key2\": \"HqFrO72eoj1PQpbWRJt0Vy3hd4Uu5z\", \"key3\": false}}]], \"key_4\": 64, \"nested_level32\": {\"key_0\": null, \"key_1\": null, \"key_2\": {\"key1\": 7, \"key2\": \"KcUWAgEe1BiqZSXV2m8GdFahzPQbLM\"}, \"key_3\": \"UyDit9MGsXjrgRkluSQFBE2q56OAbu\", \"key_4\": 87, \"nested_level33\": {\"key_0\": {\"key1\": {\"key1\": {\"key1\": [\"IVKQWg7UsMo4kwGuehnZOizX5LxRNT\", \"skVtwZqWQrND3bdUxK5u8E1jmnhif0\"], \"key2\": false}}, \"key2\": [null]}, \"key_1\": [39, 45, true], \"nested_level34\": {\"key_0\": \"qF7ubaXmKAsJIijw0dUteTxN915DPz\", \"key_1\": 4, \"key_2\": \"qydnVaRsxw0kzmotYeUWJBT93g4Qsb\", \"key_3\": [\"p5Ts8YFGOj1yDvQUEnwctu6LhmeJK3\"], \"key_4\": [null], \"nested_level35\": {\"key_0\": [[{\"key1\": [\"9DVXfgU2loPiIqQjpvM3EROGtWshBd\", true], \"key2\": 21, \"key3\": 99}, {\"key1\": null}, {\"key1\": false, \"key2\": 92}], [[\"ixsu6CM5RZHBnjGe2oackwd8Jl43tq\"], 3], {\"key1\": false, \"key2\": {\"key1\": \"glr7Q4yVtpZC0X5oAvWjzTu6YuSKaM\", \"key2\": 98, \"key3\": [\"wxNqHTzES9WpyRdbhZtinODQA3FJlm\", 3, [\"ui5EOZeRkxYnjTr1lwVy8pcDobhzqS\"]]}, \"key3\": \"elG4iECrDUHBb6VvWKkds9hpM8cP17\"}], \"key_1\": 25, 
\"nested_level36\": {\"key_0\": \"JsRb0ytMH3oeh4gz97QBiVndsOYkIA\", \"key_1\": {\"key1\": \"jNiksS0R2vfKMoLhdO1gb8qyanAJc9\", \"key2\": \"V9wALy8oTBEMiUbFJfNtahz64rblC3\"}, \"key_2\": \"z5k0sWHTciSl6qyrCVm3ZJ9gYoba82\", \"key_3\": false, \"nested_level37\": {\"key_0\": \"YOn4jmScFweCxuksRATZb5GaNhypz1\", \"key_1\": null, \"key_2\": [{\"key1\": [\"k7Ez4jObJ1PVAGCbv25psiLlyhQwI3\"], \"key2\": {\"key1\": true}}], \"key_3\": {\"key1\": \"6vubTJsGDXmaWe2OcwhQHsVKtCuLjr\", \"key2\": null}, \"nested_level38\": {\"key_0\": [null], \"key_1\": false, \"key_2\": {\"key1\": [null, true], \"key2\": [97], \"key3\": null}, \"nested_level39\": {\"key_0\": [null], \"key_1\": {\"key1\": false, \"key2\": \"0xRhD1VTS4ANKGPLk8rQibofuB3CuY\", \"key3\": {\"key1\": \"lZdwviNxSWGC3uhOuY8E1npc0QUk4B\", \"key2\": \"daHs71Bk9DuWb8ClstZEKzLYNjxbnP\"}}, \"nested_level40\": {\"key_0\": null, \"nested_level41\": {\"key_0\": true, \"key_1\": \"wzWrhDO8EauX0bIYbRqcL5tNG1smnk\", \"nested_level42\": {\"key_0\": 5, \"key_1\": false, \"key_2\": 2, \"key_3\": 9, \"nested_level43\": {\"key_0\": 82, \"key_1\": 23, \"key_2\": true, \"nested_level44\": {\"key_0\": {\"key1\": true}, \"key_1\": \"gFTRk4bDEKHfysxtwcWBG3Is5AZhVl\", \"key_2\": \"XV8I6nCLQZg7q12H9rilpoRKWu5Eb0\", \"key_3\": \"Nq7RjfAsmbTFC8oWH6xKlIpwOtuX1c\", \"nested_level45\": {\"key_0\": {\"key1\": false, \"key2\": 100}, \"key_1\": 19, \"key_2\": [false], \"nested_level46\": {\"key_0\": null, \"key_1\": {\"key1\": [\"Ybd5Ewpklu3CWMcesF9ZuBTVj2yXQm\", 41, \"jH1KONP6QxGW8kBEeosthlAI735vZJ\"], \"key2\": {\"key1\": \"yBdgEDJlcOP4pfhXAYqFHj0M97nCve\"}}, \"key_2\": 50, \"key_3\": false, \"nested_level47\": {\"key_0\": [\"georEv7Yw5zFSJ8TNDVaqQkWsl4mMy\"], \"key_1\": {\"key1\": true, \"key2\": null}, \"nested_level48\": {\"key_0\": \"AqQDLRGK8iahu3jHsNomlIOJ96bv01\", \"key_1\": \"6yJEBOjsmgU1e9iKl3YvuunSDfQkCt\", \"key_2\": \"TW7tz9gFf4rDNlRsbq50QhJiuCI2Ej\", \"key_3\": {\"key1\": \"RS85hbtjzPd9uuavrLogKHbZfemAFl\", \"key2\": {\"key1\": false, 
\"key2\": false}}, \"key_4\": 36, \"nested_level49\": {\"key_0\": null, \"key_1\": \"w9eVTcFMN2IaSlufEd0kptnozxuOG5\", \"key_2\": false, \"key_3\": [null, [false], [true]], \"key_4\": 60, \"nested_level50\": {\"key_0\": false, \"key_1\": [null, 63, true], \"key_2\": 1, \"nested_level51\": {\"key_0\": {\"key1\": {\"key1\": false, \"key2\": {\"key1\": null, \"key2\": 53, \"key3\": 11}}, \"key2\": {\"key1\": null}}, \"key_1\": {\"key1\": \"oCrdu8SjOIuX1EkaKUicYfF5Ghsy9p\"}, \"key_2\": null, \"key_3\": 64, \"key_4\": {\"key1\": {\"key1\": null, \"key2\": 58, \"key3\": [[89, {\"key1\": false, \"key2\": [false, {\"key1\": \"2GRO8SIiCAkuKfh6Q7cZlbHLTJ9sqy\", \"key2\": 89, \"key3\": \"ORhbeqgAzlWSwCTXMPDZEmdj8cynu9\"}], \"key3\": 69}, {\"key1\": [47, {\"key1\": {\"key1\": [null, null, false]}}, \"oSW6L1JYzgCQw0TbaIbylHuNiekFqZ\"]}], \"XwWf25EnY4U9NKphPsd7A8juL0OZts\", null]}, \"key2\": 11, \"key3\": null}, \"nested_level52\": {\"key_0\": null, \"key_1\": [61, {\"key1\": {\"key1\": [43], \"key2\": [22, {\"key1\": 31}], \"key3\": [true]}, \"key2\": [33, \"Wq4irhIsSuBHGtkbbZmsNCvJX1R5MK\", \"cDQPO782RInHAYeoGvKFZuSqbhrbzf\"], \"key3\": false}, null], \"key_2\": [[{\"key1\": 49}, [false]]], \"key_3\": 17, \"key_4\": [41, [10, {\"key1\": 26, \"key2\": false, \"key3\": {\"key1\": [null, false]}}, null]], \"nested_level53\": {\"key_0\": [{\"key1\": [\"WRQzp2AocbrhimtugCsnEGByTavN6x\", \"zcgDLGbBIamVh93r0dCPnHvo1qNYWt\", [[null, 46], null, {\"key1\": [[null]], \"key2\": \"5vc8wm9ksTXtBZP7afeE1VpyS6hWqd\", \"key3\": null}]], \"key2\": 49, \"key3\": [\"TKHc5CpigazQDWMA72G9RObtuveBN3\", [null, null]]}, {\"key1\": [\"sldEGIPLjV1euFUmksRa0OQ6Mn4CiY\", null], \"key2\": {\"key1\": [false, null]}, \"key3\": {\"key1\": [[\"sYbCReSo7FJyPbBlhuX1tIKTWaGjgA\"], true, [[55]]]}}, [true]], \"key_1\": [true], \"key_2\": 85, \"nested_level54\": {\"key_0\": null, \"key_1\": true, \"nested_level55\": {\"key_0\": 4, \"key_1\": 66, \"nested_level56\": {\"key_0\": [false, 26], \"key_1\": 
\"npSVJbctCYzkmKGD4X7uqg9dFjviIl\", \"nested_level57\": {\"key_0\": false, \"key_1\": [null, 66, 71], \"nested_level58\": {\"key_0\": \"3bhO9EKBodsAsb2S4WRkvgNmDilpQf\", \"key_1\": \"xJOkcYlBjbt4siPzM9SGF8dCU1nrfZ\", \"key_2\": 44, \"key_3\": \"zARhki7nO6cbDbemdHtv5JSx8rXj9Y\", \"nested_level59\": {\"key_0\": \"B9QLVci2EOhwyxKuusUe4dFGA8rPNR\", \"key_1\": [null, [true, \"XbT6K3OYjHaJAmsyUwuDku7WrGoep8\"]], \"key_2\": \"drXzih3GPsjeMsZfbkJUKL89waYncu\", \"nested_level60\": {\"key_0\": [{\"key1\": {\"key1\": 97}, \"key2\": null}, null], \"key_1\": true, \"key_2\": true, \"key_3\": [null, 56], \"nested_level61\": {\"key_0\": \"AkzfZmrYSDbjxuWnwya0eQ42gTqU7H\", \"nested_level62\": {\"key_0\": {\"key1\": 56, \"key2\": null}, \"key_1\": false, \"key_2\": 47, \"key_3\": {\"key1\": \"O9zdy8ujaUKWEDI6vnfum51TgoYPlM\", \"key2\": null, \"key3\": 9}, \"key_4\": null, \"nested_level63\": {\"key_0\": [{\"key1\": [true, null], \"key2\": true, \"key3\": {\"key1\": [18, [64]]}}], \"key_1\": false, \"nested_level64\": {\"key_0\": false, \"nested_level65\": {\"key_0\": 2, \"key_1\": true, \"key_2\": null, \"key_3\": \"7sbbIQcHFB8tz49auyV3dr0fAZwg2e\", \"key_4\": false, \"nested_level66\": {\"key_0\": [42, {\"key1\": null}], \"key_1\": [\"pngrb7eyvYPF3M9INU8zthGqdZi2cs\", null, [true, {\"key1\": true}, null]], \"key_2\": 41, \"nested_level67\": {\"key_0\": true, \"key_1\": [\"FiZhtaSPInDJAury9RbbcN0osszuOK\", \"cvls8GubUN7Iq95rhSDYuRTs1LxziC\"], \"nested_level68\": {\"key_0\": null, \"nested_level69\": {\"key_0\": \"lMgOQX6bPuImKJexAs9rD2bRcnZLu5\", \"key_1\": true, \"key_2\": \"YPuyawZ49Lhp03nWkujQb5mTBq6GtR\", \"key_3\": null}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}"; + // correct json text + common::ObString json_text(j_string); + + const char *syntaxerr = NULL; + ObJsonNode *json_tree = NULL; + + struct timeval time_start, time_end; + gettimeofday(&time_start, nullptr); + ASSERT_EQ(OB_SUCCESS, ObJsonParser::parse_json_text(&allocator, json_text.ptr(), 
+ json_text.length(), syntaxerr, NULL, json_tree)); + ASSERT_TRUE(json_tree != NULL); + + ObJsonBin bin(&allocator); + bin.parse_tree(json_tree); + + ObString raw_bin; + + ASSERT_EQ(OB_SUCCESS, bin.get_raw_binary(raw_bin, &allocator)); + + gettimeofday(&time_end, nullptr); + + + + cout << "time start : " << " sec = " << time_start.tv_sec << ", usec = " << time_start.tv_usec << endl; + cout << "time end : " << " sec = " << time_end.tv_sec << ", usec = " << time_end.tv_usec << endl; + +} + + + } // namespace common } // namespace oceanbase diff --git a/unittest/share/text_analysis/test_text_analyzer.cpp b/unittest/share/text_analysis/test_text_analyzer.cpp new file mode 100644 index 0000000000..aa60cdec51 --- /dev/null +++ b/unittest/share/text_analysis/test_text_analyzer.cpp @@ -0,0 +1,208 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX SHARE + +#include +#define protected public +#define private public + +#include "share/datum/ob_datum_funcs.h" +#include "share/rc/ob_tenant_base.h" +#include "share/text_analysis/ob_text_analyzer.h" + + +namespace oceanbase +{ +namespace share +{ + +class TestTextAnalyzer : public ::testing::Test +{ +public: + TestTextAnalyzer() : allocator_(), analysis_ctx_(), token_cmp_func_() {} + virtual ~TestTextAnalyzer() {} + virtual void SetUp(); + virtual void TearDowm() {} +private: + void analyze_test( + ObITextAnalyzer &analyzer, + const char *raw_doc, + const int64_t raw_doc_len, + const char **target_tokens, + const int64_t *target_token_len, + const int64_t *target_token_freq, + const int64_t target_token_cnt); + void find_token_in_target_array( + const ObDatum &query_token, + const char **target_tokens, + const int64_t *target_token_len, + const int64_t target_token_cnt, + int64_t &idx); +private: + ObArenaAllocator allocator_; + ObTextAnalysisCtx analysis_ctx_; + common::ObDatumCmpFuncType token_cmp_func_; +}; + +void TestTextAnalyzer::SetUp() +{ + share::ObTenantEnv::get_tenant_local()->id_ = 500; + analysis_ctx_.cs_ = ObCharset::get_charset(CS_TYPE_UTF8MB4_GENERAL_CI); + sql::ObExprBasicFuncs *basic_funcs = ObDatumFuncs::get_basic_func(ObVarcharType, CS_TYPE_UTF8MB4_GENERAL_CI); + token_cmp_func_ = basic_funcs->null_first_cmp_; +} + +void TestTextAnalyzer::analyze_test( + ObITextAnalyzer &analyzer, + const char *raw_doc, + const int64_t raw_doc_len, + const char **target_tokens, + const int64_t *target_token_len, + const int64_t *target_token_freq, + const int64_t target_token_cnt) +{ + ObDatum doc_datum; + doc_datum.set_string(raw_doc, raw_doc_len); + LOG_DEBUG("start test one tokenization", K(analyzer), K(doc_datum), K(doc_datum.get_string())); + + ObITokenStream *token_stream; + ASSERT_EQ(OB_SUCCESS, analyzer.analyze(doc_datum, token_stream)); + ASSERT_NE(nullptr, token_stream); + + int ret = OB_SUCCESS; + int64_t token_cnt = 
0; + while (OB_SUCC(ret)) { + ObDatum token; + int64_t token_freq = 0; + if (OB_FAIL(token_stream->get_next(token, token_freq))) { + if (OB_ITER_END != ret) { + LOG_WARN("Failed to get next token from token stream", KPC(token_stream)); + } + } else { + ASSERT_TRUE(token_cnt < target_token_cnt); + LOG_INFO("print token", K(token), K(token.get_string()), K(token_freq)); + int64_t idx = -1; + find_token_in_target_array(token, target_tokens, target_token_len, target_token_cnt, idx); + ASSERT_TRUE(idx >= 0 && idx < target_token_cnt) << "idx:" << idx; + ASSERT_EQ(token_freq, target_token_freq[idx]) << "token_freq:" << token_freq << "target_token_freq" << target_token_freq[idx]; + ++token_cnt; + } + } + ASSERT_EQ(OB_ITER_END, ret); + ASSERT_EQ(token_cnt, target_token_cnt); +} + +void TestTextAnalyzer::find_token_in_target_array( + const ObDatum &query_token, + const char **target_tokens, + const int64_t *target_token_len, + const int64_t target_token_cnt, + int64_t &idx) +{ + idx = -1; + for (int64_t i = 0; i < target_token_cnt; ++i) { + ObDatum target_token_datum; + target_token_datum.set_string(target_tokens[i], target_token_len[i]); + int cmp_ret = 0; + ASSERT_EQ(OB_SUCCESS, token_cmp_func_(target_token_datum, query_token, cmp_ret)); + if (0 == cmp_ret) { + idx = i; + break; + } + } + if (idx == -1) { + LOG_INFO("query token not found", K(query_token), K(query_token.get_string())); + } +} + +TEST_F(TestTextAnalyzer, test_basic_english_analyzer) +{ + ObEnglishTextAnalyzer analyzer; + analysis_ctx_.need_grouping_ = false; + + ASSERT_EQ(OB_SUCCESS, analyzer.init(analysis_ctx_, allocator_)); + + const int64_t doc_len_1 = 64; + const char doc_1[doc_len_1] = {"Try to tokenize basic english doc."}; + const int64_t token_cnt_1 = 6; + const char *tokens1[token_cnt_1] = {"try", "to", "tokenize", "basic", "english", "doc"}; + const int64_t tokens_len_1[token_cnt_1] = {3, 2, 8, 5, 7, 3}; + const int64_t tokens_freq_1[token_cnt_1] = {1, 1, 1, 1, 1, 1}; + analyze_test(analyzer, 
doc_1, doc_len_1, tokens1, tokens_len_1, tokens_freq_1, token_cnt_1); + + // not deduplicated + const int64_t doc_len_2 = 64; + const char doc_2[doc_len_2] = {"oceanbase@oceanbase.com, \t https://www.oceanbase.com/"}; + const int64_t token_cnt_2 = 7; + const char *tokens_2[token_cnt_2] = {"oceanbase", "oceanbase", "com", "https", "www", "oceanbase", "com"}; + const int64_t tokens_2_len[token_cnt_2] = {9, 9, 3, 5, 3, 9, 3}; + const int64_t tokens_freq_2[token_cnt_2] = {1, 1, 1, 1, 1, 1, 1}; + analyze_test(analyzer, doc_2, doc_len_2, tokens_2, tokens_2_len, tokens_freq_2, token_cnt_2); + + // won't trim extremely short phrase for now + const int64_t doc_len_3 = 64; + const char doc_3[doc_len_3] = {"if (a==b and c > !d) then x=1;"}; + const int64_t token_cnt_3 = 9; + const char *tokens_3[token_cnt_3] = {"if", "a", "b", "and", "c", "d", "then", "x", "1"}; + const int64_t tokens_len_3[token_cnt_3] = {2, 1, 1, 3, 1, 1, 4, 1, 1}; + const int64_t tokens_freq_3[token_cnt_3] = {1, 1, 1, 1, 1, 1, 1, 1, 1}; + analyze_test(analyzer, doc_3, doc_len_3, tokens_3, tokens_len_3, tokens_freq_3, token_cnt_3); + + // test paragraphs + const int64_t doc_len_4 = 128; + const char doc_4[doc_len_4] = {"PARAGRAPH1\nPARAGRAPH2\nPARAGRAPH3"}; + const int64_t token_cnt_4 = 3; + const char *tokens_4[token_cnt_4] = {"paragraph1","paragraph2","paragraph3"}; + const int64_t tokens_len_4[token_cnt_4] = {10,10,10}; + const int64_t tokens_freq_4[token_cnt_4] = {1, 1, 1}; + analyze_test(analyzer, doc_4, doc_len_4, tokens_4, tokens_len_4, tokens_freq_4, token_cnt_4); + + // test non-english text + const int64_t doc_len_5 = 128; + const char doc_5[doc_len_5] = {"乘骐骥以驰骋兮,来吾道夫先路"}; + const int64_t token_cnt_5 = 1; + const char *tokens_5[token_cnt_5] = {"乘骐骥以驰骋兮,来吾道夫先路"}; + const int64_t tokens_len_5[token_cnt_5] = {42}; + const int64_t tokens_freq_5[token_cnt_5] = {1}; + analyze_test(analyzer, doc_5, doc_len_5, tokens_5, tokens_len_5, tokens_freq_5, token_cnt_5); + + analyzer.reset(); + + // grouping test 
+  analysis_ctx_.need_grouping_ = true;
+  ASSERT_EQ(OB_SUCCESS, analyzer.init(analysis_ctx_, allocator_));
+  analyze_test(analyzer, doc_1, doc_len_1, tokens1, tokens_len_1, tokens_freq_1, token_cnt_1);
+  analyze_test(analyzer, doc_3, doc_len_3, tokens_3, tokens_len_3, tokens_freq_3, token_cnt_3);
+  analyze_test(analyzer, doc_4, doc_len_4, tokens_4, tokens_len_4, tokens_freq_4, token_cnt_4);
+
+  const int64_t doc_len_6 = 64;  // buffer size; the literal below is shorter, so the tail is zero-filled
+  const char doc_6[doc_len_6] = {"oceanbase@oceanbase.com, \t https://www.oceanbase.com/"};
+  const int64_t token_cnt_6 = 4;
+  const char *tokens_6[token_cnt_6] = {"oceanbase", "com", "https", "www"};
+  const int64_t tokens_len_6[token_cnt_6] = {9, 3, 5, 3};  // byte length of each entry in tokens_6
+  const int64_t tokens_freq_6[token_cnt_6] = {3, 2, 1, 1};  // occurrences of each token in doc_6 (oceanbase x3, com x2, https x1, www x1)
+  analyze_test(analyzer, doc_6, doc_len_6, tokens_6, tokens_len_6, tokens_freq_6, token_cnt_6);
+}
+
+}; // namespace share
+}; // namespace oceanbase
+
+int main(int argc, char **argv)
+{
+  system("rm -f test_text_analyzer.log*");  // delete log files matching this test's log name before logging starts
+  OB_LOGGER.set_file_name("test_text_analyzer.log", true, false);
+  oceanbase::common::ObLogger::get_logger().set_log_level("INFO");
+  // oceanbase::common::ObLogger::get_logger().set_log_level("DEBUG");
+  testing::InitGoogleTest(&argc, argv);  // receives argc/argv before the tests are run
+  return RUN_ALL_TESTS();  // the gtest result becomes the process exit code
+}
diff --git a/unittest/sql/rewrite/test_query_range.cpp b/unittest/sql/rewrite/test_query_range.cpp
index 6da615bcc6..31d95ed8be 100644
--- a/unittest/sql/rewrite/test_query_range.cpp
+++ b/unittest/sql/rewrite/test_query_range.cpp
@@ -1215,7 +1215,7 @@ TEST_F(ObQueryRangeTest, serialize_geo_queryrange)
   pre_mbr.x_max_ = 60;
   pre_mbr.y_min_ = 60;
   pre_mbr.y_max_ = 90;
-  pre_mbr.mbr_type_ = ObGeoRelationType::T_INTERSECTS;
+  pre_mbr.mbr_type_ = ObDomainOpType::T_GEO_INTERSECTS;
   OK(mbr_array.push_back(pre_mbr));
   ObGeoColumnInfo info1;
   info1.srid_ = 0;
@@ -1277,7 +1277,7 @@ TEST_F(ObQueryRangeTest, serialize_geo_keypart)
 {
   // build geo keypart
   ObKeyPart pre_key_part(allocator_);
-  OK(pre_key_part.create_geo_key());
+  OK(pre_key_part.create_domain_key());
   ObObj wkb;
   // ST_GeomFromText('POINT(5 5)')
   char hexstring[25] ={'\x01', '\x01', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00',
@@ -1285,8 +1285,8 @@ TEST_F(ObQueryRangeTest, serialize_geo_keypart)
                        '\x00', '\x00', '\x00', '\x14', '\x40', '\x00', '\x00', '\x00',
                        '\x00'};
   wkb.set_string(ObGeometryType ,hexstring, 25);
-  OK(ob_write_obj(allocator_, wkb, pre_key_part.geo_keypart_->wkb_));
-  pre_key_part.geo_keypart_->geo_type_ = ObGeoRelationType::T_DWITHIN;
+  OK(ob_write_obj(allocator_, wkb, pre_key_part.domain_keypart_->const_param_));
+  pre_key_part.domain_keypart_->domain_op_ = ObDomainOpType::T_GEO_DWITHIN;
   char buf[512 * 1024] = {'\0'};
   int64_t pos = 0;
   int64_t data_len = 0;
@@ -1296,8 +1296,8 @@ TEST_F(ObQueryRangeTest, serialize_geo_keypart)
   pos = 0;
   ObKeyPart dec_key_part(allocator_);
   OK(dec_key_part.deserialize(buf, data_len, pos));
-  EXPECT_EQ(dec_key_part.geo_keypart_->wkb_, pre_key_part.geo_keypart_->wkb_);
-  EXPECT_EQ(dec_key_part.geo_keypart_->geo_type_, pre_key_part.geo_keypart_->geo_type_);
+  EXPECT_EQ(dec_key_part.domain_keypart_->const_param_, pre_key_part.domain_keypart_->const_param_);
+  EXPECT_EQ(dec_key_part.domain_keypart_->domain_op_, pre_key_part.domain_keypart_->domain_op_);
 }
 
 int main(int argc, char **argv)
diff --git a/unittest/storage/CMakeLists.txt b/unittest/storage/CMakeLists.txt
index e0cf29bfc1..04337a254b 100644
--- a/unittest/storage/CMakeLists.txt
+++ b/unittest/storage/CMakeLists.txt
@@ -6,6 +6,10 @@ add_library(mock_access_service SHARED
   mock_access_service.cpp)
 target_link_libraries(mock_access_service PUBLIC oceanbase)
 
+add_library(mock_ft_parser SHARED
+  mock_ft_parser.cpp)
+target_link_libraries(mock_ft_parser PUBLIC oceanbase)
+
 function(storage_unittest case)
   ob_unittest(${ARGV})
   target_link_libraries(${case} PRIVATE mockcontainer)
@@ -16,6 +20,11 @@ function(storage_dml_unittest case)
   target_link_libraries(${case} PRIVATE mockcontainer mock_ls_tablet_service mock_access_service)
 endfunction()
 
+function(storage_fts_unittest case)
+  ob_unittest(${ARGV})
+  target_link_libraries(${case} PRIVATE mockcontainer mock_ft_parser)
+endfunction()
+
 add_subdirectory(mockcontainer)
 add_subdirectory(transaction)
 add_subdirectory(tx)
@@ -69,6 +78,7 @@ storage_unittest(test_checkpoint_diagnose checkpoint/test_checkpoint_diagnose.cp
 #storage_unittest(test_create_tablet_memtable test_create_tablet_memtable.cpp)
 storage_unittest(test_tenant_meta_obj_pool test_tenant_meta_obj_pool.cpp)
 storage_unittest(test_tablet_pointer_map test_tablet_pointer_map.cpp)
+storage_fts_unittest(test_fts_plugin test_fts_plugin.cpp)
 storage_unittest(test_storage_logger_manager slog/test_storage_logger_manager.cpp)
 storage_unittest(test_storage_log_read_write slog/test_storage_log_read_write.cpp)
 storage_unittest(test_storage_log_replay slog/test_storage_log_replay.cpp)
diff --git a/unittest/storage/mock_ft_parser.cpp b/unittest/storage/mock_ft_parser.cpp
new file mode 100644
index 0000000000..a24cfdde83
--- /dev/null
+++ b/unittest/storage/mock_ft_parser.cpp
@@ -0,0 +1,24 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#include "mock_ft_parser.h"  // defines oceanbase::storage::mock_ft_parser, referenced below
+
+OB_DECLARE_PLUGIN(mock_ft_parser)  // plugin record; test_fts_plugin.cpp reads these values back through lib::ObPlugin
+{
+  oceanbase::lib::ObPluginType::OB_FT_PARSER_PLUGIN,  // compared against plugin->type_ by the tests
+  "mock_ft_parser",  // compared against plugin->name_
+  OB_PLUGIN_AUTHOR_OCEANBASE,  // compared against plugin->author_
+  "This is mock fulltext parser plugin.",  // compared against plugin->spec_
+  0x00001,  // compared against plugin->version_
+  oceanbase::lib::ObPluginLicenseType::OB_MULAN_V2_LICENSE,  // compared against plugin->license_
+  &oceanbase::storage::mock_ft_parser,  // descriptor object; the tests only require plugin->desc_ to be non-null
+};
diff --git a/unittest/storage/mock_ft_parser.h b/unittest/storage/mock_ft_parser.h
new file mode 100644
index 0000000000..1c5d45cef4
--- /dev/null
+++ b/unittest/storage/mock_ft_parser.h
@@ -0,0 +1,56 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */ + +#ifndef MOCK_FT_PARSER_H_ +#define MOCK_FT_PARSER_H_ + +#include "lib/ob_plugin.h" + +namespace oceanbase +{ +namespace storage +{ + +class ObMockFTParserDesc final : public lib::ObIFTParserDesc +{ +public: + ObMockFTParserDesc() = default; + virtual ~ObMockFTParserDesc() = default; + virtual int init(lib::ObPluginParam *param) override; + virtual int deinit(lib::ObPluginParam *param) override; + virtual int segment(lib::ObFTParserParam *param) const override; +}; + +int ObMockFTParserDesc::init(lib::ObPluginParam *param) +{ + UNUSEDx(param); + return OB_SUCCESS; +} + +int ObMockFTParserDesc::deinit(lib::ObPluginParam *param) +{ + UNUSED(param); + return OB_SUCCESS; +} + +int ObMockFTParserDesc::segment(lib::ObFTParserParam *param) const +{ + UNUSED(param); + return OB_SUCCESS; +} + +static ObMockFTParserDesc mock_ft_parser; + +} // end storage +} // end oceanbase + +#endif // MOCK_FT_PARSER_H_ diff --git a/unittest/storage/test_fts_plugin.cpp b/unittest/storage/test_fts_plugin.cpp new file mode 100644 index 0000000000..569888e1a1 --- /dev/null +++ b/unittest/storage/test_fts_plugin.cpp @@ -0,0 +1,552 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include + +#define USING_LOG_PREFIX STORAGE + +#define protected public +#define private public + +#include "lib/ob_plugin.h" +#include "share/rc/ob_tenant_base.h" +#include "storage/fts/ob_fts_plugin_helper.h" +#include "storage/fts/ob_fts_plugin_mgr.h" +#include "storage/fts/ob_whitespace_ft_parser.h" +#include "sql/das/ob_das_utils.h" + +namespace oceanbase +{ + +static storage::ObTenantFTPluginMgr ft_plugin_mgr(OB_SYS_TENANT_ID); + +namespace storage +{ + +ObTenantFTPluginMgr &ObTenantFTPluginMgr::get_ft_plugin_mgr() +{ + return ft_plugin_mgr; +} + +typedef common::hash::ObHashMap ObFTWordMap; + +int segment_and_calc_word_count( + common::ObIAllocator &allocator, + storage::ObFTParseHelper *helper, + const common::ObCollationType &type, + const ObString &fulltext, + ObFTWordMap &words_count) +{ + int ret = OB_SUCCESS; + int64_t doc_length = 0; + common::ObSEArray words; + if (OB_ISNULL(helper) + || OB_UNLIKELY(ObCollationType::CS_TYPE_INVALID == type + || ObCollationType::CS_TYPE_EXTENDED_MARK < type) + || OB_UNLIKELY(!words_count.created())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(helper), K(type), K(words_count.created())); + } else if (OB_FAIL(helper->segment(type, fulltext.ptr(), fulltext.length(), doc_length, words))) { + LOG_WARN("fail to segment", K(ret), KPC(helper), K(type), K(fulltext)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < words.count(); ++i) { + const ObFTWord &ft_word = words.at(i); + int64_t word_count = 0; + if (OB_FAIL(words_count.get_refactored(ft_word, word_count)) && OB_HASH_NOT_EXIST != ret) { + LOG_WARN("fail to get ft word", K(ret), K(ft_word)); + } else { + word_count = OB_HASH_NOT_EXIST == ret ? 
1 : ++word_count; + if (OB_FAIL(words_count.set_refactored(ft_word, word_count, 1/*overwrite*/))) { + LOG_WARN("fail to set ft word and count", K(ret), K(ft_word)); + } + } + } + } + return ret; +} + +class ObTestAddWord final : public lib::ObFTParserParam::ObIAddWord +{ +public: + static const char *TEST_FULLTEXT; + static const int64_t TEST_WORD_COUNT = 9; + static const int64_t TEST_WORD_COUNT_WITHOUT_STOPWORD = 6; +public: + ObTestAddWord(); + virtual ~ObTestAddWord() = default; + virtual int operator()( + lib::ObFTParserParam *param, + const char *word, + const int64_t word_len) override; + virtual int64_t get_add_word_count() const override { return ith_word_; } + VIRTUAL_TO_STRING_KV(K_(ith_word)); +private: + const char *words_[TEST_WORD_COUNT]; + const char *words_without_stopword_[TEST_WORD_COUNT_WITHOUT_STOPWORD]; + int64_t ith_word_; +}; + +const char *ObTestAddWord::TEST_FULLTEXT = "OceanBase fulltext search is No.1 in the world."; + +ObTestAddWord::ObTestAddWord() + : words_{"oceanbase", "fulltext", "search", "is", "no", "1", "in", "the", "world"}, + words_without_stopword_{"oceanbase", "fulltext", "search", "no", "1", "world"}, + ith_word_(0) +{ +} + +int ObTestAddWord::operator()( + lib::ObFTParserParam *param, + const char *word, + const int64_t word_len) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(param) || OB_ISNULL(word) || OB_UNLIKELY(0 >= word_len)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(word), KP(param), K(word_len)); + } else if (OB_UNLIKELY(0 != strncmp(words_[ith_word_], word, word_len))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("the ith word isn't default word", K(ret), K(ith_word_), KCSTRING(words_[ith_word_]), + KCSTRING(word), K(word_len)); + } else { + ++ith_word_; + } + return ret; +} + +class TestDefaultFTParser : public ::testing::Test +{ +public: + TestDefaultFTParser(); + virtual ~TestDefaultFTParser() = default; + + virtual void SetUp() override; + virtual void TearDown() override; + +private: + 
lib::ObPluginParam plugin_param_; + lib::ObFTParserParam ft_parser_param_; + ObTestAddWord add_word_; + ObWhiteSpaceFTParserDesc desc_; + common::ObArenaAllocator allocator_; +}; + +TestDefaultFTParser::TestDefaultFTParser() + : plugin_param_(), + ft_parser_param_(), + add_word_(), + desc_(), + allocator_() +{ + plugin_param_.desc_ = &desc_; +} + +void TestDefaultFTParser::SetUp() +{ + ASSERT_EQ(OB_SUCCESS, desc_.init(&plugin_param_)); + + ft_parser_param_.allocator_ = &allocator_; + ft_parser_param_.add_word_ = &add_word_; + ft_parser_param_.cs_ = common::ObCharset::get_charset(ObCollationType::CS_TYPE_UTF8MB4_BIN); + ft_parser_param_.parser_version_ = 0x00001; + ASSERT_TRUE(nullptr != ft_parser_param_.cs_); +} + +void TestDefaultFTParser::TearDown() +{ + ft_parser_param_.reset(); + + ASSERT_EQ(OB_SUCCESS, desc_.deinit(&plugin_param_)); +} + +TEST_F(TestDefaultFTParser, test_space_ft_parser_segment) +{ + const char *fulltext = ObTestAddWord::TEST_FULLTEXT; + const int64_t ft_len = strlen(fulltext); + + ASSERT_EQ(OB_INVALID_ARGUMENT, ObSpaceFTParser::segment(nullptr, nullptr, 0)); + ASSERT_EQ(OB_INVALID_ARGUMENT, ObSpaceFTParser::segment(&ft_parser_param_, nullptr, 0)); + ASSERT_EQ(OB_INVALID_ARGUMENT, ObSpaceFTParser::segment(&ft_parser_param_, fulltext, 0)); + ASSERT_EQ(OB_INVALID_ARGUMENT, ObSpaceFTParser::segment(&ft_parser_param_, fulltext, -1)); + + ft_parser_param_.fulltext_ = fulltext; + ft_parser_param_.ft_length_ = ft_len; + + LOG_INFO("before space segment", KCSTRING(fulltext), K(ft_len), K(ft_parser_param_)); + ASSERT_EQ(OB_SUCCESS, ObSpaceFTParser::segment(&ft_parser_param_, fulltext, ft_len)); + LOG_INFO("after space segment", KCSTRING(fulltext), K(ft_len), K(ft_parser_param_)); +} + +TEST_F(TestDefaultFTParser, test_default_ft_parser_desc) +{ + ASSERT_EQ(OB_INVALID_ARGUMENT, desc_.segment(&ft_parser_param_)); + + ft_parser_param_.fulltext_ = ObTestAddWord::TEST_FULLTEXT; + ft_parser_param_.ft_length_ = strlen(ft_parser_param_.fulltext_); + + 
ASSERT_EQ(OB_SUCCESS, desc_.segment(&ft_parser_param_)); + + ASSERT_EQ(OB_SUCCESS, desc_.deinit(&plugin_param_)); + ASSERT_EQ(OB_NOT_INIT, desc_.segment(&ft_parser_param_)); + + ASSERT_EQ(OB_SUCCESS, desc_.init(&plugin_param_)); + ASSERT_EQ(OB_INVALID_ARGUMENT, desc_.segment(nullptr)); +} + +class ObTestFTPluginHelper : public ::testing::Test +{ +public: + static const char *TEST_FULLTEXT; + static const char *file_name; +public: + ObTestFTPluginHelper(); + virtual ~ObTestFTPluginHelper() = default; + + virtual void SetUp() override; + virtual void TearDown() override; + +private: + share::ObPluginSoHandler handler_; + const char *plugin_name_; + const ObCharsetInfo *cs_; + common::ObArenaAllocator allocator_; +}; + +const char *ObTestFTPluginHelper::TEST_FULLTEXT = "Test fulltext plugin."; +const char *ObTestFTPluginHelper::file_name = "libmock_ft_parser.so"; + +ObTestFTPluginHelper::ObTestFTPluginHelper() + : handler_(), + plugin_name_("mock_ft_parser"), + cs_(nullptr), + allocator_() +{ +} + +void ObTestFTPluginHelper::SetUp() +{ + ASSERT_EQ(OB_SUCCESS, handler_.open(plugin_name_, file_name)); + + cs_ = common::ObCharset::get_charset(ObCollationType::CS_TYPE_UTF8MB4_BIN); + ASSERT_TRUE(nullptr != cs_); +} + +void ObTestFTPluginHelper::TearDown() +{ + cs_ = nullptr; + ASSERT_EQ(OB_SUCCESS, handler_.close()); +} + +TEST_F(ObTestFTPluginHelper, test_fts_plugin) +{ + int64_t version = -1; + ASSERT_EQ(OB_SUCCESS, handler_.get_plugin_version(version)); + ASSERT_EQ(OB_PLUGIN_INTERFACE_VERSION, version); + + int64_t size = -1; + ASSERT_EQ(OB_SUCCESS, handler_.get_plugin_size(size)); + ASSERT_EQ(sizeof(lib::ObPlugin), size); + + lib::ObPlugin *plugin = nullptr; + ASSERT_EQ(OB_SUCCESS, handler_.get_plugin(plugin)); + ASSERT_TRUE(nullptr != plugin); + ASSERT_TRUE(plugin->is_valid()); + ASSERT_EQ(lib::ObPluginType::OB_FT_PARSER_PLUGIN, plugin->type_); + LOG_INFO("jinzhu debug", KCSTRING(plugin->name_), KCSTRING(plugin->author_), KCSTRING(plugin->spec_)); + ASSERT_TRUE(0 == 
std::strncmp("mock_ft_parser", plugin->name_, std::strlen("mock_ft_parser"))); + ASSERT_TRUE(0 == std::strncmp(OB_PLUGIN_AUTHOR_OCEANBASE, plugin->author_, std::strlen(OB_PLUGIN_AUTHOR_OCEANBASE))); + ASSERT_TRUE(0 == std::strncmp("This is mock fulltext parser plugin.", plugin->spec_, std::strlen("This is mock fulltext parser plugin."))); + ASSERT_EQ(0x00001, plugin->version_); + ASSERT_EQ(lib::ObPluginLicenseType::OB_MULAN_V2_LICENSE, plugin->license_); + ASSERT_TRUE(nullptr != plugin->desc_); + + lib::ObIFTParserDesc *desc = nullptr; + ASSERT_EQ(OB_SUCCESS, ObFTParseHelper::get_fulltext_parser_desc(handler_, desc)); + ASSERT_TRUE(nullptr != desc); + + ObTestAddWord test_add_word; + ASSERT_EQ(OB_SUCCESS, ObFTParseHelper::segment(1/*plugin_vserion*/, desc, cs_, TEST_FULLTEXT, + strlen(TEST_FULLTEXT), allocator_, test_add_word)); +} + +TEST_F(ObTestFTPluginHelper, test_main_program_for_plugin) +{ + ASSERT_EQ(OB_SUCCESS, handler_.close()); + ASSERT_EQ(OB_SUCCESS, handler_.open(plugin_name_, nullptr/*use main program*/)); + + int64_t version = -1; + ASSERT_EQ(OB_SUCCESS, handler_.get_plugin_version(version)); + ASSERT_EQ(OB_PLUGIN_INTERFACE_VERSION, version); + + int64_t size = -1; + ASSERT_EQ(OB_SUCCESS, handler_.get_plugin_size(size)); + ASSERT_EQ(sizeof(lib::ObPlugin), size); + + lib::ObPlugin *plugin = nullptr; + ASSERT_EQ(OB_SUCCESS, handler_.get_plugin(plugin)); + ASSERT_TRUE(nullptr != plugin); + ASSERT_TRUE(plugin->is_valid()); + ASSERT_EQ(lib::ObPluginType::OB_FT_PARSER_PLUGIN, plugin->type_); + LOG_INFO("jinzhu debug", KCSTRING(plugin->name_), KCSTRING(plugin->author_), KCSTRING(plugin->spec_)); + ASSERT_TRUE(0 == std::strncmp("mock_ft_parser", plugin->name_, std::strlen("mock_ft_parser"))); + ASSERT_TRUE(0 == std::strncmp(OB_PLUGIN_AUTHOR_OCEANBASE, plugin->author_, std::strlen(OB_PLUGIN_AUTHOR_OCEANBASE))); + ASSERT_TRUE(0 == std::strncmp("This is mock fulltext parser plugin.", plugin->spec_, std::strlen("This is mock fulltext parser plugin."))); + 
ASSERT_EQ(0x00001, plugin->version_); + ASSERT_EQ(lib::ObPluginLicenseType::OB_MULAN_V2_LICENSE, plugin->license_); + ASSERT_TRUE(nullptr != plugin->desc_); + + lib::ObIFTParserDesc *desc = nullptr; + ASSERT_EQ(OB_SUCCESS, ObFTParseHelper::get_fulltext_parser_desc(handler_, desc)); + ASSERT_TRUE(nullptr != desc); + + ObTestAddWord test_add_word; + ASSERT_EQ(OB_SUCCESS, ObFTParseHelper::segment(1/*plugin_vserion*/, desc, cs_, TEST_FULLTEXT, + strlen(TEST_FULLTEXT), allocator_, test_add_word)); + + ASSERT_EQ(0, ObCharset::strcmp(ObCollationType::CS_TYPE_UTF8MB4_GENERAL_CI, "OceanBase", "Oceanbase")); +} + +TEST_F(ObTestFTPluginHelper, test_no_exist_symbol) +{ + void *sym_ptr = nullptr; + ASSERT_EQ(OB_SEARCH_NOT_FOUND, handler_.get_symbol_ptr("test_no_exist_symbol", sym_ptr)); + ASSERT_EQ(OB_INVALID_ARGUMENT, handler_.get_symbol_ptr(nullptr, sym_ptr)); + + ASSERT_EQ(OB_SUCCESS, handler_.close()); + ASSERT_EQ(OB_FILE_NOT_OPENED, handler_.get_symbol_ptr("test_no_exist_symbol", sym_ptr)); + + ASSERT_EQ(OB_ERR_SYS, handler_.open(plugin_name_, "./test_no_exist_file.so")); + ASSERT_EQ(OB_INVALID_ARGUMENT, handler_.open(nullptr/*plugin name*/, nullptr/*file_name*/)); + + ASSERT_EQ(OB_SUCCESS, handler_.open(plugin_name_, nullptr/*use main program*/)); + ASSERT_EQ(OB_INIT_TWICE, handler_.open(plugin_name_, nullptr/*use main program*/)); +} + +class ObTestFTParseHelper : public ::testing::Test +{ +public: + static const char *name_; + typedef common::hash::ObHashMap ObFTWordMap; +public: + ObTestFTParseHelper(); + virtual ~ObTestFTParseHelper() = default; + + static void SetUpTestCase(); + static void TearDownTestCase(); + virtual void SetUp() override; + virtual void TearDown() override; + +private: + const common::ObString plugin_name_; + const common::ObCollationType cs_type_; + common::ObArenaAllocator allocator_; + ObFTParseHelper parse_helper_; +}; + +const char *ObTestFTParseHelper::name_ = "space.1"; + +ObTestFTParseHelper::ObTestFTParseHelper() + : 
plugin_name_(STRLEN(name_), name_), + cs_type_(ObCollationType::CS_TYPE_UTF8MB4_BIN), + allocator_() +{ +} + +void ObTestFTParseHelper::SetUp() +{ + ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_)); +} + +void ObTestFTParseHelper::TearDown() +{ + parse_helper_.reset(); +} + +void ObTestFTParseHelper::SetUpTestCase() +{ + ASSERT_EQ(common::OB_SUCCESS, ObTenantFTPluginMgr::register_plugins()); + ASSERT_EQ(common::OB_SUCCESS, ft_plugin_mgr.init()); +} + +void ObTestFTParseHelper::TearDownTestCase() +{ + ft_plugin_mgr.destroy(); + ObTenantFTPluginMgr::unregister_plugins(); +} + +TEST_F(ObTestFTParseHelper, test_parse_fulltext) +{ + common::ObSEArray words; + int64_t doc_length = 0; + ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, ObTestAddWord::TEST_FULLTEXT, + std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + + ObTestAddWord test_add_word; + for (int64_t i = 0; i < words.count(); ++i) { + ASSERT_TRUE(0 == strncmp(test_add_word.words_without_stopword_[i], words[i].word_.ptr(), words[i].word_.length())); + } + + ObFTWordMap ft_word_map; + ASSERT_EQ(OB_SUCCESS, ft_word_map.create(words.count(), "TestParse")); + ASSERT_EQ(OB_SUCCESS, segment_and_calc_word_count(allocator_, &parse_helper_, + cs_type_, ObTestAddWord::TEST_FULLTEXT, ft_word_map)); + ASSERT_EQ(words.count(), ft_word_map.size()); + + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, nullptr, std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, ObTestAddWord::TEST_FULLTEXT, 0, doc_length, words)); + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, ObTestAddWord::TEST_FULLTEXT, -1, doc_length, words)); + + parse_helper_.reset(); + ASSERT_EQ(OB_NOT_INIT, parse_helper_.segment(cs_type_, ObTestAddWord::TEST_FULLTEXT, + std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.init(nullptr, plugin_name_)); + 
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.init(&allocator_, ObString())); + + ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_)); + + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_INVALID, ObTestAddWord::TEST_FULLTEXT, + std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_EXTENDED_MARK, ObTestAddWord::TEST_FULLTEXT, + std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + + ASSERT_EQ(OB_INIT_TWICE, parse_helper_.init(&allocator_, plugin_name_)); + + parse_helper_.reset(); + ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_)); + + parse_helper_.reset(); + ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_)); + ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, ObTestAddWord::TEST_FULLTEXT, + std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + for (int64_t i = 0; i < words.count(); ++i) { + ASSERT_TRUE(0 == strncmp(test_add_word.words_without_stopword_[i], words[i].word_.ptr(), words[i].word_.length())); + } +} + +class ObTestNgramFTParseHelper : public ::testing::Test +{ +public: + static const char *name_; + static const int64_t TEST_WORD_COUNT = 29; + typedef common::hash::ObHashMap ObFTWordMap; +public: + ObTestNgramFTParseHelper(); + virtual ~ObTestNgramFTParseHelper() = default; + + static void SetUpTestCase(); + static void TearDownTestCase(); + virtual void SetUp() override; + virtual void TearDown() override; + +private: + const common::ObString plugin_name_; + const char *ngram_words_[TEST_WORD_COUNT]; + const common::ObCollationType cs_type_; + common::ObArenaAllocator allocator_; + ObFTParseHelper parse_helper_; +}; + +const char *ObTestNgramFTParseHelper::name_ = "ngram.1"; + +ObTestNgramFTParseHelper::ObTestNgramFTParseHelper() + : plugin_name_(STRLEN(name_), name_), + ngram_words_{"Oc", "ce", "ea", "an", "nB", "Ba", "as", "se", "fu", "ul", "ll", "lt", "te", "ex", "xt", "se", "ea", 
"ar", "rc", "ch", "is", "No", "in", "th", "he", "wo", "or", "rl", "ld"}, + cs_type_(ObCollationType::CS_TYPE_UTF8MB4_BIN), + allocator_() +{ +} + +void ObTestNgramFTParseHelper::SetUp() +{ + ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_)); +} + +void ObTestNgramFTParseHelper::TearDown() +{ + parse_helper_.reset(); +} + +void ObTestNgramFTParseHelper::SetUpTestCase() +{ + ASSERT_EQ(common::OB_SUCCESS, ObTenantFTPluginMgr::register_plugins()); + ASSERT_EQ(common::OB_SUCCESS, ft_plugin_mgr.init()); +} + +void ObTestNgramFTParseHelper::TearDownTestCase() +{ + ft_plugin_mgr.destroy(); + ObTenantFTPluginMgr::unregister_plugins(); +} + +TEST_F(ObTestNgramFTParseHelper, test_parse_fulltext) +{ + int64_t doc_length = 0; + common::ObSEArray words; + ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, ObTestAddWord::TEST_FULLTEXT, + std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + + for (int64_t i = 0; i < words.count(); ++i) { + ASSERT_TRUE(0 == strncmp(ngram_words_[i], words[i].word_.ptr(), words[i].word_.length())); + } + + ObFTWordMap ft_word_map; + ASSERT_EQ(OB_SUCCESS, ft_word_map.create(words.count(), "TestParse")); + ASSERT_EQ(OB_SUCCESS, segment_and_calc_word_count(allocator_, &parse_helper_, + cs_type_, ObTestAddWord::TEST_FULLTEXT, ft_word_map)); + ASSERT_EQ(words.count(), ft_word_map.size() + 2); + + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, nullptr, std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, ObTestAddWord::TEST_FULLTEXT, 0, doc_length, words)); + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(cs_type_, ObTestAddWord::TEST_FULLTEXT, -1, doc_length, words)); + + parse_helper_.reset(); + ASSERT_EQ(OB_NOT_INIT, parse_helper_.segment(cs_type_, ObTestAddWord::TEST_FULLTEXT, + std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.init(nullptr, plugin_name_)); + 
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.init(&allocator_, ObString())); + + const char *plugin_name = "space.1"; + ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, common::ObString(STRLEN(plugin_name), plugin_name))); + + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_INVALID, ObTestAddWord::TEST_FULLTEXT, + std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_EXTENDED_MARK, ObTestAddWord::TEST_FULLTEXT, + std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + + ASSERT_EQ(OB_INIT_TWICE, parse_helper_.init(&allocator_, plugin_name_)); + + parse_helper_.reset(); + ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_)); + + parse_helper_.reset(); + ASSERT_EQ(OB_SUCCESS, parse_helper_.init(&allocator_, plugin_name_)); + ASSERT_EQ(OB_SUCCESS, parse_helper_.segment(cs_type_, ObTestAddWord::TEST_FULLTEXT, + std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words)); + for (int64_t i = 0; i < words.count(); ++i) { + ASSERT_TRUE(0 == strncmp(ngram_words_[i], words[i].word_.ptr(), words[i].word_.length())); + } +} + +} // end namespace storage +} // end namespace oceanbase + +int main(int argc, char **argv) +{ + system("rm -rf test_fts_plugin.log"); + OB_LOGGER.set_file_name("test_fts_plugin.log", true); + OB_LOGGER.set_log_level("INFO"); + oceanbase::storage::ObTestFTPluginHelper::file_name = argv[0]; + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +}