From 484e7de3c58f2f73e79a5f752b47d6d5b8f3aea1 Mon Sep 17 00:00:00 2001 From: "Yunfeng,Wu" Date: Thu, 4 Jun 2020 16:31:18 +0800 Subject: [PATCH] [Doirs On ES] fix bug for sparse docvalue context and remove the mistake usage of total (#3751) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An earlier PR, https://github.com/apache/incubator-doris/pull/3513 (https://github.com/apache/incubator-doris/issues/3479), tried to resolve the `inner hits node is not an array` error. That error occurs when a query (batch-size) runs against a new segment without this field: because the filter_path only takes `hits.hits.fields` and `hits.hits._source` into account, the response comes back with a null inner hits node: ``` { "_scroll_id": "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAHaUWY1ExUVd0ZWlRY2", "hits": { "total": 1 } } ``` Unfortunately, that PR introduced another serious problem: results are inconsistent across different batch_size values because it misuses `total`. To avoid both problems, we simply add `hits.hits._score` to filter_path when `docvalue_mode` is true; `_score` is always `null`, and it populates the inner hits node: ``` { "_scroll_id": "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAHaUWY1ExUVd0ZWlRY2", "hits": { "total": 1, "hits": [ { "_score": null } ] } } ``` Related issue: https://github.com/apache/incubator-doris/issues/3752 --- be/src/exec/es/es_scan_reader.cpp | 15 ++++++----- be/src/exec/es/es_scroll_parser.cpp | 41 +++-------------------------- 2 files changed, 12 insertions(+), 44 deletions(-) diff --git a/be/src/exec/es/es_scan_reader.cpp b/be/src/exec/es/es_scan_reader.cpp index 4d6673815c..43af7862ae 100644 --- a/be/src/exec/es/es_scan_reader.cpp +++ b/be/src/exec/es/es_scan_reader.cpp @@ -27,14 +27,15 @@ #include "exec/es/es_scroll_query.h" namespace doris { -const std::string REUQEST_SCROLL_FILTER_PATH = "filter_path=_scroll_id,hits.hits._source,hits.total,_id,hits.hits._source.fields,hits.hits.fields"; + +const std::string SOURCE_SCROLL_SEARCH_FILTER_PATH = 
"filter_path=_scroll_id,hits.hits._source,hits.total,_id"; +const std::string DOCVALUE_SCROLL_SEARCH_FILTER_PATH = "filter_path=_scroll_id,hits.total,hits.hits._score,hits.hits.fields"; + const std::string REQUEST_SCROLL_PATH = "_scroll"; const std::string REQUEST_PREFERENCE_PREFIX = "&preference=_shards:"; const std::string REQUEST_SEARCH_SCROLL_PATH = "/_search/scroll"; const std::string REQUEST_SEPARATOR = "/"; -const std::string REQUEST_SEARCH_FILTER_PATH = "filter_path=hits.hits._source,hits.total,_id,hits.hits._source.fields,hits.hits.fields"; - ESScanReader::ESScanReader(const std::string& target, const std::map& props, bool doc_value_mode) : _scroll_keep_alive(config::es_scroll_keepalive), _http_timeout_ms(config::es_http_timeout_ms), @@ -57,6 +58,7 @@ ESScanReader::ESScanReader(const std::string& target, const std::map= _size) { - // _source is fetched from ES - if (!_doc_value_mode) { - return Status::OK(); - } - - // _fields(doc_value) is fetched from ES - if (_total <= 0 || _line_index >= _total) { - return Status::OK(); - } - - - // here is operations for `enable_doc_value_scan`. - // This indicates that the fields does not exist(e.g. never assign values to these fields), but other fields have values. - // so, number of rows is >= 0, we need fill `NULL` to these fields that does not exist. - _line_index++; - tuple->init(tuple_desc->byte_size()); - for (int i = 0; i < tuple_desc->slots().size(); ++i) { - const SlotDescriptor* slot_desc = tuple_desc->slots()[i]; - if (slot_desc->is_materialized()) { - tuple->set_null(slot_desc->null_indicator_offset()); - } - } - - *line_eof = false; return Status::OK(); }