// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. // This file is copied from // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/JSONParsers/SimdJSONParser.h // and modified by Doris #pragma once #include #include #include #include #include #include #include "util/jsonb_writer.h" #include "vec/columns/column.h" #include "vec/common/string_ref.h" #include "vec/common/uint128.h" #include "vec/core/field.h" #include "vec/json/path_in_data.h" #include "vec/json/simd_json_parser.h" namespace doris::vectorized { template Field getValueAsField(const Element& element) { // bool will convert to type FiledType::UInt64 if (element.isBool()) { return element.getBool(); } if (element.isInt64()) { return element.getInt64(); } // doris only support signed integers at present if (element.isUInt64()) { return element.getInt64(); } if (element.isDouble()) { return element.getDouble(); } if (element.isString()) { return element.getString(); } if (element.isNull()) { return Field(); } return Field(); } template void writeValueAsJsonb(const Element& element, JsonbWriter& writer) { // bool will convert to type FiledType::UInt64 if (element.isBool()) { writer.writeBool(element.getBool()); return; } if (element.isInt64()) { writer.writeInt64(element.getInt64()); return; } // doris only support signed integers at present if (element.isUInt64()) { writer.writeInt64(element.getInt64()); return; } if (element.isDouble()) { writer.writeDouble(element.getDouble()); return; } if (element.isString()) { writer.writeStartString(); std::string_view str = element.getString(); writer.writeString(str.data(), str.size()); writer.writeEndString(); return; } if (element.isNull()) { writer.writeNull(); return; } } template std::string castValueAsString(const Element& element) { if (element.isBool()) { return element.getBool() ? "1" : "0"; } if (element.isInt64()) { return std::to_string(element.getInt64()); } if (element.isUInt64()) { return std::to_string(element.getUInt64()); } if (element.isDouble()) { return std::to_string(element.getDouble()); } if (element.isNull()) { return ""; } return ""; } enum class ExtractType { ToString = 0, // ... }; template class JSONDataParser { public: using Element = typename ParserImpl::Element; using JSONObject = typename ParserImpl::Object; using JSONArray = typename ParserImpl::Array; std::optional parse(const char* begin, size_t length); // extract keys's element into columns bool extract_key(MutableColumns& columns, StringRef json, const std::vector& keys, const std::vector& types); private: struct ParseContext { PathInDataBuilder builder; std::vector paths; std::vector values; }; using PathPartsWithArray = std::pair; using PathToArray = phmap::flat_hash_map; using KeyToSizes = phmap::flat_hash_map, StringRefHash>; struct ParseArrayContext { size_t current_size = 0; size_t total_size = 0; PathToArray arrays_by_path; KeyToSizes nested_sizes_by_key; }; void traverse(const Element& element, ParseContext& ctx); void traverseObject(const JSONObject& object, ParseContext& ctx); void traverseArray(const JSONArray& array, ParseContext& ctx); void traverseArrayElement(const Element& element, ParseArrayContext& ctx); static void fillMissedValuesInArrays(ParseArrayContext& ctx); static bool tryInsertDefaultFromNested(ParseArrayContext& ctx, const PathInData::Parts& path, Array& array); static StringRef getNameOfNested(const PathInData::Parts& path, const Field& value); bool has_nested = false; void checkHasNested(const Element& element); void traverseAsJsonb(const Element& element, JsonbWriter& writer); void traverseObjectAsJsonb(const JSONObject& object, JsonbWriter& writer); void traverseArrayAsJsonb(const JSONArray& array, JsonbWriter& writer); ParserImpl parser; }; } // namespace doris::vectorized