Use fmt and std::from_chars to make integer-to-string and string-to-integer conversion more efficient (#6361)

* [Optimize] optimize the speed of converting integer to string

* Use fmt and std::from_chars to make integer-to-string and string-to-integer conversion more efficient

Co-authored-by: caiconghui <caiconghui@xiaomi.com>
This commit is contained in:
caiconghui
2021-08-04 10:55:19 +08:00
committed by GitHub
parent 16bc5fa585
commit d1007afe80
11 changed files with 71 additions and 123 deletions

View File

@ -611,11 +611,8 @@ bool EsScanNode::to_ext_literal(PrimitiveType slot_type, void* value, TExtLitera
case TYPE_LARGEINT: {
node_type = (TExprNodeType::LARGE_INT_LITERAL);
char buf[48];
int len = 48;
char* v = LargeIntValue::to_string(*reinterpret_cast<__int128*>(value), buf, &len);
TLargeIntLiteral large_int_literal;
large_int_literal.__set_value(v);
large_int_literal.__set_value(LargeIntValue::to_string(*reinterpret_cast<__int128*>(value)));
literal->__set_large_int_literal(large_int_literal);
break;
}

View File

@ -284,11 +284,8 @@ Status ODBCConnector::append(const std::string& table_name, RowBatch* batch,
break;
}
case TYPE_LARGEINT: {
char buf[48];
int len = 48;
char* v = LargeIntValue::to_string(
reinterpret_cast<const PackedInt128*>(item)->value, buf, &len);
fmt::format_to(_insert_stmt_buffer, "{}", std::string(v, len));
fmt::format_to(_insert_stmt_buffer, "{}",
reinterpret_cast<const PackedInt128*>(item)->value);
break;
}
default: {

View File

@ -18,6 +18,7 @@
#include "exprs/cast_functions.h"
#include <cmath>
#include <fmt/format.h>
#include "exprs/anyval_util.h"
#include "gutil/strings/numbers.h"
@ -131,23 +132,15 @@ CAST_FUNCTION(FloatVal, DoubleVal, double_val)
CAST_FROM_STRINGS();
// Defines CastFunctions::cast_to_string_val for one integral *Val type: a NULL input yields a
// NULL StringVal, otherwise val.val is rendered as decimal text with fmt::format_int.
// The formatter object `f` holds the formatted characters, so it must stay alive until
// from_buffer_temp has been called with its data()/size().
// NOTE(review): TinyIntVal previously had a hand-written overload that widened the value to
// int64_t before formatting so it was not treated as a character; it now goes through this
// macro on the assumption that fmt::format_int formats it numerically -- confirm against the
// fmt version in use.
#define CAST_TO_STRING(num_type)                                                             \
    StringVal CastFunctions::cast_to_string_val(FunctionContext* ctx, const num_type& val) { \
        if (val.is_null) return StringVal::null();                                           \
        auto f = fmt::format_int(val.val);                                                   \
        return AnyValUtil::from_buffer_temp(ctx, f.data(), f.size());                        \
    }
CAST_TO_STRING(BooleanVal);
CAST_TO_STRING(TinyIntVal);
CAST_TO_STRING(SmallIntVal);
CAST_TO_STRING(IntVal);
CAST_TO_STRING(BigIntVal);
@ -156,10 +149,9 @@ StringVal CastFunctions::cast_to_string_val(FunctionContext* ctx, const LargeInt
if (val.is_null) {
return StringVal::null();
}
char buf[64];
int len = 64;
char* d = LargeIntValue::to_string(val.val, buf, &len);
return AnyValUtil::from_buffer_temp(ctx, d, len);
auto string_value = LargeIntValue::to_string(val.val);
return AnyValUtil::from_buffer_temp(ctx, string_value.data(), string_value.size());
}
template <typename T>

View File

@ -18,8 +18,8 @@
#include "olap/reader.h"
#include <boost/algorithm/string/case_conv.hpp>
#include <charconv>
#include <parallel_hashmap/phmap.h>
#include <sstream>
#include <unordered_set>
#include "olap/bloom_filter_predicate.h"
@ -651,37 +651,33 @@ void Reader::_init_conditions_param(const ReaderParams& read_params) {
ColumnPredicate* predicate = nullptr; \
switch (column.type()) { \
case OLAP_FIELD_TYPE_TINYINT: { \
std::stringstream ss(cond); \
int32_t value = 0; \
ss >> value; \
int8_t value = 0; \
std::from_chars(cond.data(), cond.data() + cond.size(), value); \
predicate = new PREDICATE<int8_t>(index, value, opposite); \
break; \
} \
case OLAP_FIELD_TYPE_SMALLINT: { \
std::stringstream ss(cond); \
int16_t value = 0; \
ss >> value; \
std::from_chars(cond.data(), cond.data() + cond.size(), value); \
predicate = new PREDICATE<int16_t>(index, value, opposite); \
break; \
} \
case OLAP_FIELD_TYPE_INT: { \
std::stringstream ss(cond); \
int32_t value = 0; \
ss >> value; \
std::from_chars(cond.data(), cond.data() + cond.size(), value); \
predicate = new PREDICATE<int32_t>(index, value, opposite); \
break; \
} \
case OLAP_FIELD_TYPE_BIGINT: { \
std::stringstream ss(cond); \
int64_t value = 0; \
ss >> value; \
std::from_chars(cond.data(), cond.data() + cond.size(), value); \
predicate = new PREDICATE<int64_t>(index, value, opposite); \
break; \
} \
case OLAP_FIELD_TYPE_LARGEINT: { \
std::stringstream ss(cond); \
int128_t value = 0; \
ss >> value; \
StringParser::ParseResult result; \
value = StringParser::string_to_int<__int128>(cond.data(), cond.size(), &result); \
predicate = new PREDICATE<int128_t>(index, value, opposite); \
break; \
} \
@ -723,9 +719,19 @@ void Reader::_init_conditions_param(const ReaderParams& read_params) {
break; \
} \
case OLAP_FIELD_TYPE_BOOL: { \
std::stringstream ss(cond); \
int32_t ivalue = 0; \
auto result = std::from_chars(cond.data(), cond.data() + cond.size(), ivalue); \
bool value = false; \
ss >> value; \
if (result.ec == std::errc()) { \
if (ivalue == 0) { \
value = false; \
} else { \
value = true; \
} \
} else { \
StringParser::ParseResult parse_result; \
value = StringParser::string_to_bool(cond.data(), cond.size(), &parse_result); \
} \
predicate = new PREDICATE<bool>(index, value, opposite); \
break; \
} \
@ -783,10 +789,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition, bool o
switch (column.type()) {
case OLAP_FIELD_TYPE_TINYINT: {
phmap::flat_hash_set<int8_t> values;
int8_t value = 0;
for (auto& cond_val : condition.condition_values) {
int32_t value = 0;
std::stringstream ss(cond_val);
ss >> value;
std::from_chars(cond_val.data(), cond_val.data() + cond_val.size(), value);
values.insert(value);
}
if (condition.condition_op == "*=") {
@ -798,10 +803,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition, bool o
}
case OLAP_FIELD_TYPE_SMALLINT: {
phmap::flat_hash_set<int16_t> values;
int16_t value = 0;
for (auto& cond_val : condition.condition_values) {
int16_t value = 0;
std::stringstream ss(cond_val);
ss >> value;
std::from_chars(cond_val.data(), cond_val.data() + cond_val.size(), value);
values.insert(value);
}
if (condition.condition_op == "*=") {
@ -813,10 +817,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition, bool o
}
case OLAP_FIELD_TYPE_INT: {
phmap::flat_hash_set<int32_t> values;
int32_t value = 0;
for (auto& cond_val : condition.condition_values) {
int32_t value = 0;
std::stringstream ss(cond_val);
ss >> value;
std::from_chars(cond_val.data(), cond_val.data() + cond_val.size(), value);
values.insert(value);
}
if (condition.condition_op == "*=") {
@ -828,10 +831,9 @@ ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition, bool o
}
case OLAP_FIELD_TYPE_BIGINT: {
phmap::flat_hash_set<int64_t> values;
int64_t value = 0;
for (auto& cond_val : condition.condition_values) {
int64_t value = 0;
std::stringstream ss(cond_val);
ss >> value;
std::from_chars(cond_val.data(), cond_val.data() + cond_val.size(), value);
values.insert(value);
}
if (condition.condition_op == "*=") {
@ -843,10 +845,10 @@ ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition, bool o
}
case OLAP_FIELD_TYPE_LARGEINT: {
phmap::flat_hash_set<int128_t> values;
int128_t value = 0;
StringParser::ParseResult result;
for (auto& cond_val : condition.condition_values) {
int128_t value = 0;
std::stringstream ss(cond_val);
ss >> value;
value = StringParser::string_to_int<__int128>(cond_val.c_str(), cond_val.size(), &result);
values.insert(value);
}
if (condition.condition_op == "*=") {

View File

@ -88,7 +88,7 @@ Status FoldConstantMgr::fold_constant_expr(
result = get_result(src, ctx->root()->type().type);
}
expr_result.set_content(result);
expr_result.set_content(std::move(result));
expr_result.mutable_type()->set_type(t_type);
pexpr_result_map.mutable_map()->insert({n.first, expr_result});
@ -154,42 +154,37 @@ string FoldConstantMgr::get_result(void* src, PrimitiveType slot_type){
}
case TYPE_TINYINT: {
int8_t val = *reinterpret_cast<const int8_t*>(src);
string s;
s.push_back(val);
return s;
return fmt::format_int(val).str();
}
case TYPE_SMALLINT: {
int16_t val = *reinterpret_cast<const int16_t*>(src);
return std::to_string(val);
return fmt::format_int(val).str();
}
case TYPE_INT: {
int32_t val = *reinterpret_cast<const int32_t*>(src);
return std::to_string(val);
return fmt::format_int(val).str();
}
case TYPE_BIGINT: {
int64_t val = *reinterpret_cast<const int64_t*>(src);
return std::to_string(val);
return fmt::format_int(val).str();
}
case TYPE_LARGEINT: {
char buf[48];
int len = 48;
char* v = LargeIntValue::to_string(*reinterpret_cast<__int128*>(src), buf, &len);
return std::string(v, len);
return LargeIntValue::to_string(*reinterpret_cast<__int128*>(src));
}
case TYPE_FLOAT: {
float val = *reinterpret_cast<const float*>(src);
return std::to_string(val);
return fmt::format("{:.9g}", val);
}
case TYPE_TIME:
case TYPE_DOUBLE: {
double val = *reinterpret_cast<double*>(src);
return std::to_string(val);
return fmt::format("{:.17g}", val);
}
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT: {
return (reinterpret_cast<StringValue*>(src))->debug_string();
return (reinterpret_cast<StringValue*>(src))->to_string();
}
case TYPE_DATE:
case TYPE_DATETIME: {
@ -205,7 +200,6 @@ string FoldConstantMgr::get_result(void* src, PrimitiveType slot_type){
DCHECK(false) << "Type not implemented: " << slot_type;
return NULL;
}
return NULL;
}

View File

@ -26,20 +26,8 @@ namespace doris {
std::ostream& operator<<(std::ostream& os, __int128 const& value) {
std::ostream::sentry s(os);
if (s) {
unsigned __int128 tmp = value < 0 ? -value : value;
char buffer[48];
char* d = std::end(buffer);
do {
--d;
*d = "0123456789"[tmp % 10];
tmp /= 10;
} while (tmp != 0);
if (value < 0) {
--d;
*d = '-';
}
int len = std::end(buffer) - d;
if (os.rdbuf()->sputn(d, len) != len) {
std::string value_str = fmt::format("{}", value);
if (os.rdbuf()->sputn(value_str.data(), value_str.size()) != value_str.size()) {
os.setstate(std::ios_base::badbit);
}
}

View File

@ -22,6 +22,7 @@
#include <stdlib.h>
#include <string.h>
#include <fmt/format.h>
#include <iostream>
#include <sstream>
#include <string>
@ -36,28 +37,9 @@ const __int128 MIN_INT128 = ((__int128)0x01 << 127);
// Helpers for rendering a signed 128-bit integer as decimal text.
class LargeIntValue {
public:
    // Writes the decimal representation of `value` right-aligned into the `*len` bytes that
    // start at `buffer`.
    //
    // Returns a pointer to the first character written (the '-' sign for negative values).
    // On return `*len` holds the number of characters produced. No terminating NUL is
    // written. The caller must supply at least 40 bytes: 39 digits plus an optional sign.
    static char* to_string(__int128 value, char* buffer, int* len) {
        DCHECK(*len >= 40);
        // Take the magnitude in the unsigned domain. Evaluating `-value` on the signed type
        // overflows for MIN_INT128, which is undefined behaviour; unsigned negation is
        // well-defined modulo 2^128 and yields the correct magnitude for every input.
        unsigned __int128 tmp = static_cast<unsigned __int128>(value);
        if (value < 0) {
            tmp = -tmp;
        }
        char* const end = buffer + *len;
        char* d = end;
        // Emit digits from least to most significant, filling the buffer backwards.
        do {
            --d;
            *d = "0123456789"[static_cast<int>(tmp % 10)];
            tmp /= 10;
        } while (tmp != 0);
        if (value < 0) {
            --d;
            *d = '-';
        }
        *len = static_cast<int>(end - d);
        return d;
    }

    // Returns `value` formatted as a decimal string.
    static std::string to_string(__int128 value) {
        return fmt::format("{}", value);
    }
};

View File

@ -90,11 +90,8 @@ int MysqlResultWriter::_add_row_value(int index, const TypeDescriptor& type, voi
break;
case TYPE_LARGEINT: {
char buf[48];
int len = 48;
char* v = LargeIntValue::to_string(reinterpret_cast<const PackedInt128*>(item)->value, buf,
&len);
buf_ret = _row_buffer->push_string(v, len);
auto string_value = LargeIntValue::to_string(reinterpret_cast<const PackedInt128*>(item)->value);
buf_ret = _row_buffer->push_string(string_value.data(), string_value.size());
break;
}

View File

@ -217,7 +217,7 @@ public:
//char* tmp_val = reinterpret_cast<char*>(0x01);
ARROW_RETURN_NOT_OK(builder.Append(""));
} else {
ARROW_RETURN_NOT_OK(builder.Append(string_val->to_string()));
ARROW_RETURN_NOT_OK(builder.Append(string_val->ptr, string_val->len));
}
break;
}
@ -230,12 +230,9 @@ public:
break;
}
case TYPE_LARGEINT: {
char buf[48];
int len = 48;
char* v = LargeIntValue::to_string(
reinterpret_cast<const PackedInt128*>(cell_ptr)->value, buf, &len);
std::string temp(v, len);
ARROW_RETURN_NOT_OK(builder.Append(std::move(temp)));
auto string_temp = LargeIntValue::to_string(
reinterpret_cast<const PackedInt128*>(cell_ptr)->value);
ARROW_RETURN_NOT_OK(builder.Append(string_temp.data(), string_temp.size()));
break;
}
default: {

View File

@ -55,18 +55,11 @@ uint24_t timestamp_from_date(const std::string& date_str) {
}
// Formats a duration given in seconds as "HH:MM:SS", prefixed with '-' when `time` is negative.
// Hours are zero-padded to two digits but not wrapped, so large values print with more digits.
// Any fractional part of a second is dropped by the integer conversions.
std::string time_str_from_double(double time) {
    const char* sign = "";
    if (time < 0) {
        sign = "-";
        time = -time; // format the magnitude; the sign is emitted separately
    }
    // Same arithmetic as before: each field is derived from the double independently.
    const auto hours = static_cast<int64_t>(time / 60 / 60);
    const auto minutes = static_cast<int64_t>(time / 60) % 60;
    const auto seconds = static_cast<int64_t>(time) % 60;
    return fmt::format("{}{:02d}:{:02d}:{:02d}", sign, hours, minutes, seconds);
}
} // namespace doris

View File

@ -19,6 +19,7 @@
#include <gtest/gtest.h>
#include <fmt/format.h>
#include <iostream>
#include <sstream>
#include <string>
@ -89,6 +90,14 @@ TEST_F(LargeIntValueTest, largeint_to_string) {
}
}
// Rough timing loop for LargeIntValue::to_string on the largest positive 128-bit value.
TEST_F(LargeIntValueTest, largeint_to_string_benchmark) {
    const __int128 v2 = MAX_INT128;
    const std::string expected = "170141183460469231731687303715884105727";

    // Check the output once, outside the loop, so the assertion machinery and the string
    // comparison do not dominate the time spent in the loop.
    EXPECT_EQ(LargeIntValue::to_string(v2), expected);

    // Accumulate the result length so every conversion has an observable effect and cannot
    // simply be discarded.
    size_t total_len = 0;
    const size_t iterations = 10000000;
    for (size_t i = 0; i < iterations; i++) {
        total_len += LargeIntValue::to_string(v2).size();
    }
    EXPECT_EQ(total_len, expected.size() * iterations);
}
} // end namespace doris
int main(int argc, char** argv) {