From 693982fd1a203d0bdeceace602a3691a96ff5bcb Mon Sep 17 00:00:00 2001
From: TengJianPing <18241664+jacktengg@users.noreply.github.com>
Date: Wed, 25 Oct 2023 15:47:51 +0800
Subject: [PATCH] [feature](decimal) support decimal256 (#25386)

---
 be/src/common/consts.h                        |   12 +
 be/src/exec/olap_common.h                     |   21 +-
 .../schema_scanner/schema_columns_scanner.cpp |    4 +-
 be/src/exec/table_connector.cpp               |    3 +-
 be/src/exprs/create_predicate_function.h      |    3 +-
 be/src/exprs/runtime_filter.cpp               |   43 +
 be/src/gutil/endian.h                         |   12 +
 be/src/olap/delete_handler.cpp                |    2 +
 be/src/olap/field.h                           |    4 +
 be/src/olap/in_list_predicate.h               |    2 +-
 be/src/olap/key_coder.cpp                     |    1 +
 be/src/olap/key_coder.h                       |   33 +-
 be/src/olap/olap_common.h                     |    4 +-
 be/src/olap/predicate_creator.h               |    7 +-
 .../rowset/segment_v2/bitmap_index_writer.cpp |    3 +
 .../olap/rowset/segment_v2/bitshuffle_page.h  |    1 +
 .../segment_v2/bloom_filter_index_writer.cpp  |    1 +
 .../olap/rowset/segment_v2/encoding_info.cpp  |    4 +
 .../segment_v2/inverted_index_writer.cpp      |    6 +
 .../olap/rowset/segment_v2/zone_map_index.cpp |    3 +-
 be/src/olap/schema.cpp                        |    3 +
 be/src/olap/tablet_schema.cpp                 |    7 +
 be/src/olap/types.cpp                         |    1 +
 be/src/olap/types.h                           |   34 +
 be/src/olap/utils.h                           |    1 +
 be/src/pipeline/exec/scan_operator.cpp        |    4 +-
 be/src/runtime/decimalv2_value.h              |    7 +
 be/src/runtime/define_primitive_type.h        |    3 +-
 be/src/runtime/fold_constant_executor.cpp     |    3 +-
 be/src/runtime/primitive_type.cpp             |   12 +
 be/src/runtime/primitive_type.h               |    6 +
 be/src/runtime/raw_value.h                    |    2 +
 be/src/runtime/runtime_predicate.cpp          |    4 +
 be/src/runtime/runtime_predicate.h            |    6 +
 be/src/runtime/runtime_state.h                |    4 +
 be/src/runtime/type_limit.h                   |    8 +
 be/src/runtime/types.cpp                      |   12 +-
 be/src/runtime/types.h                        |   38 +-
 be/src/util/binary_cast.hpp                   |    6 +-
 be/src/util/string_parser.hpp                 |   11 +-
 .../aggregate_function_avg.cpp                |   11 +
 .../aggregate_function_avg.h                  |   29 +-
 .../aggregate_function_product.h              |    2 +-
 .../aggregate_function_simple_factory.h       |    8 +-
 .../aggregate_function_sum.cpp                |    2 +
 .../aggregate_function_sum.h                  |   16 +-
 be/src/vec/aggregate_functions/helpers.h      |    3 +-
 be/src/vec/columns/column_array.h             |    7 +-
 be/src/vec/columns/column_decimal.cpp         |   11 +-
 be/src/vec/columns/column_decimal.h           |    2 +-
 be/src/vec/columns/columns_number.h           |    1 +
 be/src/vec/common/arithmetic_overflow.h       |   25 +
 be/src/vec/common/field_visitors.h            |    2 +
 be/src/vec/common/hash_table/hash.h           |   21 +
 be/src/vec/common/int_exp.h                   |   92 +
 be/src/vec/core/accurate_comparison.h         |  409 ++---
 be/src/vec/core/call_on_type_index.h          |    9 +
 be/src/vec/core/decimal_comparison.h          |   35 +-
 be/src/vec/core/decomposed_float.h            |  219 +++
 be/src/vec/core/extended_types.h              |   86 +
 be/src/vec/core/field.cpp                     |    1 +
 be/src/vec/core/field.h                       |   80 +-
 be/src/vec/core/types.h                       |  425 ++++-
 be/src/vec/core/wide_integer.h                |  296 ++++
 be/src/vec/core/wide_integer_impl.h           | 1479 +++++++++++++++++
 be/src/vec/core/wide_integer_to_string.h      |   84 +
 .../vec/data_types/convert_field_to_type.cpp  |    3 +
 be/src/vec/data_types/data_type.cpp           |    2 +
 be/src/vec/data_types/data_type.h             |    4 +-
 be/src/vec/data_types/data_type_decimal.cpp   |   15 +-
 be/src/vec/data_types/data_type_decimal.h     |   67 +-
 be/src/vec/data_types/data_type_factory.cpp   |   10 +
 be/src/vec/data_types/get_least_supertype.cpp |   15 +-
 be/src/vec/data_types/number_traits.h         |    9 +
 .../serde/data_type_decimal_serde.cpp         |    2 +
 .../serde/data_type_decimal_serde.h           |   12 +-
 .../parquet/byte_array_dict_decoder.cpp       |    1 +
 .../parquet/byte_array_plain_decoder.cpp      |    1 +
 .../parquet/fix_length_dict_decoder.hpp       |    2 +
 .../parquet/fix_length_plain_decoder.cpp      |    1 +
 be/src/vec/exec/scan/vscan_node.cpp           |    4 +-
 be/src/vec/exec/vjdbc_connector.cpp           |    3 +-
 be/src/vec/exprs/vectorized_agg_fn.cpp        |    2 +-
 be/src/vec/exprs/vexpr.cpp                    |    5 +
 be/src/vec/exprs/vexpr.h                      |    8 +
 .../functions/array/function_array_apply.cpp  |    2 +
 .../array/function_array_difference.h         |    3 +
 .../functions/array/function_array_distinct.h |    3 +
 .../functions/array/function_array_element.h  |    3 +
 .../array/function_array_enumerate_uniq.cpp   |    2 +
 .../functions/array/function_array_index.h    |    4 +
 .../vec/functions/array/function_array_join.h |    3 +
 .../functions/array/function_array_remove.h   |    3 +
 .../functions/array/function_arrays_overlap.h |    4 +
 be/src/vec/functions/function.h               |   11 +-
 .../functions/function_binary_arithmetic.h    |   91 +-
 be/src/vec/functions/function_cast.h          |   24 +-
 .../vec/functions/function_multi_same_args.h  |    1 -
 be/src/vec/functions/function_string.h        |   23 +-
 .../vec/functions/function_unary_arithmetic.h |    4 +-
 .../vec/functions/function_width_bucket.cpp   |    3 +
 be/src/vec/functions/functions_comparison.h   |    1 -
 be/src/vec/functions/if.cpp                   |    1 -
 be/src/vec/functions/least_greast.cpp         |    6 +-
 be/src/vec/olap/olap_data_convertor.cpp       |    3 +
 be/src/vec/sink/vtablet_block_convertor.cpp   |   12 +-
 be/src/vec/sink/vtablet_block_convertor.h     |    2 +
 .../vec/sink/writer/vmysql_table_writer.cpp   |    3 +-
 be/test/vec/data_types/decimal_test.cpp       |  212 +++
 .../apache/doris/catalog/PrimitiveType.java   |   50 +-
 .../org/apache/doris/catalog/ScalarType.java  |   85 +-
 .../java/org/apache/doris/catalog/Type.java   |   65 +-
 .../org/apache/doris/analysis/CastExpr.java   |    1 +
 .../org/apache/doris/analysis/ColumnDef.java  |    1 +
 .../java/org/apache/doris/analysis/Expr.java  |    2 +
 .../apache/doris/analysis/LiteralExpr.java    |    1 +
 .../apache/doris/analysis/StringLiteral.java  |    1 +
 .../org/apache/doris/analysis/TypeDef.java    |   30 +
 .../apache/doris/catalog/AliasFunction.java   |    1 +
 .../java/org/apache/doris/catalog/Column.java |    1 +
 .../org/apache/doris/common/util/Util.java    |    1 +
 .../apache/doris/mysql/MysqlSerializer.java   |    4 +-
 .../exceptions/NotSupportedException.java     |   28 +
 .../rules/FoldConstantRuleOnBE.java           |    5 +-
 .../rules/SimplifyDecimalV3Comparison.java    |    2 +-
 .../nereids/trees/expressions/Divide.java     |    4 +-
 .../nereids/trees/expressions/Multiply.java   |   10 +-
 .../functions/ComputePrecisionForSum.java     |    6 +-
 .../trees/expressions/functions/agg/Avg.java  |   16 +-
 .../executable/NumericArithmetic.java         |    4 +-
 .../expressions/literal/DecimalV3Literal.java |    2 +-
 .../apache/doris/nereids/types/DataType.java  |    2 +-
 .../doris/nereids/types/DecimalV3Type.java    |   59 +-
 .../org/apache/doris/qe/ConnectProcessor.java |   10 +
 .../org/apache/doris/qe/SessionVariable.java  |   12 +
 .../apache/doris/qe/cache/PartitionRange.java |    1 +
 .../doris/rewrite/FoldConstantsRule.java      |    3 +-
 gensrc/proto/internal_service.proto           |    1 +
 gensrc/proto/types.proto                      |    1 +
 gensrc/thrift/PaloInternalService.thrift      |    2 +
 gensrc/thrift/Types.thrift                    |    3 +-
 .../decimalv3/test_arithmetic_expressions.out |  109 +-
 .../datatype_p0/decimalv3/test_decimalv3.out  |   66 +
 .../datatype_p0/decimalv3/test_predicate.out  |   77 +
 .../aggregate/aggregate_decimal256.out        |   97 ++
 .../query_p0/join/test_join_decimal256.out    |   41 +
 .../join/test_runtime_filter_decimal256.out   |  201 +++
 .../test_arithmetic_expressions.groovy        |  274 ++-
 .../decimalv3/test_decimalv3.groovy           |  110 +-
 .../decimalv3/test_predicate.groovy           |   52 +
 .../aggregate/aggregate_decimal256.groovy     |  154 ++
 .../query_p0/join/test_join_decimal256.groovy |   97 ++
 .../test_runtime_filter_decimal256.groovy     |  302 ++++
 153 files changed, 5647 insertions(+), 633 deletions(-)
 create mode 100644 be/src/vec/core/decomposed_float.h
 create mode 100644 be/src/vec/core/extended_types.h
 create mode 100644 be/src/vec/core/wide_integer.h
 create mode 100644 be/src/vec/core/wide_integer_impl.h
 create mode 100644 be/src/vec/core/wide_integer_to_string.h
 create mode 100644 be/test/vec/data_types/decimal_test.cpp
 create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/NotSupportedException.java
 create mode 100644 regression-test/data/query_p0/aggregate/aggregate_decimal256.out
 create mode 100644 regression-test/data/query_p0/join/test_join_decimal256.out
 create mode 100644 regression-test/data/query_p0/join/test_runtime_filter_decimal256.out
 create mode 100644 regression-test/suites/query_p0/aggregate/aggregate_decimal256.groovy
 create mode 100644 regression-test/suites/query_p0/join/test_join_decimal256.groovy
 create mode 100644 regression-test/suites/query_p0/join/test_runtime_filter_decimal256.groovy

diff --git a/be/src/common/consts.h b/be/src/common/consts.h
index 72942f75b2..7548f9a202 100644
--- a/be/src/common/consts.h
+++ b/be/src/common/consts.h
@@ -30,8 +30,20 @@ const std::string ROWID_COL = "__DORIS_ROWID_COL__";
 const std::string ROW_STORE_COL = "__DORIS_ROW_STORE_COL__";
 const std::string DYNAMIC_COLUMN_NAME = "__DORIS_DYNAMIC_COL__";
 
+/// The maximum precision representable by a 4-byte decimal (Decimal4Value)
 constexpr int MAX_DECIMAL32_PRECISION = 9;
+/// The maximum precision representable by an 8-byte decimal (Decimal8Value)
 constexpr int MAX_DECIMAL64_PRECISION = 18;
+/// The maximum precision representable by a 16-byte decimal
 constexpr int MAX_DECIMAL128_PRECISION = 38;
+/// The maximum precision representable by a 32-byte decimal
+constexpr int MAX_DECIMAL256_PRECISION = 76;
+
+/// Must be kept in sync with FE's max precision/scale.
+static constexpr int MAX_DECIMALV2_PRECISION = MAX_DECIMAL128_PRECISION;
+static constexpr int MAX_DECIMALV2_SCALE = MAX_DECIMALV2_PRECISION;
+
+static constexpr int MAX_DECIMALV3_PRECISION = MAX_DECIMAL256_PRECISION;
+static constexpr int MAX_DECIMALV3_SCALE = MAX_DECIMALV3_PRECISION;
 } // namespace BeConsts
 } // namespace doris
diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index b9f53c3f7d..47005ef042 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -54,6 +54,8 @@ std::string cast_to_string(T value, int scale) {
         return ((vectorized::Decimal<int64_t>)value).to_string(scale);
     } else if constexpr (primitive_type == TYPE_DECIMAL128I) {
         return ((vectorized::Decimal<int128_t>)value).to_string(scale);
+    } else if constexpr (primitive_type == TYPE_DECIMAL256) {
+        return ((vectorized::Decimal<Int256>)value).to_string(scale);
     } else if constexpr (primitive_type == TYPE_TINYINT) {
         return std::to_string(static_cast<int>(value));
     } else if constexpr (primitive_type == TYPE_LARGEINT) {
@@ -501,16 +503,15 @@ private:
     bool _is_convertible;
 };
 
-using ColumnValueRangeType =
-        std::variant<ColumnValueRange<TYPE_TINYINT>, ColumnValueRange<TYPE_SMALLINT>,
-                     ColumnValueRange<TYPE_INT>, ColumnValueRange<TYPE_BIGINT>,
-                     ColumnValueRange<TYPE_LARGEINT>, ColumnValueRange<TYPE_CHAR>,
-                     ColumnValueRange<TYPE_VARCHAR>, ColumnValueRange<TYPE_STRING>,
-                     ColumnValueRange<TYPE_DATE>, ColumnValueRange<TYPE_DATEV2>,
-                     ColumnValueRange<TYPE_DATETIME>, ColumnValueRange<TYPE_DATETIMEV2>,
-                     ColumnValueRange<TYPE_DECIMALV2>, ColumnValueRange<TYPE_BOOLEAN>,
-                     ColumnValueRange<TYPE_HLL>, ColumnValueRange<TYPE_DECIMAL32>,
-                     ColumnValueRange<TYPE_DECIMAL64>, ColumnValueRange<TYPE_DECIMAL128I>>;
+using ColumnValueRangeType = std::variant<
+        ColumnValueRange<TYPE_TINYINT>, ColumnValueRange<TYPE_SMALLINT>, ColumnValueRange<TYPE_INT>,
+        ColumnValueRange<TYPE_BIGINT>, ColumnValueRange<TYPE_LARGEINT>, ColumnValueRange<TYPE_CHAR>,
+        ColumnValueRange<TYPE_VARCHAR>, ColumnValueRange<TYPE_STRING>, ColumnValueRange<TYPE_DATE>,
+        ColumnValueRange<TYPE_DATEV2>, ColumnValueRange<TYPE_DATETIME>,
+        ColumnValueRange<TYPE_DATETIMEV2>, ColumnValueRange<TYPE_DECIMALV2>,
+        ColumnValueRange<TYPE_BOOLEAN>, ColumnValueRange<TYPE_HLL>,
+        ColumnValueRange<TYPE_DECIMAL32>, ColumnValueRange<TYPE_DECIMAL64>,
+        ColumnValueRange<TYPE_DECIMAL128I>, ColumnValueRange<TYPE_DECIMAL256>>;
 
 template <PrimitiveType primitive_type>
 const typename ColumnValueRange<primitive_type>::CppType
diff --git a/be/src/exec/schema_scanner/schema_columns_scanner.cpp b/be/src/exec/schema_scanner/schema_columns_scanner.cpp
index 9eec707f74..7848a7b528 100644
--- a/be/src/exec/schema_scanner/schema_columns_scanner.cpp
+++ b/be/src/exec/schema_scanner/schema_columns_scanner.cpp
@@ -138,6 +138,7 @@ std::string SchemaColumnsScanner::_to_mysql_data_type_string(TColumnDesc& desc)
     case TPrimitiveType::DECIMAL32:
     case TPrimitiveType::DECIMAL64:
     case TPrimitiveType::DECIMAL128I:
+    case TPrimitiveType::DECIMAL256:
     case TPrimitiveType::DECIMALV2: {
         return "decimal";
     }
@@ -208,7 +209,8 @@ std::string SchemaColumnsScanner::_type_to_string(TColumnDesc& desc) {
     }
     case TPrimitiveType::DECIMAL32:
     case TPrimitiveType::DECIMAL64:
-    case TPrimitiveType::DECIMAL128I: {
+    case TPrimitiveType::DECIMAL128I:
+    case TPrimitiveType::DECIMAL256: {
         fmt::memory_buffer debug_string_buffer;
         fmt::format_to(
                 debug_string_buffer, "decimalv3({}, {})",
diff --git a/be/src/exec/table_connector.cpp b/be/src/exec/table_connector.cpp
index ba0d37d5dd..fa5181f5fe 100644
--- a/be/src/exec/table_connector.cpp
+++ b/be/src/exec/table_connector.cpp
@@ -245,7 +245,8 @@ Status TableConnector::convert_column_data(const vectorized::ColumnPtr& column_p
     }
     case TYPE_DECIMAL32:
     case TYPE_DECIMAL64:
-    case TYPE_DECIMAL128I: {
+    case TYPE_DECIMAL128I:
+    case TYPE_DECIMAL256: {
         auto decimal_type = remove_nullable(type_ptr);
         auto val = decimal_type->to_string(*column, row);
         fmt::format_to(_insert_stmt_buffer, "{}", val);
diff --git a/be/src/exprs/create_predicate_function.h b/be/src/exprs/create_predicate_function.h
index 011270cd09..6cc463bf65 100644
--- a/be/src/exprs/create_predicate_function.h
+++ b/be/src/exprs/create_predicate_function.h
@@ -103,7 +103,8 @@ public:
     M(TYPE_STRING)            \
     M(TYPE_DECIMAL32)         \
     M(TYPE_DECIMAL64)         \
-    M(TYPE_DECIMAL128I)
+    M(TYPE_DECIMAL128I)       \
+    M(TYPE_DECIMAL256)
 
 template <class Traits, size_t N = 0>
 typename Traits::BasePtr create_predicate_function(PrimitiveType type) {
diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp
index bb510f30e2..c6e64fd0e5 100644
--- a/be/src/exprs/runtime_filter.cpp
+++ b/be/src/exprs/runtime_filter.cpp
@@ -52,6 +52,8 @@
 #include "vec/columns/column.h"
 #include "vec/columns/column_complex.h"
 #include "vec/common/assert_cast.h"
+#include "vec/core/wide_integer.h"
+#include "vec/core/wide_integer_to_string.h"
 #include "vec/exprs/vbitmap_predicate.h"
 #include "vec/exprs/vbloom_predicate.h"
 #include "vec/exprs/vdirect_in_predicate.h"
@@ -99,6 +101,8 @@ PColumnType to_proto(PrimitiveType type) {
         return PColumnType::COLUMN_TYPE_DECIMAL64;
     case TYPE_DECIMAL128I:
         return PColumnType::COLUMN_TYPE_DECIMAL128I;
+    case TYPE_DECIMAL256:
+        return PColumnType::COLUMN_TYPE_DECIMAL256;
     case TYPE_CHAR:
         return PColumnType::COLUMN_TYPE_CHAR;
     case TYPE_VARCHAR:
@@ -148,6 +152,8 @@ PrimitiveType to_primitive_type(PColumnType type) {
         return TYPE_DECIMAL64;
     case PColumnType::COLUMN_TYPE_DECIMAL128I:
         return TYPE_DECIMAL128I;
+    case PColumnType::COLUMN_TYPE_DECIMAL256:
+        return TYPE_DECIMAL256;
     case PColumnType::COLUMN_TYPE_VARCHAR:
         return TYPE_VARCHAR;
     case PColumnType::COLUMN_TYPE_CHAR:
@@ -790,6 +796,18 @@ public:
             });
             break;
         }
+        case TYPE_DECIMAL256: {
+            batch_assign(in_filter, [](std::shared_ptr<HybridSetBase>& set, PColumnValue& column,
+                                       ObjectPool* pool) {
+                auto string_val = column.stringval();
+                StringParser::ParseResult result;
+                auto int_val = StringParser::string_to_int<wide::Int256>(
+                        string_val.c_str(), string_val.length(), &result);
+                DCHECK(result == StringParser::PARSE_SUCCESS);
+                set->insert(&int_val);
+            });
+            break;
+        }
         case TYPE_VARCHAR:
         case TYPE_CHAR:
         case TYPE_STRING: {
@@ -923,6 +941,18 @@ public:
             DCHECK(result == StringParser::PARSE_SUCCESS);
             return _context.minmax_func->assign(&min_val, &max_val);
         }
+        case TYPE_DECIMAL256: {
+            auto min_string_val = minmax_filter->min_val().stringval();
+            auto max_string_val = minmax_filter->max_val().stringval();
+            StringParser::ParseResult result;
+            auto min_val = StringParser::string_to_int<wide::Int256>(
+                    min_string_val.c_str(), min_string_val.length(), &result);
+            DCHECK(result == StringParser::PARSE_SUCCESS);
+            auto max_val = StringParser::string_to_int<wide::Int256>(
+                    max_string_val.c_str(), max_string_val.length(), &result);
+            DCHECK(result == StringParser::PARSE_SUCCESS);
+            return _context.minmax_func->assign(&min_val, &max_val);
+        }
         case TYPE_VARCHAR:
         case TYPE_CHAR:
         case TYPE_STRING: {
@@ -1578,6 +1608,12 @@ void IRuntimeFilter::to_protobuf(PInFilter* filter) {
         });
         return;
     }
+    case TYPE_DECIMAL256: {
+        batch_copy<wide::Int256>(filter, it, [](PColumnValue* column, const wide::Int256* value) {
+            column->set_stringval(wide::to_string(*value));
+        });
+        return;
+    }
     case TYPE_CHAR:
     case TYPE_VARCHAR:
     case TYPE_STRING: {
@@ -1686,6 +1722,13 @@ void IRuntimeFilter::to_protobuf(PMinMaxFilter* filter) {
                 LargeIntValue::to_string(*reinterpret_cast<const int128_t*>(max_data)));
         return;
     }
+    case TYPE_DECIMAL256: {
+        filter->mutable_min_val()->set_stringval(
+                wide::to_string(*reinterpret_cast<const wide::Int256*>(min_data)));
+        filter->mutable_max_val()->set_stringval(
+                wide::to_string(*reinterpret_cast<const wide::Int256*>(max_data)));
+        return;
+    }
     case TYPE_CHAR:
     case TYPE_VARCHAR:
     case TYPE_STRING: {
diff --git a/be/src/gutil/endian.h b/be/src/gutil/endian.h
index 4434bb943b..66d849f73c 100644
--- a/be/src/gutil/endian.h
+++ b/be/src/gutil/endian.h
@@ -35,6 +35,7 @@
 #include "gutil/int128.h"
 #include "gutil/integral_types.h"
 #include "gutil/port.h"
+#include "vec/core/wide_integer.h"
 
 inline uint64 gbswap_64(uint64 host_int) {
 #if defined(__GNUC__) && defined(__x86_64__) && !defined(__APPLE__)
@@ -59,6 +60,11 @@ inline unsigned __int128 gbswap_128(unsigned __int128 host_int) {
            (static_cast<unsigned __int128>(bswap_64(static_cast<uint64>(host_int))) << 64);
 }
 
+inline wide::UInt256 gbswap_256(wide::UInt256 host_int) { // Reverse all 32 bytes, mirroring gbswap_128 above.
+    wide::UInt256 result{gbswap_64(host_int.items[3]), gbswap_64(host_int.items[2]), gbswap_64(host_int.items[1]), gbswap_64(host_int.items[0])}; // limbs reversed AND each limb byte-swapped; assumes the initializer fills items[0..3] in order
+    return result;
+}
+
 // Swap bytes of a 24-bit value.
 inline uint32_t bswap_24(uint32_t x) {
     return ((x & 0x0000ffULL) << 16) | ((x & 0x00ff00ULL)) | ((x & 0xff0000ULL) >> 16);
@@ -252,6 +258,9 @@ public:
     static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); }
     static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); }
 
+    static wide::UInt256 FromHost256(wide::UInt256 x) { return gbswap_256(x); }
+    static wide::UInt256 ToHost256(wide::UInt256 x) { return gbswap_256(x); }
+
     static bool IsLittleEndian() { return true; }
 
 #elif defined IS_BIG_ENDIAN
@@ -271,6 +280,9 @@ public:
     static uint128 FromHost128(uint128 x) { return x; }
     static uint128 ToHost128(uint128 x) { return x; }
 
+    static wide::UInt256 FromHost256(wide::UInt256 x) { return x; }
+    static wide::UInt256 ToHost256(wide::UInt256 x) { return x; }
+
     static bool IsLittleEndian() { return false; }
 
 #endif /* ENDIAN */
diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index f05a6c86e2..d9a0b7b43c 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -199,6 +199,8 @@ bool DeleteHandler::is_condition_value_valid(const TabletColumn& column,
         return valid_decimal(value_str, column.precision(), column.frac());
     case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
         return valid_decimal(value_str, column.precision(), column.frac());
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
+        return valid_decimal(value_str, column.precision(), column.frac());
     case FieldType::OLAP_FIELD_TYPE_CHAR:
     case FieldType::OLAP_FIELD_TYPE_VARCHAR:
         return value_str.size() <= column.length();
diff --git a/be/src/olap/field.h b/be/src/olap/field.h
index 3a01876b58..412ee2a76f 100644
--- a/be/src/olap/field.h
+++ b/be/src/olap/field.h
@@ -521,6 +521,8 @@ public:
                 [[fallthrough]];
             case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
                 [[fallthrough]];
+            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
+                [[fallthrough]];
             case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
                 Field* field = new Field(column);
                 field->set_precision(column.precision());
@@ -579,6 +581,8 @@ public:
                 [[fallthrough]];
             case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
                 [[fallthrough]];
+            case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
+                [[fallthrough]];
             case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {
                 Field* field = new Field(column);
                 field->set_precision(column.precision());
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 4025016768..73791bbf9e 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -97,7 +97,7 @@ public:
             if constexpr (Type == TYPE_STRING || Type == TYPE_CHAR) {
                 tmp = convert(*col, condition, arena);
             } else if constexpr (Type == TYPE_DECIMAL32 || Type == TYPE_DECIMAL64 ||
-                                 Type == TYPE_DECIMAL128I) {
+                                 Type == TYPE_DECIMAL128I || Type == TYPE_DECIMAL256) {
                 tmp = convert(*col, condition);
             } else {
                 tmp = convert(condition);
diff --git a/be/src/olap/key_coder.cpp b/be/src/olap/key_coder.cpp
index 803b353375..168117117d 100644
--- a/be/src/olap/key_coder.cpp
+++ b/be/src/olap/key_coder.cpp
@@ -80,6 +80,7 @@ private:
         add_mapping<FieldType::OLAP_FIELD_TYPE_DECIMAL32>();
         add_mapping<FieldType::OLAP_FIELD_TYPE_DECIMAL64>();
         add_mapping<FieldType::OLAP_FIELD_TYPE_DECIMAL128I>();
+        add_mapping<FieldType::OLAP_FIELD_TYPE_DECIMAL256>();
     }
 
     template <FieldType field_type>
diff --git a/be/src/olap/key_coder.h b/be/src/olap/key_coder.h
index 30d33cd3fa..6885a0d96f 100644
--- a/be/src/olap/key_coder.h
+++ b/be/src/olap/key_coder.h
@@ -85,6 +85,7 @@ class KeyCoderTraits<
         field_type,
         typename std::enable_if<
                 std::is_integral<typename CppTypeTraits<field_type>::CppType>::value ||
+                field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
                 vectorized::IsDecimalNumber<typename CppTypeTraits<field_type>::CppType>>::type> {
 public:
     using CppType = typename CppTypeTraits<field_type>::CppType;
@@ -93,20 +94,24 @@ public:
 private:
     // Swap value's endian from/to big endian
     static UnsignedCppType swap_big_endian(UnsignedCppType val) {
-        switch (sizeof(UnsignedCppType)) {
-        case 1:
-            return val;
-        case 2:
-            return BigEndian::FromHost16(val);
-        case 4:
-            return BigEndian::FromHost32(val);
-        case 8:
-            return BigEndian::FromHost64(val);
-        case 16:
-            return BigEndian::FromHost128(val);
-        default:
-            LOG(FATAL) << "Invalid type to big endian, type=" << int(field_type)
-                       << ", size=" << sizeof(UnsignedCppType);
+        if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256) {
+            return BigEndian::FromHost256(val);
+        } else {
+            switch (sizeof(UnsignedCppType)) {
+            case 1:
+                return val;
+            case 2:
+                return BigEndian::FromHost16(val);
+            case 4:
+                return BigEndian::FromHost32(val);
+            case 8:
+                return BigEndian::FromHost64(val);
+            case 16:
+                return BigEndian::FromHost128(val);
+            default:
+                LOG(FATAL) << "Invalid type to big endian, type=" << int(field_type)
+                           << ", size=" << sizeof(UnsignedCppType);
+            }
         }
     }
 
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index 130d65e7ef..3811aab378 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -143,7 +143,8 @@ enum class FieldType {
     OLAP_FIELD_TYPE_DECIMAL128I = 33,
     OLAP_FIELD_TYPE_JSONB = 34,
     OLAP_FIELD_TYPE_VARIANT = 35,
-    OLAP_FIELD_TYPE_AGG_STATE = 36
+    OLAP_FIELD_TYPE_AGG_STATE = 36,
+    OLAP_FIELD_TYPE_DECIMAL256 = 37,
 };
 
 // Define all aggregation methods supported by Field
@@ -197,6 +198,7 @@ constexpr bool field_is_numeric_type(const FieldType& field_type) {
            field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 ||
            field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 ||
            field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I ||
+           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
            field_type == FieldType::OLAP_FIELD_TYPE_BOOL;
 }
 
diff --git a/be/src/olap/predicate_creator.h b/be/src/olap/predicate_creator.h
index 6298f6f231..dd9fded40e 100644
--- a/be/src/olap/predicate_creator.h
+++ b/be/src/olap/predicate_creator.h
@@ -96,8 +96,8 @@ private:
     static CppType convert(const TabletColumn& column, const std::string& condition) {
         StringParser::ParseResult result = StringParser::ParseResult::PARSE_SUCCESS;
         // return CppType value cast from int128_t
-        return StringParser::string_to_decimal<Type>(condition.data(), condition.size(),
-                                                     column.precision(), column.frac(), &result);
+        return CppType(StringParser::string_to_decimal<Type>(
+                condition.data(), condition.size(), column.precision(), column.frac(), &result));
     }
 };
 
@@ -195,6 +195,9 @@ std::unique_ptr<PredicateCreator<ConditionType>> get_creator(const FieldType& ty
     case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: {
         return std::make_unique<DecimalPredicateCreator<TYPE_DECIMAL128I, PT, ConditionType>>();
     }
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL256: {
+        return std::make_unique<DecimalPredicateCreator<TYPE_DECIMAL256, PT, ConditionType>>();
+    }
     case FieldType::OLAP_FIELD_TYPE_CHAR: {
         return std::make_unique<StringPredicateCreator<TYPE_CHAR, PT, ConditionType>>();
     }
diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp
index 8523740920..227e914002 100644
--- a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp
@@ -247,6 +247,9 @@ Status BitmapIndexWriter::create(const TypeInfo* type_info,
     case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
         res->reset(new BitmapIndexWriterImpl<FieldType::OLAP_FIELD_TYPE_DECIMAL128I>(type_info));
         break;
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
+        res->reset(new BitmapIndexWriterImpl<FieldType::OLAP_FIELD_TYPE_DECIMAL256>(type_info));
+        break;
     case FieldType::OLAP_FIELD_TYPE_BOOL:
         res->reset(new BitmapIndexWriterImpl<FieldType::OLAP_FIELD_TYPE_BOOL>(type_info));
         break;
diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
index 05d07acf88..54f446070f 100644
--- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h
+++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
@@ -267,6 +267,7 @@ inline Status parse_bit_shuffle_header(const Slice& data, size_t& num_elements,
     case 8:
     case 12:
     case 16:
+    case 32:
         break;
     default:
         return Status::InternalError("invalid size_of_elem:{}", size_of_element);
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
index 3afde1340c..e7e3e5e7f6 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
@@ -315,6 +315,7 @@ Status BloomFilterIndexWriter::create(const BloomFilterOptions& bf_options,
         M(FieldType::OLAP_FIELD_TYPE_DECIMAL32)
         M(FieldType::OLAP_FIELD_TYPE_DECIMAL64)
         M(FieldType::OLAP_FIELD_TYPE_DECIMAL128I)
+        M(FieldType::OLAP_FIELD_TYPE_DECIMAL256)
 #undef M
     default:
         return Status::NotSupported("unsupported type for bitmap index: {}",
diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp
index 573ea92532..462b5bdf51 100644
--- a/be/src/olap/rowset/segment_v2/encoding_info.cpp
+++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp
@@ -321,6 +321,10 @@ EncodingInfoResolver::EncodingInfoResolver() {
     _add_map<FieldType::OLAP_FIELD_TYPE_DECIMAL128I, PLAIN_ENCODING>();
     _add_map<FieldType::OLAP_FIELD_TYPE_DECIMAL128I, BIT_SHUFFLE, true>();
 
+    _add_map<FieldType::OLAP_FIELD_TYPE_DECIMAL256, BIT_SHUFFLE>();
+    _add_map<FieldType::OLAP_FIELD_TYPE_DECIMAL256, PLAIN_ENCODING>();
+    _add_map<FieldType::OLAP_FIELD_TYPE_DECIMAL256, BIT_SHUFFLE, true>();
+
     _add_map<FieldType::OLAP_FIELD_TYPE_HLL, PLAIN_ENCODING>();
 
     _add_map<FieldType::OLAP_FIELD_TYPE_OBJECT, PLAIN_ENCODING>();
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index a9f7daf4b4..b6682e3ae2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -624,6 +624,12 @@ Status InvertedIndexColumnWriter::create(const Field* field,
                 field_name, segment_file_name, dir, fs, index_meta);
         break;
     }
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL256: {
+        *res = std::make_unique<
+                InvertedIndexColumnWriterImpl<FieldType::OLAP_FIELD_TYPE_DECIMAL256>>(
+                field_name, segment_file_name, dir, fs, index_meta);
+        break;
+    }
     case FieldType::OLAP_FIELD_TYPE_BOOL: {
         *res = std::make_unique<InvertedIndexColumnWriterImpl<FieldType::OLAP_FIELD_TYPE_BOOL>>(
                 field_name, segment_file_name, dir, fs, index_meta);
diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.cpp b/be/src/olap/rowset/segment_v2/zone_map_index.cpp
index 75f0a9d845..40f755654e 100644
--- a/be/src/olap/rowset/segment_v2/zone_map_index.cpp
+++ b/be/src/olap/rowset/segment_v2/zone_map_index.cpp
@@ -200,7 +200,8 @@ Status ZoneMapIndexReader::_load(bool use_page_cache, bool kept_in_memory,
     M(TYPE_STRING)               \
     M(TYPE_DECIMAL32)            \
     M(TYPE_DECIMAL64)            \
-    M(TYPE_DECIMAL128I)
+    M(TYPE_DECIMAL128I)          \
+    M(TYPE_DECIMAL256)
 
 Status ZoneMapIndexWriter::create(Field* field, std::unique_ptr<ZoneMapIndexWriter>& res) {
     switch (field->type()) {
diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp
index a3297b4c2f..e55b1dcf2a 100644
--- a/be/src/olap/schema.cpp
+++ b/be/src/olap/schema.cpp
@@ -199,6 +199,9 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& fi
     case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
         ptr = doris::vectorized::PredicateColumnType<TYPE_DECIMAL128I>::create();
         break;
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
+        ptr = doris::vectorized::PredicateColumnType<TYPE_DECIMAL256>::create();
+        break;
     default:
         LOG(FATAL) << "Unexpected type when choosing predicate column, type=" << int(field.type());
     }
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 103ae7c8fd..6f3e5260f7 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -92,6 +92,8 @@ FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
         type = FieldType::OLAP_FIELD_TYPE_DECIMAL64;
     } else if (0 == upper_type_str.compare("DECIMAL128I")) {
         type = FieldType::OLAP_FIELD_TYPE_DECIMAL128I;
+    } else if (0 == upper_type_str.compare("DECIMAL256")) {
+        type = FieldType::OLAP_FIELD_TYPE_DECIMAL256;
     } else if (0 == upper_type_str.compare(0, 7, "DECIMAL")) {
         type = FieldType::OLAP_FIELD_TYPE_DECIMAL;
     } else if (0 == upper_type_str.compare(0, 7, "VARCHAR")) {
@@ -226,6 +228,9 @@ std::string TabletColumn::get_string_by_field_type(FieldType type) {
     case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
         return "DECIMAL128I";
 
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
+        return "DECIMAL256";
+
     case FieldType::OLAP_FIELD_TYPE_VARCHAR:
         return "VARCHAR";
 
@@ -351,6 +356,8 @@ uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint3
         return 8;
     case TPrimitiveType::DECIMAL128I:
         return 16;
+    case TPrimitiveType::DECIMAL256:
+        return 32;
     case TPrimitiveType::DECIMALV2:
         return 12; // use 12 bytes in olap engine.
     default:
diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp
index 2c92bd3f2c..b095d830e7 100644
--- a/be/src/olap/types.cpp
+++ b/be/src/olap/types.cpp
@@ -98,6 +98,7 @@ const TypeInfo* get_scalar_type_info(FieldType field_type) {
             get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_JSONB>(),
             get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARIANT>(),
             get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_AGG_STATE>(),
+            get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_DECIMAL256>(),
             nullptr};
     return field_type_array[int(field_type)];
 }
diff --git a/be/src/olap/types.h b/be/src/olap/types.h
index fba48f331d..542860362b 100644
--- a/be/src/olap/types.h
+++ b/be/src/olap/types.h
@@ -37,6 +37,7 @@
 #include <vector>
 
 #include "common/config.h"
+#include "common/consts.h"
 #include "common/status.h"
 #include "gutil/stringprintf.h"
 #include "gutil/strings/numbers.h"
@@ -53,6 +54,7 @@
 #include "util/string_parser.hpp"
 #include "util/types.h"
 #include "vec/common/arena.h"
+#include "vec/core/wide_integer.h"
 #include "vec/runtime/vdatetime_value.h"
 
 namespace doris {
@@ -690,6 +692,11 @@ struct CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DECIMAL128I> {
     using UnsignedCppType = uint128_t;
 };
 template <>
+struct CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DECIMAL256> { // C++ types backing DECIMAL256
+    using CppType = Int256;
+    using UnsignedCppType = wide::UInt256; // unsigned counterpart used alongside CppType
+};
+template <>
 struct CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE> {
     using CppType = uint24_t;
     using UnsignedCppType = uint24_t;
@@ -1083,6 +1090,33 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DECIMAL128I>
     }
 };
 
+template <>
+struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DECIMAL256> // DECIMAL256 <-> string hooks
+        : public BaseFieldtypeTraits<FieldType::OLAP_FIELD_TYPE_DECIMAL256> {
+    static Status from_string(void* buf, const std::string& scan_key, const int precision,
+                              const int scale) { // parses scan_key and stores an Int256 at buf
+        StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
+        auto value = StringParser::string_to_decimal<TYPE_DECIMAL256>(
+                scan_key.c_str(), scan_key.size(), BeConsts::MAX_DECIMAL256_PRECISION, scale,
+                &result); // NOTE(review): `precision` is never read; max precision is passed
+        if (result == StringParser::PARSE_FAILURE) { // only PARSE_FAILURE is rejected here
+            return Status::Error<ErrorCode::INVALID_ARGUMENT>(
+                    "FieldTypeTraits<OLAP_FIELD_TYPE_DECIMAL256>::from_string meet PARSE_FAILURE");
+        }
+        *reinterpret_cast<Int256*>(buf) = value; // buf is written as an Int256
+        return Status::OK();
+    }
+    static std::string to_string(const void* src) {
+        // TODO: support decimal256; for now this hits DCHECK(false) and returns ""
+        DCHECK(false);
+        return "";
+        // auto value = reinterpret_cast<const wide::Int256*>(src);
+        // fmt::memory_buffer buffer;
+        // fmt::format_to(buffer, "{}", *value);
+        // return std::string(buffer.data(), buffer.size());
+    }
+};
+
 template <>
 struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>
         : public BaseFieldtypeTraits<FieldType::OLAP_FIELD_TYPE_DATE> {
diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h
index 75df35e32a..f8e2e1fbe9 100644
--- a/be/src/olap/utils.h
+++ b/be/src/olap/utils.h
@@ -257,6 +257,7 @@ constexpr bool is_numeric_type(const FieldType& field_type) {
            field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 ||
            field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 ||
            field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I ||
+           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
            field_type == FieldType::OLAP_FIELD_TYPE_BOOL;
 }
 
diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp
index 07872f1b7c..adee7e7d0b 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -228,6 +228,7 @@ Status ScanLocalState<Derived>::_normalize_conjuncts() {
     M(DECIMAL32)                    \
     M(DECIMAL64)                    \
     M(DECIMAL128I)                  \
+    M(DECIMAL256)                   \
     M(DECIMALV2)                    \
     M(BOOLEAN)
             APPLY_FOR_PRIMITIVE_TYPE(M)
@@ -885,7 +886,8 @@ Status ScanLocalState<Derived>::_change_value_range(ColumnValueRange<PrimitiveTy
                          (PrimitiveType == TYPE_SMALLINT) || (PrimitiveType == TYPE_INT) ||
                          (PrimitiveType == TYPE_BIGINT) || (PrimitiveType == TYPE_LARGEINT) ||
                          (PrimitiveType == TYPE_DECIMAL32) || (PrimitiveType == TYPE_DECIMAL64) ||
-                         (PrimitiveType == TYPE_DECIMAL128I) || (PrimitiveType == TYPE_STRING) ||
+                         (PrimitiveType == TYPE_DECIMAL128I) ||
+                         (PrimitiveType == TYPE_DECIMAL256) || (PrimitiveType == TYPE_STRING) ||
                          (PrimitiveType == TYPE_BOOLEAN) || (PrimitiveType == TYPE_DATEV2)) {
         if constexpr (IsFixed) {
             func(temp_range,
diff --git a/be/src/runtime/decimalv2_value.h b/be/src/runtime/decimalv2_value.h
index 6472ad4275..932ac01c0b 100644
--- a/be/src/runtime/decimalv2_value.h
+++ b/be/src/runtime/decimalv2_value.h
@@ -28,6 +28,7 @@
 #include <string_view>
 
 #include "util/hash_util.hpp"
+#include "vec/core/wide_integer.h"
 
 namespace doris {
 
@@ -140,6 +141,12 @@ public:
     // ATTN: invoker must make sure no OVERFLOW
     operator int128_t() const { return static_cast<int128_t>(_value / ONE_BILLION); }
 
+    operator wide::Int256() const { // converts the raw _value; no ONE_BILLION division here
+        wide::Int256 result;
+        wide::Int256::_impl::wide_integer_from_builtin(result, _value);
+        return result;
+    }
+
     operator bool() const { return _value != 0; }
 
     operator int8_t() const { return static_cast<char>(operator int64_t()); }
diff --git a/be/src/runtime/define_primitive_type.h b/be/src/runtime/define_primitive_type.h
index 0ecacb9234..44a0f2c38e 100644
--- a/be/src/runtime/define_primitive_type.h
+++ b/be/src/runtime/define_primitive_type.h
@@ -63,8 +63,9 @@ enum PrimitiveType : PrimitiveNative {
     TYPE_VARIANT,         /* 32 */
     TYPE_LAMBDA_FUNCTION, /* 33 */
     TYPE_AGG_STATE,       /* 34 */
+    TYPE_DECIMAL256,      /* 35 */
 };
 
 constexpr PrimitiveNative BEGIN_OF_PRIMITIVE_TYPE = INVALID_TYPE;
-constexpr PrimitiveNative END_OF_PRIMITIVE_TYPE = TYPE_AGG_STATE;
+constexpr PrimitiveNative END_OF_PRIMITIVE_TYPE = TYPE_DECIMAL256;
 } // namespace doris
diff --git a/be/src/runtime/fold_constant_executor.cpp b/be/src/runtime/fold_constant_executor.cpp
index f3e7311266..e5fd16418d 100644
--- a/be/src/runtime/fold_constant_executor.cpp
+++ b/be/src/runtime/fold_constant_executor.cpp
@@ -230,7 +230,8 @@ string FoldConstantExecutor::_get_result(void* src, size_t size, const TypeDescr
     }
     case TYPE_DECIMAL32:
     case TYPE_DECIMAL64:
-    case TYPE_DECIMAL128I: {
+    case TYPE_DECIMAL128I:
+    case TYPE_DECIMAL256: {
         return column_type->to_string(*column_ptr, 0);
     }
     case TYPE_ARRAY:
diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp
index 56ac616014..91f2849901 100644
--- a/be/src/runtime/primitive_type.cpp
+++ b/be/src/runtime/primitive_type.cpp
@@ -125,6 +125,9 @@ PrimitiveType thrift_to_type(TPrimitiveType::type ttype) {
     case TPrimitiveType::DECIMAL128I:
         return TYPE_DECIMAL128I;
 
+    case TPrimitiveType::DECIMAL256:
+        return TYPE_DECIMAL256;
+
     case TPrimitiveType::CHAR:
         return TYPE_CHAR;
 
@@ -238,6 +241,9 @@ TPrimitiveType::type to_thrift(PrimitiveType ptype) {
     case TYPE_DECIMAL128I:
         return TPrimitiveType::DECIMAL128I;
 
+    case TYPE_DECIMAL256:
+        return TPrimitiveType::DECIMAL256;
+
     case TYPE_CHAR:
         return TPrimitiveType::CHAR;
 
@@ -342,6 +348,9 @@ std::string type_to_string(PrimitiveType t) {
     case TYPE_DECIMAL128I:
         return "DECIMAL128I";
 
+    case TYPE_DECIMAL256:
+        return "DECIMAL256";
+
     case TYPE_CHAR:
         return "CHAR";
 
@@ -451,6 +460,9 @@ std::string type_to_odbc_string(PrimitiveType t) {
     case TYPE_DECIMAL128I:
         return "decimal128";
 
+    case TYPE_DECIMAL256:
+        return "decimal256";
+
     case TYPE_CHAR:
         return "char";
 
diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h
index 32b62ebc19..5eff1541d6 100644
--- a/be/src/runtime/primitive_type.h
+++ b/be/src/runtime/primitive_type.h
@@ -59,6 +59,7 @@ constexpr bool is_enumeration_type(PrimitiveType type) {
     case TYPE_DECIMAL32:
     case TYPE_DECIMAL64:
     case TYPE_DECIMAL128I:
+    case TYPE_DECIMAL256:
     case TYPE_BOOLEAN:
     case TYPE_ARRAY:
     case TYPE_STRUCT:
@@ -205,6 +206,11 @@ struct PrimitiveTypeTraits<TYPE_DECIMAL128I> {
     using ColumnType = vectorized::ColumnDecimal<vectorized::Decimal128I>;
 };
 template <>
+struct PrimitiveTypeTraits<TYPE_DECIMAL256> { // maps TYPE_DECIMAL256 to its value/column types
+    using CppType = vectorized::Decimal256;
+    using ColumnType = vectorized::ColumnDecimal<vectorized::Decimal256>;
+};
+template <>
 struct PrimitiveTypeTraits<TYPE_LARGEINT> {
     using CppType = __int128_t;
     using ColumnType = vectorized::ColumnInt128;
diff --git a/be/src/runtime/raw_value.h b/be/src/runtime/raw_value.h
index e242249bf0..8749e6ea10 100644
--- a/be/src/runtime/raw_value.h
+++ b/be/src/runtime/raw_value.h
@@ -103,6 +103,8 @@ inline uint32_t RawValue::zlib_crc32(const void* v, size_t len, const PrimitiveT
         return HashUtil::zlib_crc_hash(v, 8, seed);
     case TYPE_DECIMAL128I:
         return HashUtil::zlib_crc_hash(v, 16, seed);
+    case TYPE_DECIMAL256:
+        return HashUtil::zlib_crc_hash(v, 32, seed);
     default:
         DCHECK(false) << "invalid type: " << type;
         return 0;
diff --git a/be/src/runtime/runtime_predicate.cpp b/be/src/runtime/runtime_predicate.cpp
index f053b842c7..2b949fb10e 100644
--- a/be/src/runtime/runtime_predicate.cpp
+++ b/be/src/runtime/runtime_predicate.cpp
@@ -112,6 +112,10 @@ Status RuntimePredicate::init(const PrimitiveType type, const bool nulls_first)
         _get_value_fn = get_decimal128_value;
         break;
     }
+    case PrimitiveType::TYPE_DECIMAL256: {
+        _get_value_fn = get_decimal256_value;
+        break;
+    }
     default:
         return Status::InvalidArgument("unsupported runtime predicate type {}", type);
     }
diff --git a/be/src/runtime/runtime_predicate.h b/be/src/runtime/runtime_predicate.h
index 9dd48279ac..b1d4dadf1a 100644
--- a/be/src/runtime/runtime_predicate.h
+++ b/be/src/runtime/runtime_predicate.h
@@ -173,6 +173,12 @@ private:
         auto v = field.get<DecimalField<Decimal128I>>();
         return cast_to_string<TYPE_DECIMAL128I, ValueType>(v.get_value(), v.get_scale());
     }
+
+    static std::string get_decimal256_value(const Field& field) { // Decimal256 field -> string
+        using ValueType = typename PrimitiveTypeTraits<TYPE_DECIMAL256>::CppType;
+        auto v = field.get<DecimalField<Decimal256>>(); // field is read as a Decimal256 value
+        return cast_to_string<TYPE_DECIMAL256, ValueType>(v.get_value(), v.get_scale());
+    }
 };
 
 } // namespace vectorized
diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index 5dda1201ce..d630c778a5 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -146,6 +146,10 @@ public:
                _query_options.check_overflow_for_decimal;
     }
 
+    bool enable_decima256() const { // sic ("decima"); true iff the option is set and true
+        return _query_options.__isset.enable_decimal256 && _query_options.enable_decimal256;
+    }
+
     bool enable_common_expr_pushdown() const {
         return _query_options.__isset.enable_common_expr_pushdown &&
                _query_options.enable_common_expr_pushdown;
diff --git a/be/src/runtime/type_limit.h b/be/src/runtime/type_limit.h
index bae6de28c5..d406689644 100644
--- a/be/src/runtime/type_limit.h
+++ b/be/src/runtime/type_limit.h
@@ -19,6 +19,7 @@
 
 #include "runtime/decimalv2_value.h"
 #include "vec/common/string_ref.h"
+#include "vec/core/wide_integer.h"
 
 namespace doris {
 
@@ -69,6 +70,13 @@ struct type_limit<vectorized::Decimal128> {
     }
     static vectorized::Decimal128 min() { return -max(); }
 };
+static Int256 MAX_DECIMAL256_INT({18446744073709551615ul, 8607968719199866879ul,
+                                  532749306367912313ul, 1593091911132452277ul}); // 10^76-1? verify
+template <> // NOTE(review): MAX_DECIMAL256_INT above is a non-const static defined in a header
+struct type_limit<vectorized::Decimal256> { // range is symmetric: min() is the negated max()
+    static vectorized::Decimal256 max() { return vectorized::Decimal256(MAX_DECIMAL256_INT); }
+    static vectorized::Decimal256 min() { return vectorized::Decimal256(-MAX_DECIMAL256_INT); }
+};
 
 template <>
 struct type_limit<VecDateTimeValue> {
diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp
index 4cb3d3ef5b..10a6b47f84 100644
--- a/be/src/runtime/types.cpp
+++ b/be/src/runtime/types.cpp
@@ -46,7 +46,8 @@ TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& types, int* idx)
             DCHECK(scalar_type.__isset.len);
             len = scalar_type.len;
         } else if (type == TYPE_DECIMALV2 || type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 ||
-                   type == TYPE_DECIMAL128I || type == TYPE_DATETIMEV2 || type == TYPE_TIMEV2) {
+                   type == TYPE_DECIMAL128I || type == TYPE_DECIMAL256 || type == TYPE_DATETIMEV2 ||
+                   type == TYPE_TIMEV2) {
             DCHECK(scalar_type.__isset.precision);
             DCHECK(scalar_type.__isset.scale);
             precision = scalar_type.precision;
@@ -151,7 +152,7 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const {
             // DCHECK_NE(len, -1);
             scalar_type.__set_len(len);
         } else if (type == TYPE_DECIMALV2 || type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 ||
-                   type == TYPE_DECIMAL128I || type == TYPE_DATETIMEV2) {
+                   type == TYPE_DECIMAL128I || type == TYPE_DECIMAL256 || type == TYPE_DATETIMEV2) {
             DCHECK_NE(precision, -1);
             DCHECK_NE(scale, -1);
             scalar_type.__set_precision(precision);
@@ -168,7 +169,7 @@ void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const {
     if (type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_HLL || type == TYPE_STRING) {
         scalar_type->set_len(len);
     } else if (type == TYPE_DECIMALV2 || type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 ||
-               type == TYPE_DECIMAL128I || type == TYPE_DATETIMEV2) {
+               type == TYPE_DECIMAL128I || type == TYPE_DECIMAL256 || type == TYPE_DATETIMEV2) {
         DCHECK_NE(precision, -1);
         DCHECK_NE(scale, -1);
         scalar_type->set_precision(precision);
@@ -218,7 +219,7 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField<PTypeNod
             DCHECK(scalar_type.has_len());
             len = scalar_type.len();
         } else if (type == TYPE_DECIMALV2 || type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 ||
-                   type == TYPE_DECIMAL128I || type == TYPE_DATETIMEV2) {
+                   type == TYPE_DECIMAL128I || type == TYPE_DECIMAL256 || type == TYPE_DATETIMEV2) {
             DCHECK(scalar_type.has_precision());
             DCHECK(scalar_type.has_scale());
             precision = scalar_type.precision();
@@ -307,6 +308,9 @@ std::string TypeDescriptor::debug_string() const {
     case TYPE_DECIMAL128I:
         ss << "DECIMAL128(" << precision << ", " << scale << ")";
         return ss.str();
+    case TYPE_DECIMAL256:
+        ss << "DECIMAL256(" << precision << ", " << scale << ")";
+        return ss.str();
     case TYPE_ARRAY: {
         ss << "ARRAY<" << children[0].debug_string() << ">";
         return ss.str();
diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h
index bb030b66d6..4cb7d51e4b 100644
--- a/be/src/runtime/types.h
+++ b/be/src/runtime/types.h
@@ -29,6 +29,7 @@
 #include <vector>
 
 #include "common/config.h"
+#include "common/consts.h"
 #include "runtime/define_primitive_type.h"
 
 namespace doris {
@@ -50,15 +51,6 @@ struct TypeDescriptor {
     int precision;
     int scale;
 
-    /// Must be kept in sync with FE's max precision/scale.
-    static constexpr int MAX_PRECISION = 38;
-    static constexpr int MAX_SCALE = MAX_PRECISION;
-
-    /// The maximum precision representable by a 4-byte decimal (Decimal4Value)
-    static constexpr int MAX_DECIMAL4_PRECISION = 9;
-    /// The maximum precision representable by a 8-byte decimal (Decimal8Value)
-    static constexpr int MAX_DECIMAL8_PRECISION = 18;
-
     std::vector<TypeDescriptor> children;
 
     bool result_is_nullable = false;
@@ -118,8 +110,8 @@ struct TypeDescriptor {
     }
 
     static TypeDescriptor create_decimalv2_type(int precision, int scale) {
-        DCHECK_LE(precision, MAX_PRECISION);
-        DCHECK_LE(scale, MAX_SCALE);
+        DCHECK_LE(precision, BeConsts::MAX_DECIMALV2_PRECISION);
+        DCHECK_LE(scale, BeConsts::MAX_DECIMALV2_SCALE);
         DCHECK_GE(precision, 0);
         DCHECK_LE(scale, precision);
         TypeDescriptor ret;
@@ -130,17 +122,19 @@ struct TypeDescriptor {
     }
 
     static TypeDescriptor create_decimalv3_type(int precision, int scale) {
-        DCHECK_LE(precision, MAX_PRECISION);
-        DCHECK_LE(scale, MAX_SCALE);
+        DCHECK_LE(precision, BeConsts::MAX_DECIMALV3_PRECISION);
+        DCHECK_LE(scale, BeConsts::MAX_DECIMALV3_SCALE);
         DCHECK_GE(precision, 0);
         DCHECK_LE(scale, precision);
         TypeDescriptor ret;
-        if (precision <= MAX_DECIMAL4_PRECISION) {
+        if (precision <= BeConsts::MAX_DECIMAL32_PRECISION) {
             ret.type = TYPE_DECIMAL32;
-        } else if (precision <= MAX_DECIMAL8_PRECISION) {
+        } else if (precision <= BeConsts::MAX_DECIMAL64_PRECISION) {
             ret.type = TYPE_DECIMAL64;
-        } else {
+        } else if (precision <= BeConsts::MAX_DECIMAL128_PRECISION) {
             ret.type = TYPE_DECIMAL128I;
+        } else {
+            ret.type = TYPE_DECIMAL256;
         }
         ret.precision = precision;
         ret.scale = scale;
@@ -216,7 +210,8 @@ struct TypeDescriptor {
     bool is_decimal_v2_type() const { return type == TYPE_DECIMALV2; }
 
     bool is_decimal_v3_type() const {
-        return (type == TYPE_DECIMAL32) || (type == TYPE_DECIMAL64) || (type == TYPE_DECIMAL128I);
+        return (type == TYPE_DECIMAL32) || (type == TYPE_DECIMAL64) || (type == TYPE_DECIMAL128I) ||
+               (type == TYPE_DECIMAL256); // decimal256 is classified as a decimal v3 type
     }
 
     bool is_datetime_type() const { return type == TYPE_DATETIME; }
@@ -244,13 +239,16 @@ struct TypeDescriptor {
 
     static inline int get_decimal_byte_size(int precision) {
         DCHECK_GT(precision, 0);
-        if (precision <= MAX_DECIMAL4_PRECISION) {
+        if (precision <= BeConsts::MAX_DECIMAL32_PRECISION) {
             return 4;
         }
-        if (precision <= MAX_DECIMAL8_PRECISION) {
+        if (precision <= BeConsts::MAX_DECIMAL64_PRECISION) {
             return 8;
         }
-        return 16;
+        if (precision <= BeConsts::MAX_DECIMAL128_PRECISION) {
+            return 16;
+        }
+        return 32; // any larger precision maps to 32 bytes; no upper bound is checked here
     }
 
     std::string debug_string() const;
diff --git a/be/src/util/binary_cast.hpp b/be/src/util/binary_cast.hpp
index 8365ef7e87..8a91ab3a57 100644
--- a/be/src/util/binary_cast.hpp
+++ b/be/src/util/binary_cast.hpp
@@ -23,6 +23,7 @@
 
 #include "runtime/decimalv2_value.h"
 #include "util/types.h"
+#include "vec/core/wide_integer.h"
 #include "vec/runtime/vdatetime_value.h"
 namespace doris {
 union TypeConverter {
@@ -76,6 +77,7 @@ To binary_cast(From from) {
     constexpr bool from_vec_dt_to_i64 = match_v<From, doris::VecDateTimeValue, To, __int64_t>;
     constexpr bool from_i128_to_decv2 = match_v<From, __int128_t, To, DecimalV2Value>;
     constexpr bool from_decv2_to_i128 = match_v<From, DecimalV2Value, To, __int128_t>;
+    constexpr bool from_decv2_to_i256 = match_v<From, DecimalV2Value, To, wide::Int256>;
 
     constexpr bool from_ui32_to_date_v2 = match_v<From, uint32_t, To, DateV2Value<DateV2ValueType>>;
 
@@ -89,8 +91,8 @@ To binary_cast(From from) {
 
     static_assert(from_u64_to_db || from_i64_to_db || from_db_to_i64 || from_db_to_u64 ||
                   from_i64_to_vec_dt || from_vec_dt_to_i64 || from_i128_to_decv2 ||
-                  from_decv2_to_i128 || from_ui32_to_date_v2 || from_date_v2_to_ui32 ||
-                  from_ui64_to_datetime_v2 || from_datetime_v2_to_ui64);
+                  from_decv2_to_i128 || from_decv2_to_i256 || from_ui32_to_date_v2 ||
+                  from_date_v2_to_ui32 || from_ui64_to_datetime_v2 || from_datetime_v2_to_ui64);
 
     if constexpr (from_u64_to_db) {
         TypeConverter conv;
diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp
index 5485e3ff5d..83289e73ad 100644
--- a/be/src/util/string_parser.hpp
+++ b/be/src/util/string_parser.hpp
@@ -41,6 +41,8 @@
 #include "runtime/large_int_value.h"
 #include "runtime/primitive_type.h"
 #include "vec/common/int_exp.h"
+#include "vec/core/extended_types.h"
+#include "vec/core/wide_integer.h"
 #include "vec/data_types/data_type_decimal.h"
 
 namespace doris {
@@ -91,7 +93,7 @@ public:
     template <typename T>
     static T get_scale_multiplier(int scale) {
         static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
-                              std::is_same_v<T, __int128>,
+                              std::is_same_v<T, __int128> || std::is_same_v<T, Int256>,
                       "You can only instantiate as int32_t, int64_t, __int128.");
         if constexpr (std::is_same_v<T, int32_t>) {
             return common::exp10_i32(scale);
@@ -99,6 +101,8 @@ public:
             return common::exp10_i64(scale);
         } else if constexpr (std::is_same_v<T, __int128>) {
             return common::exp10_i128(scale);
+        } else if constexpr (std::is_same_v<T, Int256>) {
+            return common::exp10_i256(scale);
         }
     }
 
@@ -568,6 +572,11 @@ inline int StringParser::StringParseTraits<__int128>::max_ascii_len() {
     return 39;
 }
 
+template <>
+inline int StringParser::StringParseTraits<wide::Int256>::max_ascii_len() {
+    return 78; // NOTE(review): 2^255-1 has 77 decimal digits; confirm 78 is intended
+}
+
 template <PrimitiveType P, typename T>
 T StringParser::string_to_decimal(const char* s, int len, int type_precision, int type_scale,
                                   ParseResult* result) {
diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.cpp b/be/src/vec/aggregate_functions/aggregate_function_avg.cpp
index 2e58ccb56e..0f3d0fd3bd 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_avg.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_avg.cpp
@@ -35,7 +35,18 @@ struct Avg {
 template <typename T>
 using AggregateFuncAvg = typename Avg<T>::Function;
 
+template <typename T>
+struct AvgDecimal256 { // avg whose state type comes from AvgNearestFieldTypeTrait256
+    using FieldType = typename AvgNearestFieldTypeTrait256<T>::Type;
+    using Function = AggregateFunctionAvg<T, AggregateFunctionAvgData<FieldType>>;
+};
+
+template <typename T>
+using AggregateFuncAvgDecimal256 = typename AvgDecimal256<T>::Function; // shorthand alias
+
 void register_aggregate_function_avg(AggregateFunctionSimpleFactory& factory) {
     factory.register_function_both("avg", creator_with_type::creator<AggregateFuncAvg>);
+    factory.register_function_both("avg_decimal256", // i.e. "avg" + "_decimal256"
+                                   creator_with_type::creator<AggregateFuncAvgDecimal256>);
 }
 } // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.h b/be/src/vec/aggregate_functions/aggregate_function_avg.h
index 9697658ec7..b2d95db679 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_avg.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_avg.h
@@ -58,6 +58,7 @@ namespace doris::vectorized {
 
 template <typename T>
 struct AggregateFunctionAvgData {
+    using ResultType = T;
     T sum {};
     UInt64 count = 0;
 
@@ -87,7 +88,11 @@ struct AggregateFunctionAvgData {
             Decimal128 ret(cal_ret.value());
             return ret;
         } else {
-            return static_cast<ResultT>(sum) / count;
+            if constexpr (IsDecimal256<T>) {
+                return static_cast<ResultT>(sum / T(count));
+            } else {
+                return static_cast<ResultT>(sum) / count;
+            }
         }
     }
 
@@ -107,16 +112,18 @@ template <typename T, typename Data>
 class AggregateFunctionAvg final
         : public IAggregateFunctionDataHelper<Data, AggregateFunctionAvg<T, Data>> {
 public:
-    using ResultType = DisposeDecimal<T, Float64>;
-    using ResultDataType =
-            std::conditional_t<IsDecimalV2<T>, DataTypeDecimal<Decimal128>,
-                               std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<Decimal128I>,
-                                                  DataTypeNumber<Float64>>>;
+    using ResultType = std::conditional_t<
+            IsDecimalV2<T>, Decimal128,
+            std::conditional_t<IsDecimalNumber<T>, typename Data::ResultType, Float64>>;
+    using ResultDataType = std::conditional_t<
+            IsDecimalV2<T>, DataTypeDecimal<Decimal128>,
+            std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<typename Data::ResultType>,
+                               DataTypeNumber<Float64>>>;
     using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
-    using ColVecResult =
-            std::conditional_t<IsDecimalV2<T>, ColumnDecimal<Decimal128>,
-                               std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128I>,
-                                                  ColumnVector<Float64>>>;
+    using ColVecResult = std::conditional_t<
+            IsDecimalV2<T>, ColumnDecimal<Decimal128>,
+            std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<typename Data::ResultType>,
+                               ColumnVector<Float64>>>;
 
     /// ctor for native types
     AggregateFunctionAvg(const DataTypes& argument_types_)
@@ -205,7 +212,7 @@ public:
         auto* data = dst_col.get_data().data();
         for (size_t i = 0; i != num_rows; ++i) {
             auto& state = *reinterpret_cast<Data*>(&data[sizeof(Data) * i]);
-            state.sum = src_data[i];
+            state.sum = typename Data::ResultType(src_data[i]);
             state.count = 1;
         }
     }
diff --git a/be/src/vec/aggregate_functions/aggregate_function_product.h b/be/src/vec/aggregate_functions/aggregate_function_product.h
index ca1c7c9fae..8a13ad0b03 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_product.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_product.h
@@ -134,7 +134,7 @@ public:
     void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
              Arena*) const override {
         const auto& column = assert_cast<const ColVecType&>(*columns[0]);
-        this->data(place).add(column.get_data()[row_num], multiplier);
+        this->data(place).add(TResult(column.get_data()[row_num]), multiplier);
     }
 
     void reset(AggregateDataPtr place) const override {
diff --git a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.h b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.h
index 618340dd88..dccbd9a4d5 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.h
@@ -80,7 +80,8 @@ public:
 
     AggregateFunctionPtr get(const std::string& name, const DataTypes& argument_types,
                              const bool result_is_nullable = false,
-                             int be_version = BeExecVersionManager::get_newest_version()) {
+                             int be_version = BeExecVersionManager::get_newest_version(),
+                             bool enable_decima256 = false) {
         bool nullable = false;
         for (const auto& type : argument_types) {
             if (type->is_nullable()) {
@@ -89,6 +90,11 @@ public:
         }
 
         std::string name_str = name;
+        if (enable_decima256) {
+            if (name_str == "sum" || name_str == "avg") {
+                name_str += "_decimal256";
+            }
+        }
         temporary_function_update(be_version, name_str);
 
         if (function_alias.count(name)) {
diff --git a/be/src/vec/aggregate_functions/aggregate_function_sum.cpp b/be/src/vec/aggregate_functions/aggregate_function_sum.cpp
index ede2425198..3ee7dc6ff4 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_sum.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_sum.cpp
@@ -27,6 +27,8 @@ namespace doris::vectorized {
 
 void register_aggregate_function_sum(AggregateFunctionSimpleFactory& factory) {
     factory.register_function_both("sum", creator_with_type::creator<AggregateFunctionSumSimple>);
+    factory.register_function_both(
+            "sum_decimal256", creator_with_type::creator<AggregateFunctionSumSimpleDecimal256>);
 }
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_sum.h b/be/src/vec/aggregate_functions/aggregate_function_sum.h
index 9f58023d50..41677dd419 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_sum.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_sum.h
@@ -101,7 +101,7 @@ public:
     void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
              Arena*) const override {
         const auto& column = assert_cast<const ColVecType&>(*columns[0]);
-        this->data(place).add(column.get_data()[row_num]);
+        this->data(place).add(TResult(column.get_data()[row_num]));
     }
 
     void reset(AggregateDataPtr place) const override { this->data(place).sum = {}; }
@@ -156,7 +156,7 @@ public:
         auto* dst_data = col.get_data().data();
         for (size_t i = 0; i != num_rows; ++i) {
             auto& state = *reinterpret_cast<Data*>(&dst_data[sizeof(Data) * i]);
-            state.sum = src_data[i];
+            state.sum = TResult(src_data[i]);
         }
     }
 
@@ -231,6 +231,18 @@ struct SumSimple {
 template <typename T>
 using AggregateFunctionSumSimple = typename SumSimple<T, true>::Function;
 
+const static std::string DECIMAL256_SUFFIX {"_decimal256"};
+template <typename T, bool level_up>
+struct SumSimpleDecimal256 {
+    /// @note With level_up the sum type is DisposeDecimal256<T, NearestFieldType<T>>, else T.
+    using ResultType = std::conditional_t<level_up, DisposeDecimal256<T, NearestFieldType<T>>, T>;
+    using AggregateDataType = AggregateFunctionSumData<ResultType>;
+    using Function = AggregateFunctionSum<T, ResultType, AggregateDataType>;
+};
+
+template <typename T>
+using AggregateFunctionSumSimpleDecimal256 = typename SumSimpleDecimal256<T, true>::Function;
+
 // do not level up return type for agg reader
 template <typename T>
 using AggregateFunctionSumSimpleReader = typename SumSimple<T, false>::Function;
diff --git a/be/src/vec/aggregate_functions/helpers.h b/be/src/vec/aggregate_functions/helpers.h
index f50524085c..58ddd455bc 100644
--- a/be/src/vec/aggregate_functions/helpers.h
+++ b/be/src/vec/aggregate_functions/helpers.h
@@ -45,7 +45,8 @@
     M(Decimal32)             \
     M(Decimal64)             \
     M(Decimal128)            \
-    M(Decimal128I)
+    M(Decimal128I)           \
+    M(Decimal256)
 
 /** If the serialized type is not the default type(string),
  * aggregation function need to override these functions:
diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h
index 44391ae8c7..c37fb48ba5 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -52,9 +52,10 @@ class Arena;
 } // namespace doris
 
 //TODO: use marcos below to decouple array function calls
-#define ALL_COLUMNS_NUMBER                                                                       \
-    ColumnUInt8, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnInt128, ColumnFloat32, \
-            ColumnFloat64, ColumnDecimal32, ColumnDecimal64, ColumnDecimal128I, ColumnDecimal128
+#define ALL_COLUMNS_NUMBER                                                                        \
+    ColumnUInt8, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnInt128, ColumnFloat32,  \
+            ColumnFloat64, ColumnDecimal32, ColumnDecimal64, ColumnDecimal128I, ColumnDecimal128, \
+            ColumnDecimal256
 #define ALL_COLUMNS_TIME ColumnDate, ColumnDateTime, ColumnDateV2, ColumnDateTimeV2
 #define ALL_COLUMNS_NUMERIC ALL_COLUMNS_NUMBER, ALL_COLUMNS_TIME
 #define ALL_COLUMNS_SIMPLE ALL_COLUMNS_NUMERIC, ColumnString
diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp
index b4574fd7b1..07508f8c6a 100644
--- a/be/src/vec/columns/column_decimal.cpp
+++ b/be/src/vec/columns/column_decimal.cpp
@@ -121,8 +121,9 @@ template <typename T>
 UInt64 ColumnDecimal<T>::get64(size_t n) const {
     if constexpr (sizeof(T) > sizeof(UInt64)) {
         LOG(FATAL) << "Method get64 is not supported for " << get_family_name();
+    } else {
+        return static_cast<typename T::NativeType>(data[n]);
     }
-    return static_cast<typename T::NativeType>(data[n]);
 }
 
 template <typename T>
@@ -502,6 +503,13 @@ Decimal128I ColumnDecimal<Decimal128I>::get_scale_multiplier() const {
     return common::exp10_i128(scale);
 }
 
+// Returns 10^scale (via common::exp10_i256) wrapped in a Decimal256. Duplicates the logic of
+// Decimal256 DataTypeDecimal<Decimal256>::get_scale_multiplier(UInt32 scale).
+template <>
+Decimal256 ColumnDecimal<Decimal256>::get_scale_multiplier() const {
+    return Decimal256(common::exp10_i256(scale));
+}
+
 template <typename T>
 ColumnPtr ColumnDecimal<T>::index(const IColumn& indexes, size_t limit) const {
     return select_index_impl(*this, indexes, limit);
@@ -511,4 +519,5 @@ template class ColumnDecimal<Decimal32>;
 template class ColumnDecimal<Decimal64>;
 template class ColumnDecimal<Decimal128>;
 template class ColumnDecimal<Decimal128I>;
+template class ColumnDecimal<Decimal256>;
 } // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h
index dcd135d46b..6c1e8893a3 100644
--- a/be/src/vec/columns/column_decimal.h
+++ b/be/src/vec/columns/column_decimal.h
@@ -201,7 +201,7 @@ public:
     }
     void get(size_t n, Field& res) const override { res = (*this)[n]; }
     bool get_bool(size_t n) const override { return bool(data[n]); }
-    Int64 get_int(size_t n) const override { return Int64(data[n] * scale); }
+    Int64 get_int(size_t n) const override { return Int64(data[n].value * scale); }
     UInt64 get64(size_t n) const override;
     bool is_default_at(size_t n) const override { return data[n].value == 0; }
 
diff --git a/be/src/vec/columns/columns_number.h b/be/src/vec/columns/columns_number.h
index 1ce930e4cd..a9d3c9ac16 100644
--- a/be/src/vec/columns/columns_number.h
+++ b/be/src/vec/columns/columns_number.h
@@ -53,6 +53,7 @@ using ColumnDecimal32 = ColumnDecimal<Decimal32>;
 using ColumnDecimal64 = ColumnDecimal<Decimal64>;
 using ColumnDecimal128 = ColumnDecimal<Decimal128>;
 using ColumnDecimal128I = ColumnDecimal<Decimal128I>;
+using ColumnDecimal256 = ColumnDecimal<Decimal256>;
 
 template <typename T>
 struct IsFixLenColumnType {
diff --git a/be/src/vec/common/arithmetic_overflow.h b/be/src/vec/common/arithmetic_overflow.h
index 0d0828a61b..b4b55eb47a 100644
--- a/be/src/vec/common/arithmetic_overflow.h
+++ b/be/src/vec/common/arithmetic_overflow.h
@@ -20,6 +20,7 @@
 
 #pragma once
 
+#include "vec/core/wide_integer.h"
 namespace common {
 template <typename T>
 inline bool add_overflow(T x, T y, T& res) {
@@ -50,6 +51,13 @@ inline bool add_overflow(__int128 x, __int128 y, __int128& res) {
     return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y);
 }
 
+template <> // wide::Int256 specialization: range-checks by hand instead of using the builtin
+inline bool add_overflow(wide::Int256 x, wide::Int256 y, wide::Int256& res) {
+    static constexpr wide::Int256 min_int256 = std::numeric_limits<wide::Int256>::min();
+    static constexpr wide::Int256 max_int256 = std::numeric_limits<wide::Int256>::max();
+    res = x + y; // written unconditionally, also when overflow is reported
+    return (y > 0 && x > max_int256 - y) || (y < 0 && x < min_int256 - y); // true == overflow
+}
 template <typename T>
 inline bool sub_overflow(T x, T y, T& res) {
     return __builtin_sub_overflow(x, y, &res);
@@ -79,6 +87,14 @@ inline bool sub_overflow(__int128 x, __int128 y, __int128& res) {
     return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y);
 }
 
+template <> // wide::Int256 specialization: range-checks by hand instead of using the builtin
+inline bool sub_overflow(wide::Int256 x, wide::Int256 y, wide::Int256& res) {
+    static constexpr wide::Int256 min_int256 = std::numeric_limits<wide::Int256>::min();
+    static constexpr wide::Int256 max_int256 = std::numeric_limits<wide::Int256>::max();
+    res = x - y; // written unconditionally, also when overflow is reported
+    return (y < 0 && x > max_int256 + y) || (y > 0 && x < min_int256 + y); // true == overflow
+}
+
 template <typename T>
 inline bool mul_overflow(T x, T y, T& res) {
     return __builtin_mul_overflow(x, y, &res);
@@ -109,4 +125,13 @@ inline bool mul_overflow(__int128 x, __int128 y, __int128& res) {
     unsigned __int128 b = (y > 0) ? y : -y;
     return (a * b) / b != a;
 }
+
+template <> // wide::Int256 specialization; stores x * y in res and returns true on overflow
+inline bool mul_overflow(wide::Int256 x, wide::Int256 y, wide::Int256& res) {
+    res = x * y;
+    if (!x || !y) return false; // zero factor: no overflow, and keeps b != 0 for the division
+    wide::UInt256 a = (x > 0) ? x : -x, b = (y > 0) ? y : -y; // magnitudes |x|, |y|
+    // Overflow if |x|*|y| wraps in UInt256, or fits there but res lost the expected sign.
+    return (a * b) / b != a || (res < 0) != ((x < 0) != (y < 0));
+}
 } // namespace common
diff --git a/be/src/vec/common/field_visitors.h b/be/src/vec/common/field_visitors.h
index 68a85170d4..8434483b77 100644
--- a/be/src/vec/common/field_visitors.h
+++ b/be/src/vec/common/field_visitors.h
@@ -63,6 +63,8 @@ typename std::decay_t<Visitor>::ResultType apply_visitor(Visitor&& visitor, F&&
         return visitor(field.template get<DecimalField<Decimal128>>());
     case Field::Types::Decimal128I:
         return visitor(field.template get<DecimalField<Decimal128I>>());
+    case Field::Types::Decimal256:
+        return visitor(field.template get<DecimalField<Decimal256>>());
     default:
         LOG(FATAL) << "Bad type of Field";
         return {};
diff --git a/be/src/vec/common/hash_table/hash.h b/be/src/vec/common/hash_table/hash.h
index 3c7df75b0a..9556bf87a0 100644
--- a/be/src/vec/common/hash_table/hash.h
+++ b/be/src/vec/common/hash_table/hash.h
@@ -26,6 +26,7 @@
 #include "vec/common/string_ref.h"
 #include "vec/common/uint128.h"
 #include "vec/core/types.h"
+#include "vec/core/wide_integer.h"
 
 // Here is an empirical value.
 static constexpr size_t HASH_MAP_PREFETCH_DIST = 16;
@@ -94,6 +95,9 @@ struct DefaultHash<T> {
 template <>
 struct DefaultHash<doris::StringRef> : public doris::StringRefHash {};
 
+template <>
+struct DefaultHash<wide::Int256> : public std::hash<wide::Int256> {}; // forwards to std::hash
+
 template <typename T>
 struct HashCRC32;
 
@@ -163,6 +167,23 @@ struct HashCRC32<doris::vectorized::UInt256> {
     }
 };
 
+template <> // hashes a wide::Int256 from its four items[] elements
+struct HashCRC32<wide::Int256> {
+    size_t operator()(const wide::Int256& x) const {
+#if defined(__SSE4_2__) || defined(__aarch64__)
+        doris::vectorized::UInt64 crc = -1ULL; // all-ones seed
+        crc = _mm_crc32_u64(crc, x.items[0]); // chain the CRC over items[0..3] in order
+        crc = _mm_crc32_u64(crc, x.items[1]);
+        crc = _mm_crc32_u64(crc, x.items[2]);
+        crc = _mm_crc32_u64(crc, x.items[3]);
+        return crc;
+#else // fallback: Hash128to64 each pair of items, then Hash128to64 the two results
+        return Hash128to64(
+                {Hash128to64({x.items[0], x.items[1]}), Hash128to64({x.items[2], x.items[3]})});
+#endif
+    }
+};
+
 template <>
 struct HashCRC32<doris::vectorized::UInt136> {
     size_t operator()(const doris::vectorized::UInt136& x) const {
diff --git a/be/src/vec/common/int_exp.h b/be/src/vec/common/int_exp.h
index cac7f24f04..81ca11bb11 100644
--- a/be/src/vec/common/int_exp.h
+++ b/be/src/vec/common/int_exp.h
@@ -24,6 +24,8 @@
 #include <limits>
 #include <utility>
 
+#include "vec/core/wide_integer.h"
+
 namespace exp_details {
 
 // compile-time exp(v, n) by linear recursion
@@ -78,4 +80,94 @@ inline constexpr __int128 exp10_i128(int x) {
     return exp_details::get_exp<__int128, 10, 39>(x);
 }
 
+using wide::Int256;
+inline Int256 exp10_i256(int x) {
+    if (x < 0) return 0; // negative exponent: returns 0
+    if (x > 76) return std::numeric_limits<Int256>::max(); // past the last entry (10^76)
+
+    // values[i] == 10^i for i in [0, 76]; larger powers are products of long long factors.
+    static constexpr Int256 i10e18 {1000000000000000000ll};
+    static const Int256 values[] = {
+            static_cast<Int256>(1ll),
+            static_cast<Int256>(10ll),
+            static_cast<Int256>(100ll),
+            static_cast<Int256>(1000ll),
+            static_cast<Int256>(10000ll),
+            static_cast<Int256>(100000ll),
+            static_cast<Int256>(1000000ll),
+            static_cast<Int256>(10000000ll),
+            static_cast<Int256>(100000000ll),
+            static_cast<Int256>(1000000000ll),
+            static_cast<Int256>(10000000000ll),
+            static_cast<Int256>(100000000000ll),
+            static_cast<Int256>(1000000000000ll),
+            static_cast<Int256>(10000000000000ll),
+            static_cast<Int256>(100000000000000ll),
+            static_cast<Int256>(1000000000000000ll),
+            static_cast<Int256>(10000000000000000ll),
+            static_cast<Int256>(100000000000000000ll),
+            i10e18,
+            i10e18 * 10ll,
+            i10e18 * 100ll,
+            i10e18 * 1000ll,
+            i10e18 * 10000ll,
+            i10e18 * 100000ll,
+            i10e18 * 1000000ll,
+            i10e18 * 10000000ll,
+            i10e18 * 100000000ll,
+            i10e18 * 1000000000ll,
+            i10e18 * 10000000000ll,
+            i10e18 * 100000000000ll,
+            i10e18 * 1000000000000ll,
+            i10e18 * 10000000000000ll,
+            i10e18 * 100000000000000ll,
+            i10e18 * 1000000000000000ll,
+            i10e18 * 10000000000000000ll,
+            i10e18 * 100000000000000000ll,
+            i10e18 * 100000000000000000ll * 10ll,
+            i10e18 * 100000000000000000ll * 100ll,
+            i10e18 * 100000000000000000ll * 1000ll,
+            i10e18 * 100000000000000000ll * 10000ll,
+            i10e18 * 100000000000000000ll * 100000ll,
+            i10e18 * 100000000000000000ll * 1000000ll,
+            i10e18 * 100000000000000000ll * 10000000ll,
+            i10e18 * 100000000000000000ll * 100000000ll,
+            i10e18 * 100000000000000000ll * 1000000000ll,
+            i10e18 * 100000000000000000ll * 10000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000ll,
+            i10e18 * 100000000000000000ll * 1000000000000ll,
+            i10e18 * 100000000000000000ll * 10000000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000ll,
+            i10e18 * 100000000000000000ll * 1000000000000000ll,
+            i10e18 * 100000000000000000ll * 10000000000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 10ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 1000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 10000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 1000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 10000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 1000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 10000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 1000000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 10000000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 1000000000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 10000000000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 10ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 100ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 1000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 10000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 100000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 1000000ll,
+            i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll *
+                    10000000ll,
+    };
+    return values[x];
+}
+
 } // namespace common
diff --git a/be/src/vec/core/accurate_comparison.h b/be/src/vec/core/accurate_comparison.h
index a48713b1bf..31af73711c 100644
--- a/be/src/vec/core/accurate_comparison.h
+++ b/be/src/vec/core/accurate_comparison.h
@@ -27,6 +27,8 @@
 #include "vec/common/nan_utils.h"
 #include "vec/common/string_ref.h"
 #include "vec/common/uint128.h"
+#include "vec/core/decomposed_float.h"
+#include "vec/core/extended_types.h"
 #include "vec/core/types.h"
 #include "vec/runtime/vdatetime_value.h"
 /** Perceptually-correct number comparisons.
@@ -161,309 +163,144 @@ inline bool_if_double_can_be_used<TAInt, TAFloat> equalsOpTmpl(TAFloat a, TAInt
 /* Final realizations */
 
 template <typename A, typename B>
-inline bool_if_not_safe_conversion<A, B> greaterOp(A a, B b) {
-    return greaterOpTmpl(a, b);
+bool lessOp(A a, B b) {
+    if constexpr (std::is_same_v<A, B>) {
+        return a < b;
+    }
+
+    /// float vs float
+    if constexpr (std::is_floating_point_v<A> && std::is_floating_point_v<B>) {
+        return a < b;
+    }
+
+    /// anything vs NaN
+    if (is_nan(a) || is_nan(b)) {
+        return false;
+    }
+
+    /// int vs int
+    if constexpr (is_integer<A> && is_integer<B>) {
+        /// same signedness
+        if constexpr (is_signed_v<A> == is_signed_v<B>) {
+            return a < b;
+        }
+
+        /// different signedness
+
+        if constexpr (is_signed_v<A> && !is_signed_v<B>) {
+            return a < 0 || static_cast<std::make_unsigned_t<A>>(a) < b;
+        }
+
+        if constexpr (!is_signed_v<A> && is_signed_v<B>) {
+            return b >= 0 && a < static_cast<std::make_unsigned_t<B>>(b);
+        }
+    }
+
+    /// int vs float
+    if constexpr (is_integer<A> && std::is_floating_point_v<B>) {
+        if constexpr (sizeof(A) <= 4) {
+            return static_cast<double>(a) < static_cast<double>(b);
+        }
+
+        return DecomposedFloat<B>(b).greater(a);
+    }
+
+    if constexpr (std::is_floating_point_v<A> && is_integer<B>) {
+        if constexpr (sizeof(B) <= 4) {
+            return static_cast<double>(a) < static_cast<double>(b);
+        }
+
+        return DecomposedFloat<A>(a).less(b);
+    }
+
+    static_assert(is_integer<A> || std::is_floating_point_v<A>);
+    static_assert(is_integer<B> || std::is_floating_point_v<B>);
+    __builtin_unreachable();
 }
 
 template <typename A, typename B>
-inline bool_if_safe_conversion<A, B> greaterOp(A a, B b) {
-    return a > b;
-}
-
-// Case 3b. 64-bit integers vs floats comparison.
-// See hint at https://github.com/JuliaLang/julia/issues/257 (but it doesn't work properly for -2**63)
-
-constexpr doris::vectorized::Int64 MAX_INT64_WITH_EXACT_FLOAT64_REPR = 9007199254740992LL; // 2^53
-
-template <>
-inline bool greaterOp<doris::vectorized::Float64, doris::vectorized::Int64>(
-        doris::vectorized::Float64 f, doris::vectorized::Int64 i) {
-    if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) {
-        return f > static_cast<doris::vectorized::Float64>(i);
-    }
-
-    return (f >= static_cast<doris::vectorized::Float64>(
-                         std::numeric_limits<
-                                 doris::vectorized::Int64>::max())) // rhs is 2**63 (not 2^63 - 1)
-           || (f > static_cast<doris::vectorized::Float64>(
-                           std::numeric_limits<doris::vectorized::Int64>::min()) &&
-               static_cast<doris::vectorized::Int64>(f) > i);
-}
-
-template <>
-inline bool greaterOp<doris::vectorized::Int64, doris::vectorized::Float64>(
-        doris::vectorized::Int64 i, doris::vectorized::Float64 f) {
-    if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) {
-        return f < static_cast<doris::vectorized::Float64>(i);
-    }
-
-    return (f < static_cast<doris::vectorized::Float64>(
-                        std::numeric_limits<doris::vectorized::Int64>::min())) ||
-           (f < static_cast<doris::vectorized::Float64>(
-                        std::numeric_limits<doris::vectorized::Int64>::max()) &&
-            i > static_cast<doris::vectorized::Int64>(f));
-}
-
-template <>
-inline bool greaterOp<doris::vectorized::Float64, doris::vectorized::UInt64>(
-        doris::vectorized::Float64 f, doris::vectorized::UInt64 u) {
-    if (u <= static_cast<doris::vectorized::UInt64>(MAX_INT64_WITH_EXACT_FLOAT64_REPR)) {
-        return f > static_cast<doris::vectorized::Float64>(u);
-    }
-
-    return (f >= static_cast<doris::vectorized::Float64>(
-                         std::numeric_limits<doris::vectorized::UInt64>::max())) ||
-           (f >= 0 && static_cast<doris::vectorized::UInt64>(f) > u);
-}
-
-template <>
-inline bool greaterOp<doris::vectorized::UInt64, doris::vectorized::Float64>(
-        doris::vectorized::UInt64 u, doris::vectorized::Float64 f) {
-    if (u <= static_cast<doris::vectorized::UInt64>(MAX_INT64_WITH_EXACT_FLOAT64_REPR)) {
-        return static_cast<doris::vectorized::Float64>(u) > f;
-    }
-
-    return (f < 0) || (f < static_cast<doris::vectorized::Float64>(
-                                   std::numeric_limits<doris::vectorized::UInt64>::max()) &&
-                       u > static_cast<doris::vectorized::UInt64>(f));
-}
-
-// Case 3b for float32
-template <>
-inline bool greaterOp<doris::vectorized::Float32, doris::vectorized::Int64>(
-        doris::vectorized::Float32 f, doris::vectorized::Int64 i) {
-    return greaterOp(static_cast<doris::vectorized::Float64>(f), i);
-}
-
-template <>
-inline bool greaterOp<doris::vectorized::Int64, doris::vectorized::Float32>(
-        doris::vectorized::Int64 i, doris::vectorized::Float32 f) {
-    return greaterOp(i, static_cast<doris::vectorized::Float64>(f));
-}
-
-template <>
-inline bool greaterOp<doris::vectorized::Float32, doris::vectorized::UInt64>(
-        doris::vectorized::Float32 f, doris::vectorized::UInt64 u) {
-    return greaterOp(static_cast<doris::vectorized::Float64>(f), u);
-}
-
-template <>
-inline bool greaterOp<doris::vectorized::UInt64, doris::vectorized::Float32>(
-        doris::vectorized::UInt64 u, doris::vectorized::Float32 f) {
-    return greaterOp(u, static_cast<doris::vectorized::Float64>(f));
-}
-
-template <>
-inline bool greaterOp<doris::vectorized::Float64, doris::vectorized::UInt128>(
-        doris::vectorized::Float64 f, doris::vectorized::UInt128 u) {
-    return u.low == 0 && greaterOp(f, u.high);
-}
-
-template <>
-inline bool greaterOp<doris::vectorized::UInt128, doris::vectorized::Float64>(
-        doris::vectorized::UInt128 u, doris::vectorized::Float64 f) {
-    return u.low != 0 || greaterOp(u.high, f);
-}
-
-template <>
-inline bool greaterOp<doris::vectorized::Float32, doris::vectorized::UInt128>(
-        doris::vectorized::Float32 f, doris::vectorized::UInt128 u) {
-    return greaterOp(static_cast<doris::vectorized::Float64>(f), u);
-}
-
-template <>
-inline bool greaterOp<doris::vectorized::UInt128, doris::vectorized::Float32>(
-        doris::vectorized::UInt128 u, doris::vectorized::Float32 f) {
-    return greaterOp(u, static_cast<doris::vectorized::Float64>(f));
+bool greaterOp(A a, B b) {
+    return lessOp(b, a);
 }
 
 template <typename A, typename B>
-inline bool_if_not_safe_conversion<A, B> equalsOp(A a, B b) {
-    return equalsOpTmpl(a, b);
+bool greaterOrEqualsOp(A a, B b) {
+    if (is_nan(a) || is_nan(b)) {
+        return false;
+    }
+
+    return !lessOp(a, b);
 }
 
 template <typename A, typename B>
-inline bool_if_safe_conversion<A, B> equalsOp(A a, B b) {
-    using LargestType = std::conditional_t<sizeof(A) >= sizeof(B), A, B>;
-    return static_cast<LargestType>(a) == static_cast<LargestType>(b);
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::Float64, doris::vectorized::UInt64>(
-        doris::vectorized::Float64 f, doris::vectorized::UInt64 u) {
-    return static_cast<doris::vectorized::UInt64>(f) == u &&
-           f == static_cast<doris::vectorized::Float64>(u);
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::UInt64, doris::vectorized::Float64>(
-        doris::vectorized::UInt64 u, doris::vectorized::Float64 f) {
-    return u == static_cast<doris::vectorized::UInt64>(f) &&
-           static_cast<doris::vectorized::Float64>(u) == f;
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::Float64, doris::vectorized::Int64>(
-        doris::vectorized::Float64 f, doris::vectorized::Int64 u) {
-    return static_cast<doris::vectorized::Int64>(f) == u &&
-           f == static_cast<doris::vectorized::Float64>(u);
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::Int64, doris::vectorized::Float64>(
-        doris::vectorized::Int64 u, doris::vectorized::Float64 f) {
-    return u == static_cast<doris::vectorized::Int64>(f) &&
-           static_cast<doris::vectorized::Float64>(u) == f;
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::Float32, doris::vectorized::UInt64>(
-        doris::vectorized::Float32 f, doris::vectorized::UInt64 u) {
-    return static_cast<doris::vectorized::UInt64>(f) == u &&
-           f == static_cast<doris::vectorized::Float32>(u);
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::UInt64, doris::vectorized::Float32>(
-        doris::vectorized::UInt64 u, doris::vectorized::Float32 f) {
-    return u == static_cast<doris::vectorized::UInt64>(f) &&
-           static_cast<doris::vectorized::Float32>(u) == f;
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::Float32, doris::vectorized::Int64>(
-        doris::vectorized::Float32 f, doris::vectorized::Int64 u) {
-    return static_cast<doris::vectorized::Int64>(f) == u &&
-           f == static_cast<doris::vectorized::Float32>(u);
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::Int64, doris::vectorized::Float32>(
-        doris::vectorized::Int64 u, doris::vectorized::Float32 f) {
-    return u == static_cast<doris::vectorized::Int64>(f) &&
-           static_cast<doris::vectorized::Float32>(u) == f;
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::UInt128, doris::vectorized::Float64>(
-        doris::vectorized::UInt128 u, doris::vectorized::Float64 f) {
-    return u.low == 0 && equalsOp(static_cast<doris::vectorized::UInt64>(u.high), f);
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::UInt128, doris::vectorized::Float32>(
-        doris::vectorized::UInt128 u, doris::vectorized::Float32 f) {
-    return equalsOp(u, static_cast<doris::vectorized::Float64>(f));
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::Float64, doris::vectorized::UInt128>(
-        doris::vectorized::Float64 f, doris::vectorized::UInt128 u) {
-    return equalsOp(u, f);
-}
-
-template <>
-inline bool equalsOp<doris::vectorized::Float32, doris::vectorized::UInt128>(
-        doris::vectorized::Float32 f, doris::vectorized::UInt128 u) {
-    return equalsOp(static_cast<doris::vectorized::Float64>(f), u);
-}
-
-inline bool greaterOp(doris::vectorized::Int128 i, doris::vectorized::Float64 f) {
-    static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64;
-    static constexpr __int128 max_int128 =
-            (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll;
-
-    if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) {
-        return static_cast<doris::vectorized::Float64>(i) > f;
+bool lessOrEqualsOp(A a, B b) {
+    if (is_nan(a) || is_nan(b)) {
+        return false;
     }
 
-    return (f < static_cast<doris::vectorized::Float64>(min_int128)) ||
-           (f < static_cast<doris::vectorized::Float64>(max_int128) &&
-            i > static_cast<doris::vectorized::Int128>(f));
-}
-
-inline bool greaterOp(doris::vectorized::Float64 f, doris::vectorized::Int128 i) {
-    static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64;
-    static constexpr __int128 max_int128 =
-            (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll;
-
-    if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) {
-        return f > static_cast<doris::vectorized::Float64>(i);
-    }
-
-    return (f >= static_cast<doris::vectorized::Float64>(max_int128)) ||
-           (f > static_cast<doris::vectorized::Float64>(min_int128) &&
-            static_cast<doris::vectorized::Int128>(f) > i);
-}
-
-inline bool greaterOp(doris::vectorized::Int128 i, doris::vectorized::Float32 f) {
-    return greaterOp(i, static_cast<doris::vectorized::Float64>(f));
-}
-inline bool greaterOp(doris::vectorized::Float32 f, doris::vectorized::Int128 i) {
-    return greaterOp(static_cast<doris::vectorized::Float64>(f), i);
-}
-
-inline bool equalsOp(doris::vectorized::Int128 i, doris::vectorized::Float64 f) {
-    return i == static_cast<doris::vectorized::Int128>(f) &&
-           static_cast<doris::vectorized::Float64>(i) == f;
-}
-inline bool equalsOp(doris::vectorized::Int128 i, doris::vectorized::Float32 f) {
-    return i == static_cast<doris::vectorized::Int128>(f) &&
-           static_cast<doris::vectorized::Float32>(i) == f;
-}
-inline bool equalsOp(doris::vectorized::Float64 f, doris::vectorized::Int128 i) {
-    return equalsOp(i, f);
-}
-inline bool equalsOp(doris::vectorized::Float32 f, doris::vectorized::Int128 i) {
-    return equalsOp(i, f);
+    return !lessOp(b, a);
 }
 
 template <typename A, typename B>
-inline bool_if_not_safe_conversion<A, B> notEqualsOp(A a, B b) {
+bool equalsOp(A a, B b) {
+    if constexpr (std::is_same_v<A, B>) {
+        return a == b;
+    }
+
+    /// float vs float
+    if constexpr (std::is_floating_point_v<A> && std::is_floating_point_v<B>) {
+        return a == b;
+    }
+
+    /// anything vs NaN
+    if (is_nan(a) || is_nan(b)) {
+        return false;
+    }
+
+    /// int vs int
+    if constexpr (is_integer<A> && is_integer<B>) {
+        /// same signedness
+        if constexpr (is_signed_v<A> == is_signed_v<B>) {
+            return a == b;
+        }
+
+        /// different signedness
+
+        if constexpr (is_signed_v<A> && !is_signed_v<B>) {
+            return a >= 0 && static_cast<std::make_unsigned_t<A>>(a) == b;
+        }
+
+        if constexpr (!is_signed_v<A> && is_signed_v<B>) {
+            return b >= 0 && a == static_cast<std::make_unsigned_t<B>>(b);
+        }
+    }
+
+    /// int vs float
+    if constexpr (is_integer<A> && std::is_floating_point_v<B>) {
+        if constexpr (sizeof(A) <= 4) {
+            return static_cast<double>(a) == static_cast<double>(b);
+        }
+
+        return DecomposedFloat<B>(b).equals(a);
+    }
+
+    if constexpr (std::is_floating_point_v<A> && is_integer<B>) {
+        if constexpr (sizeof(B) <= 4) {
+            return static_cast<double>(a) == static_cast<double>(b);
+        }
+
+        return DecomposedFloat<A>(a).equals(b);
+    }
+
+    /// e.g comparing UUID with integer.
+    return false;
+}
+
+template <typename A, typename B>
+bool notEqualsOp(A a, B b) {
     return !equalsOp(a, b);
 }
 
-template <typename A, typename B>
-inline bool_if_safe_conversion<A, B> notEqualsOp(A a, B b) {
-    return a != b;
-}
-
-template <typename A, typename B>
-inline bool_if_not_safe_conversion<A, B> lessOp(A a, B b) {
-    return greaterOp(b, a);
-}
-
-template <typename A, typename B>
-inline bool_if_safe_conversion<A, B> lessOp(A a, B b) {
-    return a < b;
-}
-
-template <typename A, typename B>
-inline bool_if_not_safe_conversion<A, B> lessOrEqualsOp(A a, B b) {
-    if (is_nan(a) || is_nan(b)) {
-        return false;
-    }
-    return !greaterOp(a, b);
-}
-
-template <typename A, typename B>
-inline bool_if_safe_conversion<A, B> lessOrEqualsOp(A a, B b) {
-    return a <= b;
-}
-
-template <typename A, typename B>
-inline bool_if_not_safe_conversion<A, B> greaterOrEqualsOp(A a, B b) {
-    if (is_nan(a) || is_nan(b)) {
-        return false;
-    }
-    return !greaterOp(b, a);
-}
-
-template <typename A, typename B>
-inline bool_if_safe_conversion<A, B> greaterOrEqualsOp(A a, B b) {
-    return a >= b;
-}
-
 /// Converts numeric to an equal numeric of other type.
 /// When `strict` is `true` check that result exactly same as input, otherwise just check overflow
 template <typename From, typename To, bool strict = true>
diff --git a/be/src/vec/core/call_on_type_index.h b/be/src/vec/core/call_on_type_index.h
index 283f7aeb07..ecc595f88a 100644
--- a/be/src/vec/core/call_on_type_index.h
+++ b/be/src/vec/core/call_on_type_index.h
@@ -72,6 +72,8 @@ bool call_on_basic_type(TypeIndex number, F&& f) {
             return f(TypePair<T, Decimal128>());
         case TypeIndex::Decimal128I:
             return f(TypePair<T, Decimal128I>());
+        case TypeIndex::Decimal256:
+            return f(TypePair<T, Decimal256>());
         default:
             break;
         }
@@ -143,6 +145,9 @@ bool call_on_basic_types(TypeIndex type_num1, TypeIndex type_num2, F&& f) {
         case TypeIndex::Decimal128I:
             return call_on_basic_type<Decimal128I, _int, _float, _decimal, _datetime>(
                     type_num2, std::forward<F>(f));
+        case TypeIndex::Decimal256:
+            return call_on_basic_type<Decimal256, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
         default:
             break;
         }
@@ -215,6 +220,8 @@ bool call_on_index_and_data_type(TypeIndex number, F&& f) {
         return f(TypePair<DataTypeDecimal<Decimal128>, T>());
     case TypeIndex::Decimal128I:
         return f(TypePair<DataTypeDecimal<Decimal128I>, T>());
+    case TypeIndex::Decimal256:
+        return f(TypePair<DataTypeDecimal<Decimal256>, T>());
 
     case TypeIndex::Date:
         return f(TypePair<DataTypeDate, T>());
@@ -270,6 +277,8 @@ bool call_on_index_and_number_data_type(TypeIndex number, F&& f) {
         return f(TypePair<DataTypeDecimal<Decimal128>, T>());
     case TypeIndex::Decimal128I:
         return f(TypePair<DataTypeDecimal<Decimal128I>, T>());
+    case TypeIndex::Decimal256:
+        return f(TypePair<DataTypeDecimal<Decimal256>, T>());
     default:
         break;
     }
diff --git a/be/src/vec/core/decimal_comparison.h b/be/src/vec/core/decimal_comparison.h
index 68a083dc15..82cba4fe84 100644
--- a/be/src/vec/core/decimal_comparison.h
+++ b/be/src/vec/core/decimal_comparison.h
@@ -27,6 +27,7 @@
 #include "vec/core/accurate_comparison.h"
 #include "vec/core/block.h"
 #include "vec/core/call_on_type_index.h"
+#include "vec/core/types.h"
 #include "vec/data_types/data_type_decimal.h"
 #include "vec/functions/function_helpers.h" /// todo core should not depend on function"
 
@@ -53,6 +54,10 @@ template <>
 struct ConstructDecInt<16> {
     using Type = Int128;
 };
+template <>
+struct ConstructDecInt<32> {
+    using Type = Int256;
+};
 
 template <typename T, typename U>
 struct DecCompareInt {
@@ -99,18 +104,22 @@ public:
     }
 
     static bool compare(A a, B b, UInt32 scale_a, UInt32 scale_b) {
-        static const UInt32 max_scale = max_decimal_precision<Decimal128>();
+        static const UInt32 max_scale = max_decimal_precision<Decimal256>();
         if (scale_a > max_scale || scale_b > max_scale) {
             LOG(FATAL) << "Bad scale of decimal field";
         }
 
         Shift shift;
-        if (scale_a < scale_b)
+        if (scale_a < scale_b) {
             shift.a = DataTypeDecimal<B>(max_decimal_precision<B>(), scale_b)
-                              .get_scale_multiplier(scale_b - scale_a);
-        if (scale_a > scale_b)
+                              .get_scale_multiplier(scale_b - scale_a)
+                              .value;
+        }
+        if (scale_a > scale_b) {
             shift.b = DataTypeDecimal<A>(max_decimal_precision<A>(), scale_a)
-                              .get_scale_multiplier(scale_a - scale_b);
+                              .get_scale_multiplier(scale_a - scale_b)
+                              .value;
+        }
 
         return apply_with_scale(a, b, shift);
     }
@@ -145,12 +154,12 @@ private:
             using Type = std::conditional_t<sizeof(T) >= sizeof(U), T, U>;
             auto type_ptr = decimal_result_type(*decimal0, *decimal1, false, false, false);
             const DataTypeDecimal<Type>* result_type = check_decimal<Type>(*type_ptr);
-            shift.a = result_type->scale_factor_for(*decimal0, false);
-            shift.b = result_type->scale_factor_for(*decimal1, false);
+            shift.a = result_type->scale_factor_for(*decimal0, false).value;
+            shift.b = result_type->scale_factor_for(*decimal1, false).value;
         } else if (decimal0) {
-            shift.b = decimal0->get_scale_multiplier();
+            shift.b = decimal0->get_scale_multiplier().value;
         } else if (decimal1) {
-            shift.a = decimal1->get_scale_multiplier();
+            shift.a = decimal1->get_scale_multiplier().value;
         }
 
         return shift;
@@ -161,7 +170,9 @@ private:
     static Shift getScales(const DataTypePtr& left_type, const DataTypePtr&) {
         Shift shift;
         const DataTypeDecimal<T>* decimal0 = check_decimal<T>(*left_type);
-        if (decimal0) shift.b = decimal0->get_scale_multiplier();
+        if (decimal0) {
+            shift.b = decimal0->get_scale_multiplier().value;
+        }
         return shift;
     }
 
@@ -170,7 +181,9 @@ private:
     static Shift getScales(const DataTypePtr&, const DataTypePtr& right_type) {
         Shift shift;
         const DataTypeDecimal<U>* decimal1 = check_decimal<U>(*right_type);
-        if (decimal1) shift.a = decimal1->get_scale_multiplier();
+        if (decimal1) {
+            shift.a = decimal1->get_scale_multiplier().value;
+        }
         return shift;
     }
 
diff --git a/be/src/vec/core/decomposed_float.h b/be/src/vec/core/decomposed_float.h
new file mode 100644
index 0000000000..a4784e3f13
--- /dev/null
+++ b/be/src/vec/core/decomposed_float.h
@@ -0,0 +1,219 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/DecomposedFloat.h
+// and modified by Doris
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "extended_types.h"
+
+/// Allows inspecting the internals of an IEEE-754 floating-point number.
+
+template <typename T>
+struct FloatTraits;
+
+template <>
+struct FloatTraits<float> {
+    using UInt = uint32_t;
+    static constexpr size_t bits = 32;
+    static constexpr size_t exponent_bits = 8;
+    static constexpr size_t mantissa_bits = bits - exponent_bits - 1;
+};
+
+template <>
+struct FloatTraits<double> {
+    using UInt = uint64_t;
+    static constexpr size_t bits = 64;
+    static constexpr size_t exponent_bits = 11;
+    static constexpr size_t mantissa_bits = bits - exponent_bits - 1;
+};
+
+/// x = sign * (2 ^ normalized_exponent) * (1 + mantissa * 2 ^ -mantissa_bits)
+/// x = sign * (2 ^ normalized_exponent + mantissa * 2 ^ (normalized_exponent - mantissa_bits))
+template <typename T>
+struct DecomposedFloat {
+    using Traits = FloatTraits<T>;
+
+    explicit DecomposedFloat(T x) { memcpy(&x_uint, &x, sizeof(x)); }
+
+    typename Traits::UInt x_uint;
+
+    bool isNegative() const { return x_uint >> (Traits::bits - 1); }
+
+    /// Returns 0 for both +0. and -0.
+    int sign() const { return (exponent() == 0 && mantissa() == 0) ? 0 : (isNegative() ? -1 : 1); }
+
+    uint16_t exponent() const {
+        return (x_uint >> (Traits::mantissa_bits)) &
+               (((1ull << (Traits::exponent_bits + 1)) - 1) >> 1);
+    }
+
+    int16_t normalizedExponent() const {
+        return int16_t(exponent()) - ((1ull << (Traits::exponent_bits - 1)) - 1);
+    }
+
+    uint64_t mantissa() const { return x_uint & ((1ull << Traits::mantissa_bits) - 1); }
+
+    int64_t mantissaWithSign() const { return isNegative() ? -mantissa() : mantissa(); }
+
+    /// NOTE Probably floating point instructions can be better.
+    bool isIntegerInRepresentableRange() const {
+        return x_uint == 0 ||
+               (normalizedExponent() >= 0 /// The number is not less than one
+                /// The number is inside the range where every integer has exact representation in float
+                && normalizedExponent() <= static_cast<int16_t>(Traits::mantissa_bits)
+                /// After multiplying by 2^exp, the fractional part becomes zero, means the number is integer
+                && ((mantissa() & ((1ULL << (Traits::mantissa_bits - normalizedExponent())) - 1)) ==
+                    0));
+    }
+
+    /// Compare float with integer of arbitrary width (both signed and unsigned are supported). Assuming two's complement arithmetic.
+    /// This function is generic, big integers (128, 256 bit) are supported as well.
+    /// Infinities are compared correctly. NaNs are treated similarly to infinities, so they can compare less than all numbers.
+    /// (note that we need total order)
+    /// Returns -1, 0 or 1.
+    template <typename Int>
+    int compare(Int rhs) const {
+        if (rhs == 0) {
+            return sign();
+        }
+
+        /// Different signs
+        if (isNegative() && rhs > 0) {
+            return -1;
+        }
+        if (!isNegative() && rhs < 0) {
+            return 1;
+        }
+
+        /// Fractional number with magnitude less than one
+        if (normalizedExponent() < 0) {
+            if (!isNegative()) {
+                return rhs > 0 ? -1 : 1;
+            }
+            return rhs >= 0 ? -1 : 1;
+        }
+
+        /// The case of the most negative integer
+        if constexpr (is_signed_v<Int>) {
+            if (rhs == std::numeric_limits<Int>::lowest()) {
+                assert(isNegative());
+
+                if (normalizedExponent() <
+                    static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>)) {
+                    return 1;
+                }
+                if (normalizedExponent() >
+                    static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>)) {
+                    return -1;
+                }
+
+                if (mantissa() == 0) {
+                    return 0;
+                }
+                return -1;
+            }
+        }
+
+        /// Too large number: abs(float) > abs(rhs). Also the case with infinities and NaN.
+        if (normalizedExponent() >= static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>)) {
+            return isNegative() ? -1 : 1;
+        }
+
+        using UInt = std::conditional_t<(sizeof(Int) > sizeof(typename Traits::UInt)),
+                                        std::make_unsigned_t<Int>, typename Traits::UInt>;
+        UInt uint_rhs = rhs < 0 ? -rhs : rhs;
+
+        /// Smaller octave: abs(rhs) < abs(float)
+        /// FYI, TIL: octave is also called "binade", https://en.wikipedia.org/wiki/Binade
+        if (uint_rhs < (static_cast<UInt>(1) << normalizedExponent())) {
+            return isNegative() ? -1 : 1;
+        }
+
+        /// Larger octave: abs(rhs) > abs(float)
+        if (normalizedExponent() + 1 < static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>) &&
+            uint_rhs >= (static_cast<UInt>(1) << (normalizedExponent() + 1))) {
+            return isNegative() ? 1 : -1;
+        }
+
+        /// The same octave
+        /// uint_rhs == 2 ^ normalizedExponent + mantissa * 2 ^ (normalizedExponent - mantissa_bits)
+
+        bool large_and_always_integer =
+                normalizedExponent() >= static_cast<int16_t>(Traits::mantissa_bits);
+
+        UInt a = large_and_always_integer
+                         ? static_cast<UInt>(mantissa())
+                                   << (normalizedExponent() - Traits::mantissa_bits)
+                         : static_cast<UInt>(mantissa()) >>
+                                   (Traits::mantissa_bits - normalizedExponent());
+
+        UInt b = uint_rhs - (static_cast<UInt>(1) << normalizedExponent());
+
+        if (a < b) {
+            return isNegative() ? 1 : -1;
+        }
+        if (a > b) {
+            return isNegative() ? -1 : 1;
+        }
+
+        /// Float has no fractional part means that the numbers are equal.
+        if (large_and_always_integer ||
+            (mantissa() & ((1ULL << (Traits::mantissa_bits - normalizedExponent())) - 1)) == 0) {
+            return 0;
+        }
+        /// Float has fractional part means its abs value is larger.
+        return isNegative() ? -1 : 1;
+    }
+
+    template <typename Int>
+    bool equals(Int rhs) const {
+        return compare(rhs) == 0;
+    }
+
+    template <typename Int>
+    bool notEquals(Int rhs) const {
+        return compare(rhs) != 0;
+    }
+
+    template <typename Int>
+    bool less(Int rhs) const {
+        return compare(rhs) < 0;
+    }
+
+    template <typename Int>
+    bool greater(Int rhs) const {
+        return compare(rhs) > 0;
+    }
+
+    template <typename Int>
+    bool lessOrEquals(Int rhs) const {
+        return compare(rhs) <= 0;
+    }
+
+    template <typename Int>
+    bool greaterOrEquals(Int rhs) const {
+        return compare(rhs) >= 0;
+    }
+};
+
+using DecomposedFloat64 = DecomposedFloat<double>;
+using DecomposedFloat32 = DecomposedFloat<float>;
diff --git a/be/src/vec/core/extended_types.h b/be/src/vec/core/extended_types.h
new file mode 100644
index 0000000000..d7e088dd45
--- /dev/null
+++ b/be/src/vec/core/extended_types.h
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/extended_types.h
+// and modified by Doris
+#pragma once
+
+#include <type_traits>
+
+#include "wide_integer.h"
+
+using Int256 = wide::integer<256, signed>;
+using UInt256 = wide::integer<256, unsigned>;
+
+static_assert(sizeof(Int256) == 32);
+static_assert(sizeof(UInt256) == 32);
+
+/// The standard library type traits, such as std::is_arithmetic, with one exception
+/// (std::common_type), are "set in stone". Attempting to specialize them causes undefined behavior.
+/// So instead of using the std type_traits, we use our own version which allows extension.
+template <typename T>
+struct is_signed // NOLINT(readability-identifier-naming)
+{
+    static constexpr bool value = std::is_signed_v<T>;
+};
+
+template <>
+struct is_signed<Int256> {
+    static constexpr bool value = true;
+};
+
+template <typename T>
+inline constexpr bool is_signed_v = is_signed<T>::value;
+
+template <typename T>
+struct is_unsigned // NOLINT(readability-identifier-naming)
+{
+    static constexpr bool value = std::is_unsigned_v<T>;
+};
+
+template <typename T>
+inline constexpr bool is_unsigned_v = is_unsigned<T>::value;
+
+template <class T>
+concept is_integer =
+        std::is_integral_v<T> || std::is_same_v<T, Int256> || std::is_same_v<T, UInt256>;
+
+namespace std {
+template <>
+struct make_unsigned<Int256> {
+    using type = UInt256;
+};
+template <>
+struct make_unsigned<UInt256> {
+    using type = UInt256;
+};
+
+template <typename T>
+using make_unsigned_t = typename make_unsigned<T>::type;
+
+template <>
+struct make_signed<Int256> {
+    using type = Int256;
+};
+template <>
+struct make_signed<UInt256> {
+    using type = Int256;
+};
+
+template <typename T>
+using make_signed_t = typename make_signed<T>::type;
+} // namespace std
\ No newline at end of file
diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp
index 9970b284ce..337c2c395f 100644
--- a/be/src/vec/core/field.cpp
+++ b/be/src/vec/core/field.cpp
@@ -170,6 +170,7 @@ bool dec_less_or_equal(T x, T y, UInt32 x_scale, UInt32 y_scale) {
 DECLARE_DECIMAL_COMPARISON(Decimal32)
 DECLARE_DECIMAL_COMPARISON(Decimal64)
 DECLARE_DECIMAL_COMPARISON(Decimal128)
+DECLARE_DECIMAL_COMPARISON(Decimal256)
 
 template <>
 bool decimal_equal(Decimal128I x, Decimal128I y, UInt32 xs, UInt32 ys) {
diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h
index 9aadfe2a0a..6b9a2fc352 100644
--- a/be/src/vec/core/field.h
+++ b/be/src/vec/core/field.h
@@ -87,7 +87,12 @@ struct AvgNearestFieldTypeTrait<Decimal128> {
 
 template <>
 struct AvgNearestFieldTypeTrait<Decimal128I> {
-    using Type = Decimal128;
+    using Type = Decimal128I;
+};
+
+template <>
+struct AvgNearestFieldTypeTrait<Decimal256> {
+    using Type = Decimal256;
 };
 
 template <>
@@ -95,6 +100,41 @@ struct AvgNearestFieldTypeTrait<Int64> {
     using Type = double;
 };
 
+template <typename T>
+struct AvgNearestFieldTypeTrait256 {
+    using Type = typename NearestFieldTypeImpl<T>::Type;
+};
+
+template <>
+struct AvgNearestFieldTypeTrait256<Decimal32> {
+    using Type = Decimal256;
+};
+
+template <>
+struct AvgNearestFieldTypeTrait256<Decimal64> {
+    using Type = Decimal256;
+};
+
+template <>
+struct AvgNearestFieldTypeTrait256<Decimal128> {
+    using Type = Decimal128;
+};
+
+template <>
+struct AvgNearestFieldTypeTrait256<Decimal128I> {
+    using Type = Decimal256;
+};
+
+template <>
+struct AvgNearestFieldTypeTrait256<Decimal256> {
+    using Type = Decimal256;
+};
+
+template <>
+struct AvgNearestFieldTypeTrait256<Int64> {
+    using Type = double;
+};
+
 class Field;
 
 using FieldVector = std::vector<Field>;
@@ -319,6 +359,8 @@ public:
             Bitmap = 27,
             HyperLogLog = 28,
             QuantileState = 29,
+            Int256 = 30,
+            Decimal256 = 31,
         };
 
         static const int MIN_NON_POD = 16;
@@ -355,6 +397,8 @@ public:
                 return "Decimal128";
             case Decimal128I:
                 return "Decimal128I";
+            case Decimal256:
+                return "Decimal256";
             case FixedLengthObject:
                 return "FixedLengthObject";
             case VariantMap:
@@ -380,7 +424,7 @@ public:
 
     static bool is_decimal(Types::Which which) {
         return (which >= Types::Decimal32 && which <= Types::Decimal128) ||
-               which == Types::Decimal128I;
+               which == Types::Decimal128I || which == Types::Decimal256;
     }
 
     Field() : which(Types::Null) {}
@@ -551,6 +595,8 @@ public:
             return get<Decimal128>() <=> rhs.get<Decimal128>();
         case Types::Decimal128I:
             return get<Decimal128I>() <=> rhs.get<Decimal128I>();
+        case Types::Decimal256:
+            return get<Decimal256>() <=> rhs.get<Decimal256>();
         default:
             LOG(FATAL) << "lhs type not equal with rhs, lhs=" << Types::to_string(which)
                        << ", rhs=" << Types::to_string(rhs.which);
@@ -562,7 +608,8 @@ private:
     std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(Types::Which), Null, UInt64, UInt128, Int64,
                          Int128, Float64, String, JsonbField, Array, Tuple, Map, VariantMap,
                          DecimalField<Decimal32>, DecimalField<Decimal64>, DecimalField<Decimal128>,
-                         DecimalField<Decimal128I>, BitmapValue, HyperLogLog, QuantileState>
+                         DecimalField<Decimal128I>, DecimalField<Decimal256>, BitmapValue,
+                         HyperLogLog, QuantileState>
             storage;
 
     Types::Which which;
@@ -640,6 +687,9 @@ private:
         case Types::Decimal128I:
             f(field.template get<DecimalField<Decimal128I>>());
             return;
+        case Types::Decimal256:
+            f(field.template get<DecimalField<Decimal256>>());
+            return;
         case Types::VariantMap:
             f(field.template get<VariantMap>());
             return;
@@ -753,6 +803,10 @@ struct TypeId<DecimalField<Decimal128I>> {
     static constexpr const TypeIndex value = TypeIndex::Decimal128I;
 };
 template <>
+struct TypeId<DecimalField<Decimal256>> {
+    static constexpr const TypeIndex value = TypeIndex::Decimal256;
+};
+template <>
 struct Field::TypeToEnum<Null> {
     static constexpr Types::Which value = Types::Null;
 };
@@ -773,6 +827,10 @@ struct Field::TypeToEnum<Int128> {
     static constexpr Types::Which value = Types::Int128;
 };
 template <>
+struct Field::TypeToEnum<Int256> {
+    static constexpr Types::Which value = Types::Int256;
+};
+template <>
 struct Field::TypeToEnum<Float64> {
     static constexpr Types::Which value = Types::Float64;
 };
@@ -813,6 +871,10 @@ struct Field::TypeToEnum<DecimalField<Decimal128I>> {
     static constexpr Types::Which value = Types::Decimal128I;
 };
 template <>
+struct Field::TypeToEnum<DecimalField<Decimal256>> {
+    static constexpr Types::Which value = Types::Decimal256;
+};
+template <>
 struct Field::TypeToEnum<VariantMap> {
     static constexpr Types::Which value = Types::VariantMap;
 };
@@ -893,6 +955,10 @@ struct Field::EnumToType<Field::Types::Decimal128I> {
     using Type = DecimalField<Decimal128I>;
 };
 template <>
+struct Field::EnumToType<Field::Types::Decimal256> {
+    using Type = DecimalField<Decimal256>;
+};
+template <>
 struct Field::EnumToType<Field::Types::VariantMap> {
     using Type = VariantMap;
 };
@@ -993,6 +1059,10 @@ struct NearestFieldTypeImpl<Decimal128I> {
     using Type = DecimalField<Decimal128I>;
 };
 template <>
+struct NearestFieldTypeImpl<Decimal256> {
+    using Type = DecimalField<Decimal256>;
+};
+template <>
 struct NearestFieldTypeImpl<DecimalField<Decimal32>> {
     using Type = DecimalField<Decimal32>;
 };
@@ -1009,6 +1079,10 @@ struct NearestFieldTypeImpl<DecimalField<Decimal128I>> {
     using Type = DecimalField<Decimal128I>;
 };
 template <>
+struct NearestFieldTypeImpl<DecimalField<Decimal256>> {
+    using Type = DecimalField<Decimal256>;
+};
+template <>
 struct NearestFieldTypeImpl<Float32> {
     using Type = Float64;
 };
diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h
index abb5c9255c..f24a85b1f3 100644
--- a/be/src/vec/core/types.h
+++ b/be/src/vec/core/types.h
@@ -23,11 +23,16 @@
 #include <cstdint>
 #include <limits>
 #include <string>
+#include <type_traits>
 #include <vector>
 
 #include "common/consts.h"
 #include "util/binary_cast.hpp"
 #include "vec/common/int_exp.h"
+#include "vec/core/wide_integer.h"
+#include "vec/core/wide_integer_to_string.h"
+
+using wide::Int256;
 
 namespace doris {
 
@@ -92,7 +97,9 @@ enum class TypeIndex {
     VARIANT = 41,
     QuantileState = 42,
     Time = 43,
-    AggState
+    AggState = 44,
+    Decimal256 = 45,
+    Int256
 };
 
 struct Consted {
@@ -277,10 +284,21 @@ struct TypeName<Int128> {
     static const char* get() { return "Int128"; }
 };
 template <>
+inline constexpr bool IsNumber<Int256> = true;
+template <>
+struct TypeName<Int256> {
+    static const char* get() { return "Int256"; }
+};
+template <>
 struct TypeId<Int128> {
     static constexpr const TypeIndex value = TypeIndex::Int128;
 };
 
+template <>
+struct TypeId<Int256> {
+    static constexpr const TypeIndex value = TypeIndex::Int256;
+};
+
 using Date = Int64;
 using DateTime = Int64;
 using DateV2 = UInt32;
@@ -300,11 +318,18 @@ template <>
 inline constexpr Int128 decimal_scale_multiplier<Int128>(UInt32 scale) {
     return common::exp10_i128(scale);
 }
+// Not constexpr: gcc reports an error if constexpr is added to this declaration.
+template <>
+inline Int256 decimal_scale_multiplier<Int256>(UInt32 scale) {
+    return common::exp10_i256(scale);
+}
 
 /// Own FieldType for Decimal.
 /// It is only a "storage" for decimal. To perform operations, you also have to provide a scale (number of digits after point).
 template <typename T>
 struct Decimal {
+    static_assert(std::is_same_v<T, Int32> || std::is_same_v<T, Int64> ||
+                  std::is_same_v<T, Int128>);
     using NativeType = T;
 
     Decimal() = default;
@@ -314,6 +339,7 @@ struct Decimal {
 #define DECLARE_NUMERIC_CTOR(TYPE) \
     Decimal(const TYPE& value_) : value(value_) {}
 
+    DECLARE_NUMERIC_CTOR(Int256)
     DECLARE_NUMERIC_CTOR(Int128)
     DECLARE_NUMERIC_CTOR(Int32)
     DECLARE_NUMERIC_CTOR(Int64)
@@ -348,6 +374,12 @@ struct Decimal {
 
     operator T() const { return value; }
 
+    operator wide::Int256() const {
+        wide::Int256 result;
+        wide::Int256::_impl::wide_integer_from_builtin(result, value);
+        return result;
+    }
+
     const Decimal<T>& operator++() {
         value++;
         return *this;
@@ -384,8 +416,11 @@ struct Decimal {
         constexpr auto precision =
                 std::is_same_v<T, Int32>
                         ? BeConsts::MAX_DECIMAL32_PRECISION
-                        : (std::is_same_v<T, Int64> ? BeConsts::MAX_DECIMAL64_PRECISION
-                                                    : BeConsts::MAX_DECIMAL128_PRECISION);
+                        : (std::is_same_v<T, Int64>
+                                   ? BeConsts::MAX_DECIMAL64_PRECISION
+                                   : (std::is_same_v<T, __int128>
+                                              ? BeConsts::MAX_DECIMAL128_PRECISION
+                                              : BeConsts::MAX_DECIMAL256_PRECISION));
         return precision + 1 // Add a space for decimal place
                + 1           // Add a space for leading 0
                + 1;          // Add a space for negative sign
@@ -393,18 +428,27 @@ struct Decimal {
 
     std::string to_string(UInt32 scale) const {
         if (value == std::numeric_limits<T>::min()) {
-            fmt::memory_buffer buffer;
-            fmt::format_to(buffer, "{}", value);
-            std::string res {buffer.data(), buffer.size()};
-            res.insert(res.size() - scale, ".");
-            return res;
+            if constexpr (std::is_same_v<T, Int256>) {
+                std::string res {wide::to_string(value)};
+                res.insert(res.size() - scale, ".");
+                return res;
+            } else {
+                fmt::memory_buffer buffer;
+                fmt::format_to(buffer, "{}", value);
+                std::string res {buffer.data(), buffer.size()};
+                res.insert(res.size() - scale, ".");
+                return res;
+            }
         }
 
         static constexpr auto precision =
                 std::is_same_v<T, Int32>
                         ? BeConsts::MAX_DECIMAL32_PRECISION
-                        : (std::is_same_v<T, Int64> ? BeConsts::MAX_DECIMAL64_PRECISION
-                                                    : BeConsts::MAX_DECIMAL128_PRECISION);
+                        : (std::is_same_v<T, Int64>
+                                   ? BeConsts::MAX_DECIMAL64_PRECISION
+                                   : (std::is_same_v<T, __int128>
+                                              ? BeConsts::MAX_DECIMAL128_PRECISION
+                                              : BeConsts::MAX_DECIMAL256_PRECISION));
         bool is_nagetive = value < 0;
         int max_result_length = precision + (scale > 0) // Add a space for decimal place
                                 + (scale == precision)  // Add a space for leading 0
@@ -425,14 +469,20 @@ struct Decimal {
             whole_part = abs_value / decimal_scale_multiplier<T>(scale);
             frac_part = abs_value % decimal_scale_multiplier<T>(scale);
         }
-        auto end = fmt::format_to(str.data() + pos, "{}", whole_part);
-        pos = end - str.data();
+        if constexpr (std::is_same_v<T, Int256>) {
+            std::string num_str {wide::to_string(whole_part)};
+            auto end = fmt::format_to(str.data() + pos, "{}", num_str);
+            pos = end - str.data();
+        } else {
+            auto end = fmt::format_to(str.data() + pos, "{}", whole_part);
+            pos = end - str.data();
+        }
 
         if (scale) {
             str[pos++] = '.';
             for (auto end_pos = pos + scale - 1; end_pos >= pos && frac_part > 0;
                  --end_pos, frac_part /= 10) {
-                str[end_pos] += frac_part % 10;
+                str[end_pos] += (int)(frac_part % 10);
             }
         }
 
@@ -450,8 +500,15 @@ struct Decimal {
     __attribute__((always_inline)) size_t to_string(char* dst, UInt32 scale,
                                                     const T& scale_multiplier) const {
         if (UNLIKELY(value == std::numeric_limits<T>::min())) {
-            auto end = fmt::format_to(dst, "{}", value);
-            return end - dst;
+            if constexpr (std::is_same_v<T, Int256>) {
+                // TODO: scale is ignored here, so no decimal point is written for the minimum value.
+                std::string num_str {wide::to_string(value)};
+                auto end = fmt::format_to(dst, "{}", num_str);
+                return end - dst;
+            } else {
+                auto end = fmt::format_to(dst, "{}", value);
+                return end - dst;
+            }
         }
 
         bool is_negative = value < 0;
@@ -469,8 +526,14 @@ struct Decimal {
             whole_part = abs_value / scale_multiplier;
             frac_part = abs_value % scale_multiplier;
         }
-        auto end = fmt::format_to(dst + pos, "{}", whole_part);
-        pos = end - dst;
+        if constexpr (std::is_same_v<T, Int256>) {
+            std::string num_str {wide::to_string(whole_part)};
+            auto end = fmt::format_to(dst + pos, "{}", num_str);
+            pos = end - dst;
+        } else {
+            auto end = fmt::format_to(dst + pos, "{}", whole_part);
+            pos = end - dst;
+        }
 
         if (LIKELY(scale)) {
             int low_scale = 0;
@@ -490,8 +553,14 @@ struct Decimal {
                 pos += scale - low_scale;
             }
             if (frac_part) {
-                end = fmt::format_to(&dst[pos], "{}", frac_part);
-                pos = end - dst;
+                if constexpr (std::is_same_v<T, Int256>) { // Int256 has no fmt formatter
+                    std::string num_str {wide::to_string(frac_part)}; // fractional digits
+                    auto end = fmt::format_to(&dst[pos], "{}", num_str);
+                    pos = end - dst;
+                } else {
+                    auto end = fmt::format_to(&dst[pos], "{}", frac_part);
+                    pos = end - dst;
+                }
             }
         }
 
@@ -507,6 +576,7 @@ struct Decimal128I : public Decimal<Int128> {
 #define DECLARE_NUMERIC_CTOR(TYPE) \
     Decimal128I(const TYPE& value_) : Decimal<Int128>(value_) {}
 
+    DECLARE_NUMERIC_CTOR(Int256)
     DECLARE_NUMERIC_CTOR(Int128)
     DECLARE_NUMERIC_CTOR(Int32)
     DECLARE_NUMERIC_CTOR(Int64)
@@ -522,9 +592,289 @@ struct Decimal128I : public Decimal<Int128> {
     }
 };
 
+template <>
+struct Decimal<Int256> {
+    using T = Int256;
+    using NativeType = Int256;
+
+    Decimal() = default;
+    Decimal(Decimal<T>&&) = default;
+    Decimal(const Decimal<T>&) = default;
+
+#define DECLARE_NUMERIC_CTOR(TYPE) \
+    explicit Decimal(const TYPE& value_) : value(value_) {}
+
+    DECLARE_NUMERIC_CTOR(Int256)
+    DECLARE_NUMERIC_CTOR(Int128)
+    DECLARE_NUMERIC_CTOR(Int32)
+    DECLARE_NUMERIC_CTOR(Int64)
+    DECLARE_NUMERIC_CTOR(UInt32)
+    DECLARE_NUMERIC_CTOR(UInt64)
+
+#undef DECLARE_NUMERIC_CTOR
+
+    explicit Decimal(const Float32& value_) : value(value_) {
+        if constexpr (std::is_integral<T>::value) {
+            value = round(value_);
+        }
+    }
+    explicit Decimal(const Float64& value_) : value(value_) {
+        if constexpr (std::is_integral<T>::value) {
+            value = round(value_);
+        }
+    }
+
+    static Decimal double_to_decimal(double value_) {
+        DecimalV2Value decimal_value;
+        decimal_value.assign_from_double(value_);
+        return Decimal(binary_cast<DecimalV2Value, T>(decimal_value));
+    }
+
+    template <typename U>
+    explicit Decimal(const Decimal<U>& x) {
+        value = x.value;
+    }
+
+    constexpr Decimal<T>& operator=(Decimal<T>&&) = default;
+    constexpr Decimal<T>& operator=(const Decimal<T>&) = default;
+
+    operator T() const { return value; }
+
+    operator Int128() const { return (Int128)value.items[0] + ((Int128)(value.items[1]) << 64); }
+
+    const Decimal<T>& operator++() {
+        value++;
+        return *this;
+    }
+    const Decimal<T>& operator--() {
+        value--;
+        return *this;
+    }
+
+    const Decimal<T>& operator+=(const T& x) {
+        value += x;
+        return *this;
+    }
+    const Decimal<T>& operator-=(const T& x) {
+        value -= x;
+        return *this;
+    }
+    const Decimal<T>& operator*=(const T& x) {
+        value *= x;
+        return *this;
+    }
+    const Decimal<T>& operator/=(const T& x) {
+        value /= x;
+        return *this;
+    }
+    const Decimal<T>& operator%=(const T& x) {
+        value %= x;
+        return *this;
+    }
+
+    // Character budget for the string form of a Decimal256: the maximum number
+    // of digits plus one character each for the decimal point, a leading 0 and
+    // the negative sign.
+    // T is fixed to Int256 in this specialization, so the precision is always
+    // BeConsts::MAX_DECIMAL256_PRECISION.
+    static constexpr int max_string_length() {
+        constexpr auto max_digits = BeConsts::MAX_DECIMAL256_PRECISION;
+        // Characters that are not significant digits.
+        constexpr int extra_chars = 1    // Add a space for decimal place
+                                    + 1  // Add a space for leading 0
+                                    + 1; // Add a space for negative sign
+        return max_digits + extra_chars;
+    }
+
+    /**
+     * Formats the value as a decimal string with exactly `scale` fractional digits.
+     * No decimal point is emitted when scale is 0.
+     * T is always Int256 in this specialization, so the digits come from
+     * wide::to_string and the precision is BeConsts::MAX_DECIMAL256_PRECISION.
+     * @param scale Decimal's scale.
+     * @return The string representation.
+     */
+    std::string to_string(UInt32 scale) const {
+        if (value == std::numeric_limits<T>::min()) {
+            // Handled separately because the general path below negates the value.
+            std::string res {wide::to_string(value)};
+            // Insert the decimal point only when there are fractional digits;
+            // an unconditional insert yields a trailing "." for scale == 0.
+            if (scale > 0) {
+                res.insert(res.size() - scale, ".");
+            }
+            return res;
+        }
+
+        static constexpr auto precision = BeConsts::MAX_DECIMAL256_PRECISION;
+        const bool is_negative = value < 0;
+        int max_result_length = precision + (scale > 0) // Add a space for decimal place
+                                + (scale == precision)  // Add a space for leading 0
+                                + (is_negative);        // Add a space for negative sign
+        // Pre-fill with '0' so fractional positions that are never written stay zero.
+        std::string str = std::string(max_result_length, '0');
+
+        T abs_value = value;
+        int pos = 0;
+
+        if (is_negative) {
+            abs_value = -value;
+            str[pos++] = '-';
+        }
+
+        T whole_part = abs_value;
+        T frac_part = 0;
+        if (scale) {
+            const auto multiplier = decimal_scale_multiplier<T>(scale);
+            whole_part = abs_value / multiplier;
+            frac_part = abs_value % multiplier;
+        }
+
+        // Write the integral digits directly into the pre-sized buffer.
+        std::string num_str {wide::to_string(whole_part)};
+        auto end = fmt::format_to(str.data() + pos, "{}", num_str);
+        pos = end - str.data();
+
+        if (scale) {
+            str[pos++] = '.';
+            // Fill the fractional digits from the least significant position
+            // backwards; leading fractional zeros are already in the buffer.
+            for (auto end_pos = pos + scale - 1; end_pos >= pos && frac_part > 0;
+                 --end_pos, frac_part /= 10) {
+                str[end_pos] += (int)(frac_part % 10);
+            }
+        }
+
+        // Cut the string to the characters actually produced.
+        str.resize(pos + scale);
+        return str;
+    }
+
+    /**
+     * Writes the string representation of this decimal into a caller-provided buffer.
+     * @param dst Store the result, should be pre-allocated.
+     * @param scale Decimal's scale.
+     * @param scale_multiplier Decimal's scale multiplier.
+     * @return The length of string.
+     */
+    __attribute__((always_inline)) size_t to_string(char* dst, UInt32 scale,
+                                                    const T& scale_multiplier) const {
+        // T is always Int256 in this specialization, so every number is rendered
+        // through wide::to_string before being copied into dst.
+        if (UNLIKELY(value == std::numeric_limits<T>::min())) {
+            // NOTE(review): scale is not used in this branch, so no decimal point is
+            // written, unlike the std::string overload of to_string. Confirm that
+            // this difference is intended.
+            const std::string min_digits {wide::to_string(value)};
+            return fmt::format_to(dst, "{}", min_digits) - dst;
+        }
+
+        // Emit the sign, then continue with the magnitude.
+        const bool negative = value < 0;
+        T magnitude = value;
+        int written = 0;
+        if (negative) {
+            magnitude = -value;
+            dst[written++] = '-';
+        }
+
+        // Split into integral and fractional parts. With scale == 0 the whole
+        // magnitude is the integral part and frac_part is never read.
+        T whole_part = magnitude;
+        T frac_part;
+        if (LIKELY(scale)) {
+            whole_part = magnitude / scale_multiplier;
+            frac_part = magnitude % scale_multiplier;
+        }
+
+        // Integral digits.
+        const std::string whole_digits {wide::to_string(whole_part)};
+        written = fmt::format_to(dst + written, "{}", whole_digits) - dst;
+
+        // Nothing follows the integral digits when there is no fractional part.
+        if (UNLIKELY(!scale)) {
+            return written;
+        }
+
+        // Binary search for the smallest k in [0, scale] whose
+        // decimal_scale_multiplier<T>(k) exceeds frac_part (presumably the number
+        // of digits frac_part prints with -- verify against the helper).
+        int lo = 0;
+        int hi = scale;
+        while (lo < hi) {
+            const int mid = (hi + lo) >> 1;
+            if (decimal_scale_multiplier<T>(mid) <= frac_part) {
+                lo = mid + 1;
+            } else {
+                hi = mid;
+            }
+        }
+
+        dst[written++] = '.';
+        // Zero-fill the (scale - lo) leading positions of the fraction.
+        if (lo < scale) {
+            memset(&dst[written], '0', scale - lo);
+            written += scale - lo;
+        }
+
+        // Remaining fractional digits; skipped when the fraction is zero.
+        if (frac_part) {
+            const std::string frac_digits {wide::to_string(frac_part)};
+            written = fmt::format_to(dst + written, "{}", frac_digits) - dst;
+        }
+
+        return written;
+    }
+
+    T value;
+};
+
 using Decimal32 = Decimal<Int32>;
 using Decimal64 = Decimal<Int64>;
 using Decimal128 = Decimal<Int128>;
+using Decimal256 = Decimal<Int256>;
+template <typename T>
+inline Decimal<T> operator-(const Decimal<T>& x) {
+    return Decimal<T>(-x.value); // explicit construction: Decimal<Int256> ctors are explicit
+}
+
+inline Decimal256 operator+(const Decimal256& x, const Decimal256& y) {
+    return Decimal256(x.value + y.value);
+}
+inline Decimal256 operator-(const Decimal256& x, const Decimal256& y) {
+    return Decimal256(x.value - y.value);
+}
+inline Decimal256 operator*(const Decimal256& x, const Decimal256& y) {
+    return Decimal256(x.value * y.value);
+}
+inline Decimal256 operator/(const Decimal256& x, const Decimal256& y) {
+    return Decimal256(x.value / y.value);
+}
+inline Decimal256 operator%(const Decimal256& x, const Decimal256& y) {
+    return Decimal256(x.value % y.value);
+}
+inline Decimal256 operator-(const Decimal256& x) {
+    return Decimal256(-x.value);
+}
+
+inline bool operator<(const Decimal256& x, const Decimal256& y) {
+    return x.value < y.value;
+}
+inline bool operator>(const Decimal256& x, const Decimal256& y) {
+    return x.value > y.value;
+}
+inline bool operator<=(const Decimal256& x, const Decimal256& y) {
+    return x.value <= y.value;
+}
+inline bool operator>=(const Decimal256& x, const Decimal256& y) {
+    return x.value >= y.value;
+}
+inline bool operator==(const Decimal256& x, const Decimal256& y) {
+    return x.value == y.value;
+}
+inline bool operator!=(const Decimal256& x, const Decimal256& y) {
+    return x.value != y.value;
+}
 
 template <>
 struct TypeName<Decimal32> {
@@ -543,6 +893,11 @@ struct TypeName<Decimal128I> {
     static const char* get() { return "Decimal128I"; }
 };
 
+template <>
+struct TypeName<Decimal256> {
+    static const char* get() { return "Decimal256"; }
+};
+
 template <>
 struct TypeId<Decimal32> {
     static constexpr const TypeIndex value = TypeIndex::Decimal32;
@@ -559,6 +914,10 @@ template <>
 struct TypeId<Decimal128I> {
     static constexpr const TypeIndex value = TypeIndex::Decimal128I;
 };
+template <>
+struct TypeId<Decimal256> {
+    static constexpr const TypeIndex value = TypeIndex::Decimal256;
+};
 
 template <typename T>
 constexpr bool IsDecimalNumber = false;
@@ -570,6 +929,8 @@ template <>
 inline constexpr bool IsDecimalNumber<Decimal128> = true;
 template <>
 inline constexpr bool IsDecimalNumber<Decimal128I> = true;
+template <>
+inline constexpr bool IsDecimalNumber<Decimal256> = true;
 
 template <typename T>
 constexpr bool IsDecimal128 = false;
@@ -581,6 +942,11 @@ constexpr bool IsDecimal128I = false;
 template <>
 inline constexpr bool IsDecimal128I<Decimal128I> = true;
 
+template <typename T>
+constexpr bool IsDecimal256 = false;
+template <>
+inline constexpr bool IsDecimal256<Decimal256> = true;
+
 template <typename T>
 constexpr bool IsDecimalV2 = IsDecimal128<T> && !IsDecimal128I<T>;
 
@@ -588,6 +954,10 @@ template <typename T, typename U>
 using DisposeDecimal = std::conditional_t<IsDecimalV2<T>, Decimal128,
                                           std::conditional_t<IsDecimalNumber<T>, Decimal128I, U>>;
 
+template <typename T, typename U>
+using DisposeDecimal256 = std::conditional_t<IsDecimalV2<T>, Decimal128,
+                                             std::conditional_t<IsDecimalNumber<T>, Decimal256, U>>;
+
 template <typename T>
 constexpr bool IsFloatNumber = false;
 template <>
@@ -615,6 +985,10 @@ template <>
 struct NativeType<Decimal128I> {
     using Type = Int128;
 };
+template <>
+struct NativeType<Decimal256> {
+    using Type = Int256;
+};
 
 inline const char* getTypeName(TypeIndex idx) {
     switch (idx) {
@@ -640,6 +1014,8 @@ inline const char* getTypeName(TypeIndex idx) {
         return TypeName<Int64>::get();
     case TypeIndex::Int128:
         return TypeName<Int128>::get();
+    case TypeIndex::Int256:
+        return TypeName<Int256>::get();
     case TypeIndex::Float32:
         return TypeName<Float32>::get();
     case TypeIndex::Float64:
@@ -670,6 +1046,8 @@ inline const char* getTypeName(TypeIndex idx) {
         return TypeName<Decimal128>::get();
     case TypeIndex::Decimal128I:
         return TypeName<Decimal128I>::get();
+    case TypeIndex::Decimal256:
+        return TypeName<Decimal256>::get();
     case TypeIndex::UUID:
         return "UUID";
     case TypeIndex::Array:
@@ -740,6 +1118,15 @@ struct std::hash<doris::vectorized::Decimal128I> {
     }
 };
 
+template <>
+struct std::hash<doris::vectorized::Decimal256> {
+    size_t operator()(const doris::vectorized::Decimal256& x) const {
+        return std::hash<uint64_t>()(x.value >> 192) ^ std::hash<uint64_t>()(x.value >> 128) ^
+               std::hash<uint64_t>()(x.value >> 64) ^
+               std::hash<uint64_t>()(x.value & std::numeric_limits<uint64_t>::max());
+    }
+};
+
 constexpr bool typeindex_is_int(doris::vectorized::TypeIndex index) {
     using TypeIndex = doris::vectorized::TypeIndex;
     switch (index) {
diff --git a/be/src/vec/core/wide_integer.h b/be/src/vec/core/wide_integer.h
new file mode 100644
index 0000000000..404b4f7a48
--- /dev/null
+++ b/be/src/vec/core/wide_integer.h
@@ -0,0 +1,296 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+///////////////////////////////////////////////////////////////
+//  Distributed under the Boost Software License, Version 1.0.
+//  (See at http://www.boost.org/LICENSE_1_0.txt)
+///////////////////////////////////////////////////////////////
+
+/*  Divide and multiply
+ *
+ *
+ * Copyright (c) 2008
+ * Evan Teran
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose and without fee is hereby granted, provided
+ * that the above copyright notice appears in all copies and that both the
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the same name not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. We make no representations about the
+ * suitability this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ */
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/wide_integer.h
+// and modified by Doris
+#pragma once
+
+#include <cstdint>
+#include <initializer_list>
+#include <limits>
+#include <type_traits>
+
+// NOLINTBEGIN(*)
+
+namespace wide {
+template <size_t Bits, typename Signed>
+class integer;
+}
+
+namespace std {
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+struct common_type<wide::integer<Bits, Signed>, wide::integer<Bits2, Signed2>>;
+
+template <size_t Bits, typename Signed, typename Arithmetic>
+struct common_type<wide::integer<Bits, Signed>, Arithmetic>;
+
+template <typename Arithmetic, size_t Bits, typename Signed>
+struct common_type<Arithmetic, wide::integer<Bits, Signed>>;
+
+} // namespace std
+
+namespace wide {
+
+template <size_t Bits, typename Signed>
+class integer {
+public:
+    using base_type = uint64_t;
+    using signed_base_type = int64_t;
+
+    // ctors
+    constexpr integer() noexcept = default;
+
+    template <typename T>
+    constexpr integer(T rhs) noexcept;
+
+    template <typename T>
+    constexpr integer(std::initializer_list<T> il) noexcept;
+
+    // assignment
+    template <size_t Bits2, typename Signed2>
+    constexpr integer<Bits, Signed>& operator=(const integer<Bits2, Signed2>& rhs) noexcept;
+
+    template <typename Arithmetic>
+    constexpr integer<Bits, Signed>& operator=(Arithmetic rhs) noexcept;
+
+    template <typename Arithmetic>
+    constexpr integer<Bits, Signed>& operator*=(const Arithmetic& rhs);
+
+    template <typename Arithmetic>
+    constexpr integer<Bits, Signed>& operator/=(const Arithmetic& rhs);
+
+    template <typename Arithmetic>
+    constexpr integer<Bits, Signed>& operator+=(const Arithmetic& rhs) noexcept(
+            std::is_same_v<Signed, unsigned>);
+
+    template <typename Arithmetic>
+    constexpr integer<Bits, Signed>& operator-=(const Arithmetic& rhs) noexcept(
+            std::is_same_v<Signed, unsigned>);
+
+    template <typename Integral>
+    constexpr integer<Bits, Signed>& operator%=(const Integral& rhs);
+
+    template <typename Integral>
+    constexpr integer<Bits, Signed>& operator&=(const Integral& rhs) noexcept;
+
+    template <typename Integral>
+    constexpr integer<Bits, Signed>& operator|=(const Integral& rhs) noexcept;
+
+    template <typename Integral>
+    constexpr integer<Bits, Signed>& operator^=(const Integral& rhs) noexcept;
+
+    constexpr integer<Bits, Signed>& operator<<=(int n) noexcept;
+    constexpr integer<Bits, Signed>& operator>>=(int n) noexcept;
+
+    constexpr integer<Bits, Signed>& operator++() noexcept(std::is_same_v<Signed, unsigned>);
+    constexpr integer<Bits, Signed> operator++(int) noexcept(std::is_same_v<Signed, unsigned>);
+    constexpr integer<Bits, Signed>& operator--() noexcept(std::is_same_v<Signed, unsigned>);
+    constexpr integer<Bits, Signed> operator--(int) noexcept(std::is_same_v<Signed, unsigned>);
+
+    // observers
+
+    constexpr explicit operator bool() const noexcept;
+
+    template <typename T, typename = std::enable_if_t<std::is_arithmetic_v<T>, T>>
+    constexpr operator T() const noexcept;
+
+    constexpr operator long double() const noexcept;
+    constexpr operator double() const noexcept;
+    constexpr operator float() const noexcept;
+
+    struct _impl;
+
+    base_type items[_impl::item_count];
+
+private:
+    template <size_t Bits2, typename Signed2>
+    friend class integer;
+
+    friend class std::numeric_limits<integer<Bits, signed>>;
+    friend class std::numeric_limits<integer<Bits, unsigned>>;
+};
+
+using Int256 = integer<256, signed>;
+using UInt256 = integer<256, unsigned>;
+
+template <typename T>
+static constexpr bool ArithmeticConcept() noexcept;
+
+template <class T1, class T2>
+using _only_arithmetic =
+        typename std::enable_if<ArithmeticConcept<T1>() && ArithmeticConcept<T2>()>::type;
+
+template <typename T>
+static constexpr bool IntegralConcept() noexcept;
+
+template <class T, class T2>
+using _only_integer = typename std::enable_if<IntegralConcept<T>() && IntegralConcept<T2>()>::type;
+
+// Unary operators
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> operator~(const integer<Bits, Signed>& lhs) noexcept;
+
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> operator-(const integer<Bits, Signed>& lhs) noexcept(
+        std::is_same_v<Signed, unsigned>);
+
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> operator+(const integer<Bits, Signed>& lhs) noexcept(
+        std::is_same_v<Signed, unsigned>);
+
+// Binary operators
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator*(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Arithmetic, typename Arithmetic2,
+          class = _only_arithmetic<Arithmetic, Arithmetic2>>
+std::common_type_t<Arithmetic, Arithmetic2> constexpr operator*(const Arithmetic& rhs,
+                                                                const Arithmetic2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator/(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Arithmetic, typename Arithmetic2,
+          class = _only_arithmetic<Arithmetic, Arithmetic2>>
+std::common_type_t<Arithmetic, Arithmetic2> constexpr operator/(const Arithmetic& rhs,
+                                                                const Arithmetic2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator+(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Arithmetic, typename Arithmetic2,
+          class = _only_arithmetic<Arithmetic, Arithmetic2>>
+std::common_type_t<Arithmetic, Arithmetic2> constexpr operator+(const Arithmetic& rhs,
+                                                                const Arithmetic2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator-(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Arithmetic, typename Arithmetic2,
+          class = _only_arithmetic<Arithmetic, Arithmetic2>>
+std::common_type_t<Arithmetic, Arithmetic2> constexpr operator-(const Arithmetic& rhs,
+                                                                const Arithmetic2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator%(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Integral, typename Integral2, class = _only_integer<Integral, Integral2>>
+std::common_type_t<Integral, Integral2> constexpr operator%(const Integral& rhs,
+                                                            const Integral2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator&(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Integral, typename Integral2, class = _only_integer<Integral, Integral2>>
+std::common_type_t<Integral, Integral2> constexpr operator&(const Integral& rhs,
+                                                            const Integral2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator|(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Integral, typename Integral2, class = _only_integer<Integral, Integral2>>
+std::common_type_t<Integral, Integral2> constexpr operator|(const Integral& rhs,
+                                                            const Integral2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator^(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Integral, typename Integral2, class = _only_integer<Integral, Integral2>>
+std::common_type_t<Integral, Integral2> constexpr operator^(const Integral& rhs,
+                                                            const Integral2& lhs);
+
+// TODO: Integral
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> operator<<(const integer<Bits, Signed>& lhs, int n) noexcept;
+
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> operator>>(const integer<Bits, Signed>& lhs, int n) noexcept;
+
+template <size_t Bits, typename Signed, typename Int,
+          typename = std::enable_if_t<!std::is_same_v<Int, int>>>
+constexpr integer<Bits, Signed> operator<<(const integer<Bits, Signed>& lhs, Int n) noexcept {
+    return lhs << int(n);
+}
+template <size_t Bits, typename Signed, typename Int,
+          typename = std::enable_if_t<!std::is_same_v<Int, int>>>
+constexpr integer<Bits, Signed> operator>>(const integer<Bits, Signed>& lhs, Int n) noexcept {
+    return lhs >> int(n);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator<(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Arithmetic, typename Arithmetic2,
+          class = _only_arithmetic<Arithmetic, Arithmetic2>>
+constexpr bool operator<(const Arithmetic& rhs, const Arithmetic2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator>(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Arithmetic, typename Arithmetic2,
+          class = _only_arithmetic<Arithmetic, Arithmetic2>>
+constexpr bool operator>(const Arithmetic& rhs, const Arithmetic2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator<=(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Arithmetic, typename Arithmetic2,
+          class = _only_arithmetic<Arithmetic, Arithmetic2>>
+constexpr bool operator<=(const Arithmetic& rhs, const Arithmetic2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator>=(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Arithmetic, typename Arithmetic2,
+          class = _only_arithmetic<Arithmetic, Arithmetic2>>
+constexpr bool operator>=(const Arithmetic& rhs, const Arithmetic2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator==(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Arithmetic, typename Arithmetic2,
+          class = _only_arithmetic<Arithmetic, Arithmetic2>>
+constexpr bool operator==(const Arithmetic& rhs, const Arithmetic2& lhs);
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator!=(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs);
+template <typename Arithmetic, typename Arithmetic2,
+          class = _only_arithmetic<Arithmetic, Arithmetic2>>
+constexpr bool operator!=(const Arithmetic& rhs, const Arithmetic2& lhs);
+
+} // namespace wide
+
+// NOLINTEND(*)
+
+#include "wide_integer_impl.h"
diff --git a/be/src/vec/core/wide_integer_impl.h b/be/src/vec/core/wide_integer_impl.h
new file mode 100644
index 0000000000..88d1c5233d
--- /dev/null
+++ b/be/src/vec/core/wide_integer_impl.h
@@ -0,0 +1,1479 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+/// Original is here https://github.com/cerevra/int
+/// Distributed under the Boost Software License, Version 1.0.
+/// (See at http://www.boost.org/LICENSE_1_0.txt)
+
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/wide_integer_impl.h
+// and modified by Doris
+#pragma once
+
+#include <boost/math/special_functions/fpclassify.hpp>
+#include <cassert>
+#include <cfloat>
+#include <cmath>
+#include <compare>
+#include <limits>
+#include <tuple>
+#include <type_traits>
+
+#include "common/exception.h"
+
+// NOLINTBEGIN(*)
+
+/// Use the same extended double on all platforms (native long double only if its mantissa is 64 bits)
+#if (LDBL_MANT_DIG == 64)
+#define CONSTEXPR_FROM_DOUBLE constexpr
+using FromDoubleIntermediateType = long double;
+#else
+#include <boost/multiprecision/cpp_bin_float.hpp>
+/// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended`
+#define CONSTEXPR_FROM_DOUBLE
+using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended;
+#endif
+
+namespace CityHash_v1_0_2 {
+struct uint128; // forward declaration; used below as a default template argument
+}
+
+namespace wide {
+
+template <typename T>
+struct IsWideInteger { // primary template: T is not a wide::integer
+    static constexpr bool value = false;
+};
+/// Specialization selected for every wide::integer<Bits, Signed>.
+template <size_t Bits, typename Signed>
+struct IsWideInteger<wide::integer<Bits, Signed>> {
+    static constexpr bool value = true;
+};
+
+template <typename T> // true for builtin arithmetic types and for wide integers
+static constexpr bool ArithmeticConcept() noexcept {
+    return IsWideInteger<T>::value || std::is_arithmetic_v<T>;
+}
+
+template <typename T> // true for builtin integral types and for wide integers
+static constexpr bool IntegralConcept() noexcept {
+    return IsWideInteger<T>::value || std::is_integral_v<T>;
+}
+
+/// Detects types for which std::tuple_size<T>::value is a valid expression.
+template <typename T>
+class IsTupleLike {
+    template <typename> // fallback overload: chosen when the tuple_size probe is ill-formed
+    static void probe(...);
+    template <typename U>
+    static auto probe(U* p) -> decltype(std::tuple_size<U>::value, int());
+public:
+    static constexpr bool value = !std::is_void_v<decltype(probe<T>(nullptr))>;
+};
+
+} // namespace wide
+
+namespace std {
+
+// numeric limits for wide::integer: an exact, bounded integer type with no infinity and no NaN values
+template <size_t Bits, typename Signed>
+class numeric_limits<wide::integer<Bits, Signed>> {
+public:
+    static constexpr bool is_specialized = true;
+    static constexpr bool is_signed = is_same<Signed, signed>::value;
+    static constexpr bool is_integer = true;
+    static constexpr bool is_exact = true;
+    static constexpr bool has_infinity = false;
+    static constexpr bool has_quiet_NaN = false;
+    static constexpr bool has_signaling_NaN = false; // integers cannot represent a signaling NaN
+    static constexpr std::float_denorm_style has_denorm = std::denorm_absent;
+    static constexpr bool has_denorm_loss = false;
+    static constexpr std::float_round_style round_style = std::round_toward_zero;
+    static constexpr bool is_iec559 = false;
+    static constexpr bool is_bounded = true;
+    static constexpr bool is_modulo = true;
+    static constexpr int digits = Bits - (is_same<Signed, signed>::value ? 1 : 0);
+    static constexpr int digits10 = digits * 0.30103 /*std::log10(2)*/;
+    static constexpr int max_digits10 = 0;
+    static constexpr int radix = 2;
+    static constexpr int min_exponent = 0;
+    static constexpr int min_exponent10 = 0;
+    static constexpr int max_exponent = 0;
+    static constexpr int max_exponent10 = 0;
+    static constexpr bool traps = true;
+    static constexpr bool tinyness_before = false;
+
+    static constexpr wide::integer<Bits, Signed> min() noexcept {
+        if (is_same<Signed, signed>::value) {
+            using T = wide::integer<Bits, signed>;
+            T res {};
+            res.items[T::_impl::big(0)] = std::numeric_limits<
+                    typename wide::integer<Bits, Signed>::signed_base_type>::min();
+            return res;
+        }
+        return wide::integer<Bits, Signed>(0);
+    }
+
+    static constexpr wide::integer<Bits, Signed> max() noexcept {
+        using T = wide::integer<Bits, Signed>;
+        T res {};
+        res.items[T::_impl::big(0)] =
+                is_same<Signed, signed>::value
+                        ? std::numeric_limits<
+                                  typename wide::integer<Bits, Signed>::signed_base_type>::max()
+                        : std::numeric_limits<
+                                  typename wide::integer<Bits, Signed>::base_type>::max();
+        for (unsigned i = 1; i < wide::integer<Bits, Signed>::_impl::item_count; ++i) {
+            res.items[T::_impl::big(i)] =
+                    std::numeric_limits<typename wide::integer<Bits, Signed>::base_type>::max();
+        }
+        return res;
+    }
+
+    static constexpr wide::integer<Bits, Signed> lowest() noexcept { return min(); }
+    static constexpr wide::integer<Bits, Signed> epsilon() noexcept { return 0; }
+    static constexpr wide::integer<Bits, Signed> round_error() noexcept { return 0; }
+    static constexpr wide::integer<Bits, Signed> infinity() noexcept { return 0; }
+    static constexpr wide::integer<Bits, Signed> quiet_NaN() noexcept { return 0; }
+    static constexpr wide::integer<Bits, Signed> signaling_NaN() noexcept { return 0; }
+    static constexpr wide::integer<Bits, Signed> denorm_min() noexcept { return 0; }
+};
+
+// type traits: two wide integers -> the wider one; equal widths -> signed only if both are signed
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+struct common_type<wide::integer<Bits, Signed>, wide::integer<Bits2, Signed2>> {
+    using type = std::conditional_t < Bits == Bits2,
+          wide::integer<Bits, std::conditional_t<(std::is_same_v<Signed, Signed2> &&
+                                                  std::is_same_v<Signed2, signed>),
+                                                 signed, unsigned>>,
+          std::conditional_t<
+                  Bits2<Bits, wide::integer<Bits, Signed>, wide::integer<Bits2, Signed2>>>;
+};
+
+template <size_t Bits, typename Signed, typename Arithmetic>
+struct common_type<wide::integer<Bits, Signed>, Arithmetic> {
+    static_assert(wide::ArithmeticConcept<Arithmetic>());
+    // Floating point wins; else the wider type; for equal widths Arithmetic if either side is signed.
+    using type = std::conditional_t < std::is_floating_point_v<Arithmetic>, Arithmetic,
+          std::conditional_t<sizeof(Arithmetic) * 8 < Bits, wide::integer<Bits, Signed>,
+                             std::conditional_t<Bits<
+                                     sizeof(Arithmetic) * 8, Arithmetic,
+                                     std::conditional_t<Bits == sizeof(Arithmetic) * 8 &&
+                                                                (std::is_same_v<Signed, signed> ||
+                                                                 std::is_signed_v<Arithmetic>),
+                                                        Arithmetic, wide::integer<Bits, Signed>>>>>;
+};
+
+template <typename Arithmetic, size_t Bits, typename Signed>
+struct common_type<Arithmetic, wide::integer<Bits, Signed>> // mirrored argument order: same result
+        : common_type<wide::integer<Bits, Signed>, Arithmetic> {};
+
+} // namespace std
+
+namespace wide {
+
+template <size_t Bits, typename Signed>
+struct integer<Bits, Signed>::_impl {
+    static constexpr size_t _bits = Bits; // total width of the integer in bits
+    static constexpr const unsigned byte_count = Bits / 8;
+    static constexpr const unsigned item_count = byte_count / sizeof(base_type); // number of limbs
+    static constexpr const unsigned base_bits = sizeof(base_type) * 8; // bits per limb
+
+    static_assert(Bits % base_bits == 0); // the width must be a whole number of limbs
+
+    /// Simple iteration in both directions.
+    /// little(idx): position in `items` of the idx-th limb counting from the least significant.
+    static constexpr unsigned little(unsigned idx) {
+        constexpr bool host_is_little = std::endian::native == std::endian::little;
+        // Little-endian hosts store limbs in that order already; big-endian hosts store them
+        // reversed, so the index is mirrored.
+        return host_is_little ? idx : item_count - 1 - idx;
+    }
+    /// big(idx): position in `items` of the idx-th limb counting from the most significant.
+    static constexpr unsigned big(unsigned idx) {
+        constexpr bool host_is_little = std::endian::native == std::endian::little;
+        // On little-endian hosts the most significant limb sits at the end of the array,
+        // so the index is mirrored; on big-endian hosts it is used as is.
+        return host_is_little ? item_count - 1 - idx : idx;
+    }
+    static constexpr unsigned any(unsigned idx) { return idx; } // identity: raw array position
+
+    template <class T> // builtin overload: unsigned types are never negative
+    constexpr static bool is_negative(const T& n) noexcept {
+        if constexpr (!std::is_signed_v<T>) {
+            return false;
+        } else {
+            return n < 0;
+        }
+    }
+
+    template <size_t B, class T> // wide overload: tests the sign of the most significant limb
+    constexpr static bool is_negative(const integer<B, T>& n) noexcept {
+        if constexpr (!std::is_same_v<T, signed>) {
+            return false;
+        } else {
+            return static_cast<signed_base_type>(n.items[integer<B, T>::_impl::big(0)]) < 0;
+        }
+    }
+
+    template <typename T> // builtin overload: negates negative values, identity for unsigned types
+    constexpr static auto make_positive(const T& n) noexcept {
+        if constexpr (!std::is_signed_v<T>) {
+            return n;
+        } else {
+            return n < 0 ? -n : n;
+        }
+    }
+
+    template <size_t B, class S> // wide overload: negates n when it is negative
+    constexpr static integer<B, S> make_positive(const integer<B, S>& n) noexcept {
+        return !is_negative(n) ? n : integer<B, S>(operator_unary_minus(n));
+    }
+
+    template <typename T>
+    __attribute__((no_sanitize("undefined"))) constexpr static auto to_Integral(T f) noexcept {
+        /// Casts f to a 64-bit integer whose signedness matches T.
+        /// NOTE: this can be called with DB::Decimal, and in this case, result will be wrong
+        if constexpr (!std::is_signed_v<T>) {
+            return static_cast<uint64_t>(f);
+        } else {
+            return static_cast<int64_t>(f);
+        }
+    }
+
+    /// Initialises self from a builtin integer.
+    /// The value occupies the low limb(s); every remaining limb is filled with the sign:
+    /// all ones for a negative rhs, zero otherwise.
+    template <typename Integral>
+    constexpr static void wide_integer_from_builtin(integer<Bits, Signed>& self,
+                                                    Integral rhs) noexcept {
+        // Number of low limbs taken directly from rhs, and the value for the limbs above them.
+        unsigned copied = 1;
+        base_type fill = 0;
+
+        if constexpr (std::is_same_v<Integral, __int128>) {
+            // A signed 128-bit value is split into two 64-bit pieces.
+            self.items[little(0)] = rhs;
+            self.items[little(1)] = rhs >> 64;
+            copied = 2;
+            if (rhs < 0) {
+                fill = -1;
+            }
+        } else {
+            // Any other builtin must fit into a single limb.
+            static_assert(sizeof(Integral) <= sizeof(base_type));
+
+            self.items[little(0)] = _impl::to_Integral(rhs);
+            if constexpr (std::is_signed_v<Integral>) {
+                if (rhs < 0) {
+                    fill = -1;
+                }
+            }
+        }
+
+        // Sign-extend (or zero-extend) into the untouched high limbs.
+        for (unsigned i = copied; i < item_count; ++i) {
+            self.items[little(i)] = fill;
+        }
+    }
+
+    template <typename TupleLike, size_t i = 0> // fills limb i from tuple element i, then recurses
+    constexpr static void wide_integer_from_tuple_like(integer<Bits, Signed>& self,
+                                                       const TupleLike& tuple) noexcept {
+        if constexpr (i < item_count) {
+            if constexpr (i >= std::tuple_size_v<TupleLike>) {
+                self.items[i] = 0; // tuple exhausted: zero the remaining limbs
+            } else {
+                self.items[i] = std::get<i>(tuple);
+            }
+            wide_integer_from_tuple_like<TupleLike, i + 1>(self, tuple);
+        }
+    }
+
+    template <typename CityHashUInt128 = CityHash_v1_0_2::uint128>
+    constexpr static void wide_integer_from_cityhash_uint128(
+            integer<Bits, Signed>& self, const CityHashUInt128& value) noexcept {
+        static_assert(item_count >= 2); // each 64-bit half of the hash needs its own limb
+        // Initialise self from value.low64 / value.high64; the pair is listed in host limb order.
+        if constexpr (std::endian::native == std::endian::little) {
+            wide_integer_from_tuple_like(self, std::make_pair(value.low64, value.high64));
+        } else {
+            wide_integer_from_tuple_like(self, std::make_pair(value.high64, value.low64));
+        }
+    }
+
+    /**
+     * Sets self from floating-point t (non-finite t yields 0). Recurses while t / max_int >= max_int.
+     * NOTE(review): the original note claimed max(double) ~ 2^310 and at most 5 recursive calls;
+     * finite doubles reach ~2^1024, so that bound looks understated — confirm.
+     * t = a1 * max_int + b1,   a1 > max_int, b1 < max_int
+     * a1 = a2 * max_int + b2,  a2 > max_int, b2 < max_int
+     * a_(n - 1) = a_n * max_int + b_n, a_n <= max_int <- base case.
+     */
+    template <class T>
+    constexpr static void set_multiplier(integer<Bits, Signed>& self, T t) noexcept {
+        constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max();
+        static_assert(std::is_same_v<T, double> || std::is_same_v<T, FromDoubleIntermediateType>);
+        /// Implementation specific behaviour on overflow (if we don't check here, stack overflow will be triggered in bigint_cast).
+        if constexpr (std::is_same_v<T, double>) {
+            if (!std::isfinite(t)) {
+                self = 0;
+                return;
+            }
+        } else {
+            if (!boost::math::isfinite(t)) {
+                self = 0;
+                return;
+            }
+        }
+
+        const T alpha = t / static_cast<T>(max_int);
+
+        /** Here we have to use strict comparison.
+          * The max_int is 2^64 - 1.
+          * When casted to floating point type, it will be rounded to the closest representable number,
+          * which is 2^64.
+          * But 2^64 is not representable in uint64_t,
+          * so the maximum representable number will be strictly less.
+          */
+        if (alpha < static_cast<T>(max_int)) {
+            self = static_cast<uint64_t>(alpha);
+        } else { // max(double) / 2^64 will surely contain less than 52 precision bits, so speed up computations.
+            set_multiplier<double>(self, static_cast<double>(alpha));
+        }
+
+        self *= max_int;
+        self += static_cast<uint64_t>(t - floor(alpha) * static_cast<T>(max_int)); // += b_i
+    }
+
+    CONSTEXPR_FROM_DOUBLE static void wide_integer_from_builtin(integer<Bits, Signed>& self,
+                                                                double rhs) noexcept {
+        constexpr int64_t max_int = std::numeric_limits<int64_t>::max();
+        constexpr int64_t min_int = std::numeric_limits<int64_t>::lowest();
+        /// Initialises self from a double: direct cast inside the int64 range, set_multiplier outside it.
+        /// There are values in int64 that have more than 53 significant bits (in terms of double
+        /// representation). Such values, being promoted to double, are rounded up or down. If they are rounded up,
+        /// the result may not fit in 64 bits.
+        /// The example of such a number is 9.22337e+18.
+        /// As to_Integral does a static_cast to int64_t, it may result in UB.
+        /// The necessary check here is that FromDoubleIntermediateType has enough significant (mantissa) bits to store the
+        /// int64_t max value precisely.
+
+        if (rhs > static_cast<FromDoubleIntermediateType>(min_int) &&
+            rhs < static_cast<FromDoubleIntermediateType>(max_int)) {
+            self = static_cast<int64_t>(rhs);
+            return;
+        }
+        // Outside the int64 range (NaN also lands here, both comparisons being false): use |rhs|.
+        const FromDoubleIntermediateType rhs_long_double =
+                (static_cast<FromDoubleIntermediateType>(rhs) < 0)
+                        ? -static_cast<FromDoubleIntermediateType>(rhs)
+                        : rhs;
+
+        set_multiplier(self, rhs_long_double);
+
+        if (rhs < 0) { // restore the sign dropped above
+            self = -self;
+        }
+    }
+
+    /// Copies the low limbs of rhs into self; when self is wider, the extra limbs are
+    /// sign-extended (all ones for a negative signed rhs, zero otherwise).
+    template <size_t Bits2, typename Signed2>
+    constexpr static void wide_integer_from_wide_integer(
+            integer<Bits, Signed>& self, const integer<Bits2, Signed2>& rhs) noexcept {
+        using Source = integer<Bits2, Signed2>;
+        constexpr const unsigned shared_bits = (Bits2 > Bits) ? Bits : Bits2;
+        constexpr const unsigned shared_items = shared_bits / base_bits;
+        for (unsigned i = 0; i != shared_items; ++i) {
+            self.items[little(i)] = rhs.items[Source::_impl::little(i)];
+        }
+
+        if constexpr (Bits > Bits2) {
+            base_type fill = 0;
+            if constexpr (std::is_signed_v<Signed2>) {
+                if (rhs < 0) {
+                    fill = -1;
+                }
+            }
+            // Narrower source: extend into the remaining high limbs.
+            for (unsigned i = shared_items; i < item_count; ++i) {
+                self.items[little(i)] = fill;
+            }
+        }
+    }
+
+    template <typename T> // true when T occupies no more bytes than this integer
+    constexpr static bool should_keep_size() {
+        return byte_count >= sizeof(T);
+    }
+
+    constexpr static integer<Bits, Signed> shift_left(const integer<Bits, Signed>& rhs,
+                                                      unsigned n) noexcept {
+        integer<Bits, Signed> lhs; // result: rhs shifted left by n bits
+        unsigned items_shift = n / base_bits; // whole limbs to move
+        // NOTE(review): n >= Bits would index outside `items`; presumably callers guard — confirm.
+        if (unsigned bit_shift = n % base_bits) {
+            unsigned overflow_shift = base_bits - bit_shift;
+
+            lhs.items[big(0)] = rhs.items[big(items_shift)] << bit_shift;
+            for (unsigned i = 1; i < item_count - items_shift; ++i) {
+                lhs.items[big(i - 1)] |= rhs.items[big(items_shift + i)] >> overflow_shift; // bits spilling into the higher limb
+                lhs.items[big(i)] = rhs.items[big(items_shift + i)] << bit_shift;
+            }
+        } else {
+            for (unsigned i = 0; i < item_count - items_shift; ++i) { // limb-aligned shift: plain copy
+                lhs.items[big(i)] = rhs.items[big(items_shift + i)];
+            }
+        }
+
+        for (unsigned i = 0; i < items_shift; ++i) { // vacated low limbs become zero
+            lhs.items[little(i)] = 0;
+        }
+        return lhs;
+    }
+
+    constexpr static integer<Bits, Signed> shift_right(const integer<Bits, Signed>& rhs,
+                                                       unsigned n) noexcept {
+        integer<Bits, Signed> lhs; // result: rhs shifted right by n bits, sign-filled when rhs is negative
+        unsigned items_shift = n / base_bits; // whole limbs to move
+        unsigned bit_shift = n % base_bits; // remaining shift inside a limb
+        // NOTE(review): n >= Bits would index outside `items`; presumably callers guard — confirm.
+        if (bit_shift) {
+            unsigned overflow_shift = base_bits - bit_shift;
+
+            lhs.items[little(0)] = rhs.items[little(items_shift)] >> bit_shift;
+            for (unsigned i = 1; i < item_count - items_shift; ++i) {
+                lhs.items[little(i - 1)] |= rhs.items[little(items_shift + i)] << overflow_shift; // bits spilling into the lower limb
+                lhs.items[little(i)] = rhs.items[little(items_shift + i)] >> bit_shift;
+            }
+        } else {
+            for (unsigned i = 0; i < item_count - items_shift; ++i) { // limb-aligned shift: plain copy
+                lhs.items[little(i)] = rhs.items[little(items_shift + i)];
+            }
+        }
+
+        if (is_negative(rhs)) {
+            if (bit_shift) { // set the top bit_shift bits of the highest copied limb
+                lhs.items[big(items_shift)] |= std::numeric_limits<base_type>::max()
+                                               << (base_bits - bit_shift);
+            }
+
+            for (unsigned i = 0; i < items_shift; ++i) { // vacated high limbs become all ones
+                lhs.items[big(i)] = std::numeric_limits<base_type>::max();
+            }
+        } else {
+            for (unsigned i = 0; i < items_shift; ++i) { // vacated high limbs become zero
+                lhs.items[big(i)] = 0;
+            }
+        }
+
+        return lhs;
+    }
+
+private:
+    template <typename T> // limb of x at raw array position idx, or 0 when x has no such limb
+    constexpr static base_type get_item(const T& x, unsigned idx) {
+        if constexpr (IsWideInteger<T>::value) {
+            if (idx < T::_impl::item_count) {
+                return x.items[idx];
+            }
+            return 0;
+        } else {
+            if constexpr (sizeof(T) <= sizeof(base_type)) {
+                if (little(0) == idx) { // a builtin that fits one limb lives in the lowest limb only
+                    return static_cast<base_type>(x);
+                }
+            } else if (idx * sizeof(base_type) < sizeof(T)) { // builtin wider than one limb
+                return x >> (idx * base_bits); // & std::numeric_limits<base_type>::max()
+            }
+            return 0;
+        }
+    }
+
+    template <typename T> // lhs - rhs with wrap-around; rhs limbs are read as unsigned
+    constexpr static integer<Bits, Signed> minus(const integer<Bits, Signed>& lhs, T rhs) {
+        constexpr const unsigned rhs_items =
+                (sizeof(T) > sizeof(base_type)) ? (sizeof(T) / sizeof(base_type)) : 1;
+        constexpr const unsigned op_items = (item_count < rhs_items) ? item_count : rhs_items;
+
+        integer<Bits, Signed> res(lhs);
+        bool underflows[item_count] = {}; // underflows[i]: limb i needs a borrow from limb i + 1
+        // Pass 1: subtract limb by limb without borrows, recording where a borrow is needed.
+        for (unsigned i = 0; i < op_items; ++i) {
+            base_type rhs_item = get_item(rhs, little(i));
+            base_type& res_item = res.items[little(i)];
+
+            underflows[i] = res_item < rhs_item;
+            res_item -= rhs_item;
+        }
+        // Pass 2: propagate borrows upwards from the least significant limb.
+        for (unsigned i = 1; i < item_count; ++i) {
+            if (underflows[i - 1]) {
+                base_type& res_item = res.items[little(i)];
+                if (res_item == 0) { // decrementing zero wraps, so the borrow continues
+                    underflows[i] = true;
+                }
+                --res_item;
+            }
+        }
+
+        return res;
+    }
+
+    template <typename T> // lhs + rhs with wrap-around; rhs limbs are read as unsigned
+    constexpr static integer<Bits, Signed> plus(const integer<Bits, Signed>& lhs, T rhs) {
+        constexpr const unsigned rhs_items =
+                (sizeof(T) > sizeof(base_type)) ? (sizeof(T) / sizeof(base_type)) : 1;
+        constexpr const unsigned op_items = (item_count < rhs_items) ? item_count : rhs_items;
+
+        integer<Bits, Signed> res(lhs);
+        bool overflows[item_count] = {}; // overflows[i]: limb i produced a carry into limb i + 1
+        // Pass 1: add limb by limb without carries, recording where the sum wrapped.
+        for (unsigned i = 0; i < op_items; ++i) {
+            base_type rhs_item = get_item(rhs, little(i));
+            base_type& res_item = res.items[little(i)];
+
+            res_item += rhs_item;
+            overflows[i] = res_item < rhs_item;
+        }
+        // Pass 2: propagate carries upwards from the least significant limb.
+        for (unsigned i = 1; i < item_count; ++i) {
+            if (overflows[i - 1]) {
+                base_type& res_item = res.items[little(i)];
+                ++res_item;
+                if (res_item == 0) { // incrementing wrapped to zero, so the carry continues
+                    overflows[i] = true;
+                }
+            }
+        }
+
+        return res;
+    }
+
+    template <typename T> // lhs * rhs truncated to Bits bits; three strategies chosen at compile time
+    constexpr static integer<Bits, Signed> multiply(const integer<Bits, Signed>& lhs,
+                                                    const T& rhs) {
+        if constexpr (Bits == 256 && sizeof(base_type) == 8) {
+            /// @sa https://github.com/abseil/abseil-cpp/blob/master/absl/numeric/int128.h
+            using HalfType = unsigned __int128;
+            // a01 / a23: low and high 128-bit halves of lhs; a0 / a1: its two lowest limbs.
+            HalfType a01 = (HalfType(lhs.items[little(1)]) << 64) + lhs.items[little(0)];
+            HalfType a23 = (HalfType(lhs.items[little(3)]) << 64) + lhs.items[little(2)];
+            HalfType a0 = lhs.items[little(0)];
+            HalfType a1 = lhs.items[little(1)];
+            // b01: rhs converted to 128 bits; b0 / b1 its 64-bit halves; b23 the high half of a 256-bit rhs.
+            HalfType b01 = rhs;
+            uint64_t b0 = b01;
+            uint64_t b1 = 0;
+            HalfType b23 = 0;
+            if constexpr (sizeof(T) > 8) {
+                b1 = b01 >> 64;
+            }
+            if constexpr (sizeof(T) > 16) {
+                b23 = (HalfType(rhs.items[little(3)]) << 64) + rhs.items[little(2)];
+            }
+
+            HalfType r23 = a23 * b01 + a01 * b23 + a1 * b1;
+            HalfType r01 = a0 * b0;
+            HalfType r12 = (r01 >> 64) + (r23 << 64);
+            HalfType r12_x = a1 * b0;
+
+            integer<Bits, Signed> res;
+            res.items[little(0)] = r01;
+            res.items[little(3)] = r23 >> 64;
+
+            if constexpr (sizeof(T) > 8) {
+                HalfType r12_y = a0 * b1;
+                r12_x += r12_y;
+                if (r12_x < r12_y) { // the 128-bit addition wrapped: carry into the top limb
+                    ++res.items[little(3)];
+                }
+            }
+
+            r12 += r12_x;
+            if (r12 < r12_x) { // the 128-bit addition wrapped: carry into the top limb
+                ++res.items[little(3)];
+            }
+
+            res.items[little(1)] = r12;
+            res.items[little(2)] = r12 >> 64;
+            return res;
+        } else if constexpr (Bits == 128 && sizeof(base_type) == 8) {
+            using CompilerUInt128 = unsigned __int128; // both operands fit the compiler's 128-bit type
+            CompilerUInt128 a =
+                    (CompilerUInt128(lhs.items[little(1)]) << 64) +
+                    lhs.items[little(
+                            0)]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
+            CompilerUInt128 b =
+                    (CompilerUInt128(rhs.items[little(1)]) << 64) +
+                    rhs.items[little(
+                            0)]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
+            CompilerUInt128 c = a * b;
+            integer<Bits, Signed> res;
+            res.items[little(0)] = c;
+            res.items[little(1)] = c >> 64;
+            return res;
+        } else {
+            integer<Bits, Signed> res {}; // generic path: shift-and-add over the bits of rhs
+#if 1
+            integer<Bits, Signed> lhs2 = plus(lhs, shift_left(lhs, 1)); // 3 * lhs
+            integer<Bits, Signed> lhs3 = plus(lhs2, shift_left(lhs, 2)); // 7 * lhs
+#endif
+            for (unsigned i = 0; i < item_count; ++i) {
+                base_type rhs_item = get_item(rhs, little(i));
+                unsigned pos = i * base_bits; // bit position of the lowest unprocessed bit of rhs
+
+                while (rhs_item) {
+#if 1 /// optimization
+                    if ((rhs_item & 0x7) == 0x7) { // three set bits at once: add 7 * lhs << pos
+                        res = plus(res, shift_left(lhs3, pos));
+                        rhs_item >>= 3;
+                        pos += 3;
+                        continue;
+                    }
+
+                    if ((rhs_item & 0x3) == 0x3) { // two set bits at once: add 3 * lhs << pos
+                        res = plus(res, shift_left(lhs2, pos));
+                        rhs_item >>= 2;
+                        pos += 2;
+                        continue;
+                    }
+#endif
+                    if (rhs_item & 1) {
+                        res = plus(res, shift_left(lhs, pos));
+                    }
+
+                    rhs_item >>= 1;
+                    ++pos;
+                }
+            }
+
+            return res;
+        }
+    }
+
+public:
+    /// Bitwise NOT: flips every limb of lhs.
+    constexpr static integer<Bits, Signed> operator_unary_tilda(
+            const integer<Bits, Signed>& lhs) noexcept {
+        integer<Bits, Signed> flipped;
+        for (unsigned limb = 0; limb != item_count; ++limb) {
+            flipped.items[any(limb)] = ~lhs.items[any(limb)];
+        }
+        return flipped;
+    }
+
+    constexpr static integer<Bits, Signed> operator_unary_minus(
+            const integer<Bits, Signed>& lhs) noexcept(std::is_same_v<Signed, unsigned>) {
+        return plus(operator_unary_tilda(lhs), 1); // negation computed as ~lhs + 1
+    }
+
+    template <typename T> // lhs + rhs; a negative rhs is handled as subtracting -rhs
+    constexpr static auto operator_plus(const integer<Bits, Signed>& lhs,
+                                        const T& rhs) noexcept(std::is_same_v<Signed, unsigned>) {
+        if constexpr (!should_keep_size<T>()) {
+            // rhs is a wider wide integer: widen lhs to rhs's bit count and add there.
+            static_assert(IsWideInteger<T>::value);
+            return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::
+                    _impl::operator_plus(integer<T::_impl::_bits, Signed>(lhs), rhs);
+        } else {
+            if (!is_negative(rhs)) {
+                return plus(lhs, rhs);
+            }
+            return minus(lhs, -rhs);
+        }
+    }
+
+    template <typename T> // lhs - rhs; a negative rhs is handled as adding -rhs
+    constexpr static auto operator_minus(const integer<Bits, Signed>& lhs,
+                                         const T& rhs) noexcept(std::is_same_v<Signed, unsigned>) {
+        if constexpr (!should_keep_size<T>()) {
+            // rhs is a wider wide integer: widen lhs to rhs's bit count and subtract there.
+            static_assert(IsWideInteger<T>::value);
+            return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::
+                    _impl::operator_minus(integer<T::_impl::_bits, Signed>(lhs), rhs);
+        } else {
+            if (!is_negative(rhs)) {
+                return minus(lhs, rhs);
+            }
+            return plus(lhs, -rhs);
+        }
+    }
+
+    template <typename T> // lhs * rhs: multiplies magnitudes, then restores the sign for signed results
+    constexpr static auto operator_star(const integer<Bits, Signed>& lhs, const T& rhs) {
+        if constexpr (should_keep_size<T>()) {
+            integer<Bits, Signed> res;
+            // multiply() is fed non-negative operands.
+            if constexpr (std::is_signed_v<Signed>) {
+                res = multiply((is_negative(lhs) ? make_positive(lhs) : lhs),
+                               (is_negative(rhs) ? make_positive(rhs) : rhs));
+            } else {
+                res = multiply(lhs, (is_negative(rhs) ? make_positive(rhs) : rhs));
+            }
+            // Operands of opposite sign give a negative product (signed results only).
+            if (std::is_same_v<Signed, signed> && is_negative(lhs) != is_negative(rhs)) {
+                res = operator_unary_minus(res);
+            }
+
+            return res;
+        } else {
+            static_assert(IsWideInteger<T>::value); // rhs is a wider wide integer: multiply in the common type
+            return std::common_type_t<integer<Bits, Signed>, T>::_impl::operator_star(T(lhs), rhs);
+        }
+    }
+
+    /// lhs > rhs. For a signed T, operands of differing sign are decided by sign alone;
+    /// otherwise limbs are compared from the most significant one down.
+    template <typename T>
+    constexpr static bool operator_greater(const integer<Bits, Signed>& lhs,
+                                           const T& rhs) noexcept {
+        if constexpr (!should_keep_size<T>()) {
+            static_assert(IsWideInteger<T>::value);
+            return std::common_type_t<integer<Bits, Signed>, T>::_impl::operator_greater(T(lhs),
+                                                                                         rhs);
+        } else {
+            if (std::numeric_limits<T>::is_signed && (is_negative(lhs) != is_negative(rhs))) {
+                return is_negative(rhs);
+            }
+            const integer<Bits, Signed> widened = rhs;
+            for (unsigned i = 0; i < item_count; ++i) {
+                const base_type mine = lhs.items[big(i)];
+                const base_type theirs = get_item(widened, big(i));
+                if (mine != theirs) {
+                    return mine > theirs;
+                }
+            }
+            return false;
+        }
+    }
+
+    /// lhs < rhs. For a signed T, operands of differing sign are decided by sign alone;
+    /// otherwise limbs are compared from the most significant one down.
+    template <typename T>
+    constexpr static bool operator_less(const integer<Bits, Signed>& lhs, const T& rhs) noexcept {
+        if constexpr (!should_keep_size<T>()) {
+            static_assert(IsWideInteger<T>::value);
+            return std::common_type_t<integer<Bits, Signed>, T>::_impl::operator_less(T(lhs), rhs);
+        } else {
+            if (std::numeric_limits<T>::is_signed && (is_negative(lhs) != is_negative(rhs))) {
+                return is_negative(lhs);
+            }
+            const integer<Bits, Signed> widened = rhs;
+            for (unsigned i = 0; i < item_count; ++i) {
+                const base_type mine = lhs.items[big(i)];
+                const base_type theirs = get_item(widened, big(i));
+                if (mine != theirs) {
+                    return mine < theirs;
+                }
+            }
+            return false;
+        }
+    }
+
+    /// lhs == rhs, compared limb by limb.
+    template <typename T>
+    constexpr static bool operator_eq(const integer<Bits, Signed>& lhs, const T& rhs) noexcept {
+        if constexpr (!should_keep_size<T>()) {
+            // rhs is a wider wide integer: convert lhs to T and compare in the common type.
+            static_assert(IsWideInteger<T>::value);
+            return std::common_type_t<integer<Bits, Signed>, T>::_impl::operator_eq(T(lhs), rhs);
+        } else {
+            // Convert rhs to this width first, then compare every limb.
+            const integer<Bits, Signed> widened = rhs;
+            for (unsigned i = 0; i < item_count; ++i) {
+                if (lhs.items[any(i)] != get_item(widened, any(i))) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+
+    template <typename T> // bitwise OR of lhs and rhs
+    constexpr static auto operator_pipe(const integer<Bits, Signed>& lhs, const T& rhs) noexcept {
+        if constexpr (!should_keep_size<T>()) {
+            // rhs is a wider wide integer: convert lhs to T and combine in the common type.
+            static_assert(IsWideInteger<T>::value);
+            return std::common_type_t<integer<Bits, Signed>, T>::_impl::operator_pipe(T(lhs), rhs);
+        } else {
+            integer<Bits, Signed> combined;
+            for (unsigned i = 0; i < item_count; ++i) {
+                combined.items[little(i)] = lhs.items[little(i)] | get_item(rhs, little(i));
+            }
+            return combined;
+        }
+    }
+
+    /// Bitwise AND, limb by limb when should_keep_size<T>(); otherwise done in the common type.
+    template <typename T>
+    constexpr static auto operator_amp(const integer<Bits, Signed>& lhs, const T& rhs) noexcept {
+        if constexpr (!should_keep_size<T>()) {
+            static_assert(IsWideInteger<T>::value);
+            return std::common_type_t<integer<Bits, Signed>, T>::_impl::operator_amp(T(lhs), rhs);
+        } else {
+            integer<Bits, Signed> out;
+            for (unsigned idx = 0; idx < item_count; ++idx) {
+                out.items[little(idx)] = lhs.items[little(idx)] & get_item(rhs, little(idx));
+            }
+            return out;
+        }
+    }
+
+    /// True when every element of x.items is zero.
+    template <typename T>
+    constexpr static bool is_zero(const T& x) {
+        for (auto limb : x.items) {
+            if (limb != 0) {
+                return false;
+            }
+        }
+
+        // No non-zero limb was found.
+        return true;
+    }
+
+    /// Returns the quotient and leaves the remainder in numerator; throws on a zero denominator.
+    template <size_t Bits2>
+    constexpr static integer<Bits2, unsigned> divide(integer<Bits2, unsigned>& numerator,
+                                                     integer<Bits2, unsigned> denominator) {
+        static_assert(std::is_unsigned_v<Signed>);
+        // Reject a zero divisor first so the native 128-bit path below never divides by zero.
+        if (is_zero(denominator)) {
+            throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, "Division by zero");
+        }
+
+        if constexpr (Bits == 128 && sizeof(base_type) == 8) {
+            using CompilerUInt128 = unsigned __int128;
+
+            CompilerUInt128 a =
+                    (CompilerUInt128(numerator.items[little(1)]) << 64) +
+                    numerator.items[little(
+                            0)]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
+            CompilerUInt128 b =
+                    (CompilerUInt128(denominator.items[little(1)]) << 64) +
+                    denominator.items[little(
+                            0)];       // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
+            CompilerUInt128 c = a / b; // NOLINT
+
+            integer<Bits, Signed> res;
+            res.items[little(0)] = c;
+            res.items[little(1)] = c >> 64;
+
+            CompilerUInt128 remainder = a - b * c;
+            numerator.items[little(0)] = remainder;
+            numerator.items[little(1)] = remainder >> 64;
+
+            return res;
+        }
+
+        integer<Bits2, unsigned> x = 1;
+        integer<Bits2, unsigned> quotient = 0;
+        // Shift denominator (and the bit marker x) left until it exceeds numerator or tops out.
+        while (!operator_greater(denominator, numerator) &&
+               is_zero(operator_amp(shift_right(denominator, Bits2 - 1), 1))) {
+            x = shift_left(x, 1);
+            denominator = shift_left(denominator, 1);
+        }
+        // Walk back down: subtract wherever the shifted denominator fits and set that bit.
+        while (!is_zero(x)) {
+            if (!operator_greater(denominator, numerator)) {
+                numerator = operator_minus(numerator, denominator);
+                quotient = operator_pipe(quotient, x);
+            }
+
+            x = shift_right(x, 1);
+            denominator = shift_right(denominator, 1);
+        }
+
+        return quotient;
+    }
+
+    /// Truncating division; a wider wide-integer rhs is forwarded to the common type's _impl.
+    template <typename T>
+    constexpr static auto operator_slash(const integer<Bits, Signed>& lhs, const T& rhs) {
+        if constexpr (should_keep_size<T>()) {
+            integer<Bits, unsigned> numerator = make_positive(lhs);
+            integer<Bits, unsigned> denominator = make_positive(integer<Bits, Signed>(rhs));
+            integer<Bits, unsigned> quotient =
+                    integer<Bits, unsigned>::_impl::divide(numerator, std::move(denominator));
+            // The division ran on make_positive() values; negate when the operand signs differ.
+            if (std::is_same_v<Signed, signed> && is_negative(rhs) != is_negative(lhs))
+                quotient = operator_unary_minus(quotient);
+            return quotient;
+        } else {
+            static_assert(IsWideInteger<T>::value);
+            return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::
+                    _impl::operator_slash(T(lhs), rhs);
+        }
+    }
+
+    /// Remainder carrying the sign of lhs; a wider rhs is forwarded to the common type's _impl.
+    template <typename T>
+    constexpr static auto operator_percent(const integer<Bits, Signed>& lhs, const T& rhs) {
+        if constexpr (should_keep_size<T>()) {
+            integer<Bits, unsigned> remainder = make_positive(lhs);
+            integer<Bits, unsigned> denominator = make_positive(integer<Bits, Signed>(rhs));
+            integer<Bits, unsigned>::_impl::divide(remainder, std::move(denominator));
+            // divide() left the remainder in `remainder`; the quotient it returns is unused.
+            if (std::is_same_v<Signed, signed> && is_negative(lhs)) {
+                remainder = operator_unary_minus(remainder);
+            }
+            return remainder;
+        } else {
+            static_assert(IsWideInteger<T>::value);
+            return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::
+                    _impl::operator_percent(T(lhs), rhs);
+        }
+    }
+
+    /// Bitwise XOR (operator ^); a wider wide-integer rhs is handled by T's own _impl.
+    template <typename T>
+    constexpr static auto operator_circumflex(const integer<Bits, Signed>& lhs,
+                                              const T& rhs) noexcept {
+        if constexpr (should_keep_size<T>()) {
+            integer<Bits, Signed> t(rhs);
+            integer<Bits, Signed> res = lhs;
+
+            for (unsigned i = 0; i < item_count; ++i) {
+                res.items[any(i)] ^= t.items[any(i)];
+            }
+            return res;
+        } else {
+            static_assert(IsWideInteger<T>::value);
+            return T::_impl::operator_circumflex(T(lhs), rhs);
+        }
+    }
+
+    constexpr static integer<Bits, Signed> from_str(const char* c) {
+        integer<Bits, Signed> res = 0;
+        // Parses "[-]digits" or "[-]0x<hex>"; the '-' is recognised for signed types only.
+        bool is_neg = std::is_same_v<Signed, signed> && *c == '-';
+        if (is_neg) {
+            ++c;
+        }
+
+        if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) { // hex
+            ++c;
+            ++c;
+            while (*c) {
+                if (*c >= '0' && *c <= '9') {
+                    res = multiply(res, 16U);
+                    res = plus(res, *c - '0');
+                    ++c;
+                } else if (*c >= 'a' && *c <= 'f') {
+                    res = multiply(res, 16U);
+                    res = plus(res, *c - 'a' + 10U);
+                    ++c;
+                } else if (*c >= 'A' &&
+                           *c <= 'F') { // tolower must be used, but it is not constexpr
+                    res = multiply(res, 16U);
+                    res = plus(res, *c - 'A' + 10U);
+                    ++c;
+                } else {
+                    throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, "Invalid char from");
+                }
+            }
+        } else { // dec
+            while (*c) {
+                if (*c < '0' || *c > '9') {
+                    throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, "Invalid char from");
+                }
+
+                res = multiply(res, 10U);
+                res = plus(res, *c - '0');
+                ++c;
+            }
+        }
+        // The digits were accumulated without the sign; apply it last.
+        if (is_neg) {
+            res = operator_unary_minus(res);
+        }
+
+        return res;
+    }
+};
+
+// Members
+
+template <size_t Bits, typename Signed>
+template <typename T>
+constexpr integer<Bits, Signed>::integer(T rhs) noexcept : items {} {
+    if constexpr (IsWideInteger<T>::value) {
+        _impl::wide_integer_from_wide_integer(*this, rhs);
+    } else if constexpr (IsTupleLike<T>::value) {
+        _impl::wide_integer_from_tuple_like(*this, rhs);
+    } else if constexpr (std::is_same_v<std::remove_cvref_t<T>, CityHash_v1_0_2::uint128>) {
+        _impl::wide_integer_from_cityhash_uint128(*this, rhs);
+    } else {
+        _impl::wide_integer_from_builtin(*this, rhs);
+    }
+}
+
+template <size_t Bits, typename Signed>
+template <typename T>
+constexpr integer<Bits, Signed>::integer(std::initializer_list<T> il) noexcept : items {} {
+    if (il.size() == 1) {
+        if constexpr (IsWideInteger<T>::value) {
+            _impl::wide_integer_from_wide_integer(*this, *il.begin());
+        } else if constexpr (IsTupleLike<T>::value) {
+            _impl::wide_integer_from_tuple_like(*this, *il.begin());
+        } else if constexpr (std::is_same_v<std::remove_cvref_t<T>, CityHash_v1_0_2::uint128>) {
+            _impl::wide_integer_from_cityhash_uint128(*this, *il.begin());
+        } else {
+            _impl::wide_integer_from_builtin(*this, *il.begin());
+        }
+    } else if (il.size() == 0) {
+        _impl::wide_integer_from_builtin(*this, 0);
+    } else {
+        auto it = il.begin();
+        for (unsigned i = 0; i < _impl::item_count; ++i) {
+            if (it < il.end()) {
+                items[_impl::little(i)] = *it;
+                ++it;
+            } else {
+                items[_impl::little(i)] = 0;
+            }
+        }
+    }
+}
+
+template <size_t Bits, typename Signed>
+template <size_t Bits2, typename Signed2>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator=(
+        const integer<Bits2, Signed2>& rhs) noexcept {
+    _impl::wide_integer_from_wide_integer(*this, rhs);
+    return *this;
+}
+
+template <size_t Bits, typename Signed>
+template <typename T>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator=(T rhs) noexcept {
+    if constexpr (IsTupleLike<T>::value) {
+        _impl::wide_integer_from_tuple_like(*this, rhs);
+    } else if constexpr (std::is_same_v<std::remove_cvref_t<T>, CityHash_v1_0_2::uint128>) {
+        _impl::wide_integer_from_cityhash_uint128(*this, rhs);
+    } else {
+        _impl::wide_integer_from_builtin(*this, rhs);
+    }
+    return *this;
+}
+
+// In-place multiplication: stores *this * rhs back into *this and returns *this.
+template <size_t Bits, typename Signed>
+template <typename T>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator*=(const T& rhs) {
+    return *this = *this * rhs;
+}
+
+// In-place division: stores *this / rhs back into *this and returns *this.
+template <size_t Bits, typename Signed>
+template <typename T>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator/=(const T& rhs) {
+    return *this = *this / rhs;
+}
+
+// In-place addition: stores *this + rhs back into *this and returns *this.
+template <size_t Bits, typename Signed>
+template <typename T>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator+=(const T& rhs) noexcept(
+        std::is_same_v<Signed, unsigned>) {
+    return *this = *this + rhs;
+}
+
+// In-place subtraction: stores *this - rhs back into *this and returns *this.
+template <size_t Bits, typename Signed>
+template <typename T>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator-=(const T& rhs) noexcept(
+        std::is_same_v<Signed, unsigned>) {
+    return *this = *this - rhs;
+}
+
+// In-place remainder: stores *this % rhs back into *this and returns *this.
+template <size_t Bits, typename Signed>
+template <typename T>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator%=(const T& rhs) {
+    return *this = *this % rhs;
+}
+
+// In-place bitwise AND: stores *this & rhs back into *this and returns *this.
+template <size_t Bits, typename Signed>
+template <typename T>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator&=(const T& rhs) noexcept {
+    return *this = *this & rhs;
+}
+
+// In-place bitwise OR: stores *this | rhs back into *this and returns *this.
+template <size_t Bits, typename Signed>
+template <typename T>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator|=(const T& rhs) noexcept {
+    return *this = *this | rhs;
+}
+
+// In-place bitwise XOR: stores *this ^ rhs back into *this and returns *this.
+template <size_t Bits, typename Signed>
+template <typename T>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator^=(const T& rhs) noexcept {
+    return *this = *this ^ rhs;
+}
+
+// Shifts left in place. A count of Bits or more, or a negative one (it wraps), zeroes the value.
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator<<=(int n) noexcept {
+    if (static_cast<size_t>(n) >= Bits) {
+        return *this = 0;
+    }
+    // Only 0 <= n < Bits gets here; a zero count leaves the value as it is.
+    return n > 0 ? (*this = _impl::shift_left(*this, n)) : *this;
+}
+
+// Shifts right in place; once every bit is shifted out only the sign survives (-1 or 0).
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator>>=(int n) noexcept {
+    if (static_cast<size_t>(n) >= Bits) {
+        // Also reached for negative n, which wraps to a huge size_t.
+        return *this = _impl::is_negative(*this) ? -1 : 0;
+    }
+
+    if (n > 0) {
+        *this = _impl::shift_right(*this, n);
+    }
+    return *this;
+}
+
+// Pre-increment: adds one through _impl::operator_plus and returns the updated object.
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator++() noexcept(
+        std::is_same_v<Signed, unsigned>) {
+    return *this = _impl::operator_plus(*this, 1);
+}
+
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> integer<Bits, Signed>::operator++(int) noexcept(
+        std::is_same_v<Signed, unsigned>) {
+    auto previous = *this; // post-increment hands back the value from before the update
+    *this = _impl::operator_plus(*this, 1);
+    return previous;
+}
+
+// Pre-decrement: subtracts one through _impl::operator_minus and returns the updated object.
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed>& integer<Bits, Signed>::operator--() noexcept(
+        std::is_same_v<Signed, unsigned>) {
+    return *this = _impl::operator_minus(*this, 1);
+}
+
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> integer<Bits, Signed>::operator--(int) noexcept(
+        std::is_same_v<Signed, unsigned>) {
+    auto previous = *this; // post-decrement hands back the value from before the update
+    *this = _impl::operator_minus(*this, 1);
+    return previous;
+}
+
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed>::operator bool() const noexcept {
+    return !_impl::operator_eq(*this, 0);
+}
+
+template <size_t Bits, typename Signed>
+template <class T, class>
+constexpr integer<Bits, Signed>::operator T() const noexcept {
+    static_assert(std::numeric_limits<T>::is_integer);
+    // Only as many low limbs as are needed to fill T are read; higher limbs are dropped.
+    /// NOTE: memcpy will suffice, but unfortunately, this function is constexpr.
+    // Accumulate in the unsigned counterpart of T; the return statement converts back to T.
+    using UnsignedT = std::make_unsigned_t<T>;
+
+    UnsignedT res {};
+    for (unsigned i = 0;
+         i < _impl::item_count && i < (sizeof(T) + sizeof(base_type) - 1) / sizeof(base_type);
+         ++i) {
+        res += UnsignedT(items[_impl::little(i)])
+               << (sizeof(base_type) * 8 *
+                   i); // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
+    }
+
+    return res;
+}
+
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed>::operator long double() const noexcept {
+    if (_impl::operator_eq(*this, 0)) {
+        return 0;
+    }
+    // Negative values are negated before the loop; the sign is re-applied afterwards.
+    integer<Bits, Signed> tmp = *this;
+    if (_impl::is_negative(*this)) {
+        tmp = -tmp;
+    }
+    // Limbs are consumed from big(0) onwards; res * max + res equals res * (max + 1).
+    long double res = 0;
+    for (unsigned i = 0; i < _impl::item_count; ++i) {
+        long double t = res;
+        res *= static_cast<long double>(std::numeric_limits<base_type>::max());
+        res += t;
+        res += tmp.items[_impl::big(i)];
+    }
+
+    if (_impl::is_negative(*this)) {
+        res = -res;
+    }
+
+    return res;
+}
+
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed>::operator double() const noexcept {
+    return static_cast<double>(static_cast<long double>(*this));
+}
+
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed>::operator float() const noexcept {
+    return static_cast<float>(static_cast<long double>(*this));
+}
+
+// Unary operators
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> operator~(const integer<Bits, Signed>& lhs) noexcept {
+    return integer<Bits, Signed>::_impl::operator_unary_tilda(lhs);
+}
+
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> operator-(const integer<Bits, Signed>& lhs) noexcept(
+        std::is_same_v<Signed, unsigned>) {
+    return integer<Bits, Signed>::_impl::operator_unary_minus(lhs);
+}
+
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> operator+(const integer<Bits, Signed>& lhs) noexcept(
+        std::is_same_v<Signed, unsigned>) {
+    return lhs;
+}
+
+#define CT(x)                                                                      \
+    std::common_type_t<std::decay_t<decltype(rhs)>, std::decay_t<decltype(lhs)>> { \
+        x                                                                          \
+    }
+
+// Binary operators
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator*(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>>::_impl::operator_star(
+            lhs, rhs);
+}
+
+template <typename Arithmetic, typename Arithmetic2, class>
+std::common_type_t<Arithmetic, Arithmetic2> constexpr operator*(const Arithmetic& lhs,
+                                                                const Arithmetic2& rhs) {
+    return CT(lhs) * CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator/(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>,
+                              integer<Bits2, Signed2>>::_impl::operator_slash(lhs, rhs);
+}
+template <typename Arithmetic, typename Arithmetic2, class>
+std::common_type_t<Arithmetic, Arithmetic2> constexpr operator/(const Arithmetic& lhs,
+                                                                const Arithmetic2& rhs) {
+    return CT(lhs) / CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator+(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>>::_impl::operator_plus(
+            lhs, rhs);
+}
+template <typename Arithmetic, typename Arithmetic2, class>
+std::common_type_t<Arithmetic, Arithmetic2> constexpr operator+(const Arithmetic& lhs,
+                                                                const Arithmetic2& rhs) {
+    return CT(lhs) + CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator-(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>,
+                              integer<Bits2, Signed2>>::_impl::operator_minus(lhs, rhs);
+}
+template <typename Arithmetic, typename Arithmetic2, class>
+std::common_type_t<Arithmetic, Arithmetic2> constexpr operator-(const Arithmetic& lhs,
+                                                                const Arithmetic2& rhs) {
+    return CT(lhs) - CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator%(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>,
+                              integer<Bits2, Signed2>>::_impl::operator_percent(lhs, rhs);
+}
+template <typename Integral, typename Integral2, class>
+std::common_type_t<Integral, Integral2> constexpr operator%(const Integral& lhs,
+                                                            const Integral2& rhs) {
+    return CT(lhs) % CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator&(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>>::_impl::operator_amp(
+            lhs, rhs);
+}
+template <typename Integral, typename Integral2, class>
+std::common_type_t<Integral, Integral2> constexpr operator&(const Integral& lhs,
+                                                            const Integral2& rhs) {
+    return CT(lhs) & CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator|(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>>::_impl::operator_pipe(
+            lhs, rhs);
+}
+template <typename Integral, typename Integral2, class>
+std::common_type_t<Integral, Integral2> constexpr operator|(const Integral& lhs,
+                                                            const Integral2& rhs) {
+    return CT(lhs) | CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>> constexpr operator^(
+        const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>,
+                              integer<Bits2, Signed2>>::_impl::operator_circumflex(lhs, rhs);
+}
+template <typename Integral, typename Integral2, class>
+std::common_type_t<Integral, Integral2> constexpr operator^(const Integral& lhs,
+                                                            const Integral2& rhs) {
+    return CT(lhs) ^ CT(rhs);
+}
+
+// Left shift by n bits. Counts of Bits or more (a negative n wraps into that range) give zero,
+// and a zero count returns lhs unchanged.
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> operator<<(const integer<Bits, Signed>& lhs, int n) noexcept {
+    if (static_cast<size_t>(n) >= Bits) {
+        return integer<Bits, Signed>(0);
+    }
+    // Only 0 <= n < Bits reaches this point.
+    return n == 0 ? lhs : integer<Bits, Signed>::_impl::shift_left(lhs, n);
+}
+// Right shift by n bits, kept consistent with operator>>=: a count of Bits or more (a negative
+// n wraps into that range) yields -1 for negative values and 0 otherwise.
+template <size_t Bits, typename Signed>
+constexpr integer<Bits, Signed> operator>>(const integer<Bits, Signed>& lhs, int n) noexcept {
+    if (static_cast<size_t>(n) >= Bits) {
+        return integer<Bits, Signed>(integer<Bits, Signed>::_impl::is_negative(lhs) ? -1 : 0);
+    }
+    // Only 0 <= n < Bits reaches this point; a zero count returns lhs unchanged.
+    return n == 0 ? lhs : integer<Bits, Signed>::_impl::shift_right(lhs, n);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator<(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>>::_impl::operator_less(
+            lhs, rhs);
+}
+template <typename Arithmetic, typename Arithmetic2, class>
+constexpr bool operator<(const Arithmetic& lhs, const Arithmetic2& rhs) {
+    return CT(lhs) < CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator>(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>,
+                              integer<Bits2, Signed2>>::_impl::operator_greater(lhs, rhs);
+}
+template <typename Arithmetic, typename Arithmetic2, class>
+constexpr bool operator>(const Arithmetic& lhs, const Arithmetic2& rhs) {
+    return CT(lhs) > CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator<=(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>>::_impl::operator_less(
+                   lhs, rhs) ||
+           std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>>::_impl::operator_eq(
+                   lhs, rhs);
+}
+template <typename Arithmetic, typename Arithmetic2, class>
+constexpr bool operator<=(const Arithmetic& lhs, const Arithmetic2& rhs) {
+    return CT(lhs) <= CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator>=(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>,
+                              integer<Bits2, Signed2>>::_impl::operator_greater(lhs, rhs) ||
+           std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>>::_impl::operator_eq(
+                   lhs, rhs);
+}
+template <typename Arithmetic, typename Arithmetic2, class>
+constexpr bool operator>=(const Arithmetic& lhs, const Arithmetic2& rhs) {
+    return CT(lhs) >= CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator==(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>>::_impl::operator_eq(
+            lhs, rhs);
+}
+template <typename Arithmetic, typename Arithmetic2, class>
+constexpr bool operator==(const Arithmetic& lhs, const Arithmetic2& rhs) {
+    return CT(lhs) == CT(rhs);
+}
+
+template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
+constexpr bool operator!=(const integer<Bits, Signed>& lhs, const integer<Bits2, Signed2>& rhs) {
+    return !std::common_type_t<integer<Bits, Signed>, integer<Bits2, Signed2>>::_impl::operator_eq(
+            lhs, rhs);
+}
+template <typename Arithmetic, typename Arithmetic2, class>
+constexpr bool operator!=(const Arithmetic& lhs, const Arithmetic2& rhs) {
+    return CT(lhs) != CT(rhs);
+}
+
+#undef CT
+
+} // namespace wide
+
+namespace std {
+
+/// Hash support: XOR-folds the storage of the value, read as size_t words.
+template <size_t Bits, typename Signed>
+struct hash<wide::integer<Bits, Signed>> {
+    std::size_t operator()(const wide::integer<Bits, Signed>& lhs) const {
+        static_assert(Bits % (sizeof(size_t) * 8) == 0);
+        constexpr unsigned word_count = Bits / (sizeof(size_t) * 8);
+
+        const auto* words = reinterpret_cast<const size_t*>(lhs.items);
+        size_t folded = 0;
+        for (unsigned w = 0; w < word_count; ++w) {
+            folded ^= words[w];
+        }
+        return folded;
+    }
+};
+
+} // namespace std
+
+// NOLINTEND(*)
diff --git a/be/src/vec/core/wide_integer_to_string.h b/be/src/vec/core/wide_integer_to_string.h
new file mode 100644
index 0000000000..9d7c28e323
--- /dev/null
+++ b/be/src/vec/core/wide_integer_to_string.h
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/wide_integer_to_string.h
+// and modified by Doris
+#pragma once
+
+#include <fmt/format.h>
+
+#include <ostream>
+#include <string>
+
+#include "wide_integer.h"
+
+namespace wide {
+
+/// Renders n in base 10, with a leading '-' for negative values.
+template <size_t Bits, typename Signed>
+inline std::string to_string(const integer<Bits, Signed>& n) {
+    using Impl = typename integer<Bits, Signed>::_impl;
+    using UImpl = typename integer<Bits, unsigned>::_impl;
+    if (Impl::operator_eq(n, 0U)) {
+        return "0";
+    }
+    const bool is_neg = Impl::is_negative(n);
+    integer<Bits, unsigned> magnitude;
+    if (is_neg) {
+        magnitude = Impl::operator_unary_minus(n);
+    } else {
+        magnitude = n;
+    }
+    // Digits come out least significant first: collect them reversed and flip at the end.
+    std::string reversed;
+    while (!UImpl::operator_eq(magnitude, 0U)) {
+        reversed.push_back('0' + char(UImpl::operator_percent(magnitude, 10U)));
+        magnitude = UImpl::operator_slash(magnitude, 10U);
+    }
+    if (is_neg) {
+        reversed.push_back('-');
+    }
+    return std::string(reversed.rbegin(), reversed.rend());
+}
+
+} // namespace wide
+
+template <size_t Bits, typename Signed>
+std::ostream& operator<<(std::ostream& out, const wide::integer<Bits, Signed>& value) {
+    return out << to_string(value);
+}
+
+/// See https://fmt.dev/latest/api.html#formatting-user-defined-types
+template <size_t Bits, typename Signed>
+struct fmt::formatter<wide::integer<Bits, Signed>> {
+    constexpr auto parse(format_parse_context& ctx) {
+        const auto* it = ctx.begin();
+        const auto* end = ctx.end();
+
+        /// Only support {}.
+        if (it != end && *it != '}') {
+            throw format_error("invalid format");
+        }
+
+        return it;
+    }
+    // const so newer fmt releases accept it; wide:: avoids picking fmt::to_string by accident.
+    template <typename FormatContext>
+    auto format(const wide::integer<Bits, Signed>& value, FormatContext& ctx) const {
+        return fmt::format_to(ctx.out(), "{}", wide::to_string(value));
+    }
+};
diff --git a/be/src/vec/data_types/convert_field_to_type.cpp b/be/src/vec/data_types/convert_field_to_type.cpp
index ba49257898..b5c4263181 100644
--- a/be/src/vec/data_types/convert_field_to_type.cpp
+++ b/be/src/vec/data_types/convert_field_to_type.cpp
@@ -82,6 +82,9 @@ public:
     [[noreturn]] String operator()(const DecimalField<Decimal128I>& x) const {
         LOG(FATAL) << "not implemeted";
     }
+    [[noreturn]] String operator()(const DecimalField<Decimal256>& x) const {
+        LOG(FATAL) << "not implemeted";
+    }
 };
 
 namespace {
diff --git a/be/src/vec/data_types/data_type.cpp b/be/src/vec/data_types/data_type.cpp
index 8b7a094dcf..48d37b38c3 100644
--- a/be/src/vec/data_types/data_type.cpp
+++ b/be/src/vec/data_types/data_type.cpp
@@ -139,6 +139,8 @@ PGenericType_TypeId IDataType::get_pdata_type(const IDataType* data_type) {
         return PGenericType::DECIMAL128;
     case TypeIndex::Decimal128I:
         return PGenericType::DECIMAL128I;
+    case TypeIndex::Decimal256:
+        return PGenericType::DECIMAL256;
     case TypeIndex::String:
         return PGenericType::STRING;
     case TypeIndex::Date:
diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h
index 6ccc61b25e..fda34cc84a 100644
--- a/be/src/vec/data_types/data_type.h
+++ b/be/src/vec/data_types/data_type.h
@@ -286,8 +286,10 @@ struct WhichDataType {
     bool is_decimal64() const { return idx == TypeIndex::Decimal64; }
     bool is_decimal128() const { return idx == TypeIndex::Decimal128; }
     bool is_decimal128i() const { return idx == TypeIndex::Decimal128I; }
+    bool is_decimal256() const { return idx == TypeIndex::Decimal256; }
     bool is_decimal() const {
-        return is_decimal32() || is_decimal64() || is_decimal128() || is_decimal128i();
+        return is_decimal32() || is_decimal64() || is_decimal128() || is_decimal128i() ||
+               is_decimal256();
     }
 
     bool is_float32() const { return idx == TypeIndex::Float32; }
diff --git a/be/src/vec/data_types/data_type_decimal.cpp b/be/src/vec/data_types/data_type_decimal.cpp
index 2f71ee736c..f69d169179 100644
--- a/be/src/vec/data_types/data_type_decimal.cpp
+++ b/be/src/vec/data_types/data_type_decimal.cpp
@@ -35,6 +35,7 @@
 #include "vec/common/int_exp.h"
 #include "vec/common/string_buffer.hpp"
 #include "vec/common/typeid_cast.h"
+#include "vec/core/types.h"
 #include "vec/io/io_helper.h"
 #include "vec/io/reader_buffer.h"
 
@@ -166,10 +167,10 @@ bool DataTypeDecimal<T>::parse_from_string(const std::string& str, T* res) const
 
 DataTypePtr create_decimal(UInt64 precision_value, UInt64 scale_value, bool use_v2) {
     if (precision_value < min_decimal_precision() ||
-        precision_value > max_decimal_precision<Decimal128>()) {
+        precision_value > max_decimal_precision<Decimal256>()) {
         throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR,
                                "Wrong precision {}, min: {}, max: {}", precision_value,
-                               min_decimal_precision(), max_decimal_precision<Decimal128>());
+                               min_decimal_precision(), max_decimal_precision<Decimal256>());
     }
 
     if (static_cast<UInt64>(scale_value) > precision_value) {
@@ -187,8 +188,10 @@ DataTypePtr create_decimal(UInt64 precision_value, UInt64 scale_value, bool use_
         return std::make_shared<DataTypeDecimal<Decimal32>>(precision_value, scale_value);
     } else if (precision_value <= max_decimal_precision<Decimal64>()) {
         return std::make_shared<DataTypeDecimal<Decimal64>>(precision_value, scale_value);
+    } else if (precision_value <= max_decimal_precision<Decimal128I>()) {
+        return std::make_shared<DataTypeDecimal<Decimal128I>>(precision_value, scale_value);
     }
-    return std::make_shared<DataTypeDecimal<Decimal128I>>(precision_value, scale_value);
+    return std::make_shared<DataTypeDecimal<Decimal256>>(precision_value, scale_value);
 }
 
 template <>
@@ -211,10 +214,16 @@ Decimal128I DataTypeDecimal<Decimal128I>::get_scale_multiplier(UInt32 scale) {
     return common::exp10_i128(scale);
 }
 
+template <> // explicit specialization of get_scale_multiplier for Decimal256
+Decimal256 DataTypeDecimal<Decimal256>::get_scale_multiplier(UInt32 scale) {
+    return Decimal256(common::exp10_i256(scale)); // presumably 10^scale -- TODO confirm
+}
+
 /// Explicit template instantiations.
 template class DataTypeDecimal<Decimal32>;
 template class DataTypeDecimal<Decimal64>;
 template class DataTypeDecimal<Decimal128>;
 template class DataTypeDecimal<Decimal128I>;
+template class DataTypeDecimal<Decimal256>;
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_decimal.h b/be/src/vec/data_types/data_type_decimal.h
index 7bc07c90cf..c7128c9b82 100644
--- a/be/src/vec/data_types/data_type_decimal.h
+++ b/be/src/vec/data_types/data_type_decimal.h
@@ -34,6 +34,7 @@
 
 // IWYU pragma: no_include <opentelemetry/common/threadlocal.h>
 #include "common/compiler_util.h" // IWYU pragma: keep
+#include "common/consts.h"
 #include "common/logging.h"
 #include "common/status.h"
 #include "olap/olap_common.h"
@@ -75,19 +76,23 @@ constexpr size_t max_decimal_precision() {
 }
 template <>
 constexpr size_t max_decimal_precision<Decimal32>() {
-    return 9;
+    return BeConsts::MAX_DECIMAL32_PRECISION;
 }
 template <>
 constexpr size_t max_decimal_precision<Decimal64>() {
-    return 18;
+    return BeConsts::MAX_DECIMAL64_PRECISION;
 }
 template <>
 constexpr size_t max_decimal_precision<Decimal128>() {
-    return 38;
+    return BeConsts::MAX_DECIMAL128_PRECISION;
 }
 template <>
 constexpr size_t max_decimal_precision<Decimal128I>() {
-    return 38;
+    return BeConsts::MAX_DECIMAL128_PRECISION;
+}
+template <> // Decimal256 specialization of max_decimal_precision
+constexpr size_t max_decimal_precision<Decimal256>() {
+    return BeConsts::MAX_DECIMAL256_PRECISION; // create_decimal() rejects anything above this
 }
 
 DataTypePtr create_decimal(UInt64 precision, UInt64 scale, bool use_v2);
@@ -154,6 +159,8 @@ public:
             desc = TypeDescriptor(TYPE_DECIMAL64);
         } else if constexpr (std::is_same_v<TypeId<T>, TypeId<Decimal128I>>) {
             desc = TypeDescriptor(TYPE_DECIMAL128I);
+        } else if constexpr (std::is_same_v<TypeId<T>, TypeId<Decimal256>>) {
+            desc = TypeDescriptor(TYPE_DECIMAL256);
         } else {
             desc = TypeDescriptor(TYPE_DECIMALV2);
         }
@@ -172,6 +179,9 @@ public:
         if constexpr (std::is_same_v<TypeId<T>, TypeId<Decimal128I>>) {
             return TPrimitiveType::DECIMAL128I;
         }
+        if constexpr (std::is_same_v<TypeId<T>, TypeId<Decimal256>>) {
+            return TPrimitiveType::DECIMAL256;
+        }
         LOG(FATAL) << "__builtin_unreachable";
         __builtin_unreachable();
     }
@@ -258,7 +268,7 @@ public:
         return x % get_scale_multiplier();
     }
 
-    T max_whole_value() const { return get_scale_multiplier(max_precision() - scale) - 1; }
+    T max_whole_value() const { return get_scale_multiplier(max_precision() - scale) - T(1); }
 
     bool can_store_whole(T x) const {
         T max = max_whole_value();
@@ -330,12 +340,12 @@ DataTypePtr decimal_result_type(const DataTypeDecimal<T>& tx, const DataTypeDeci
                 scale + 1;
         if (is_multiply) {
             scale = tx.get_scale() + ty.get_scale();
-            precision = std::min(multiply_precision, max_decimal_precision<Decimal128I>());
+            precision = std::min(multiply_precision, max_decimal_precision<Decimal256>());
         } else if (is_divide) {
             scale = tx.get_scale();
-            precision = std::min(divide_precision, max_decimal_precision<Decimal128I>());
+            precision = std::min(divide_precision, max_decimal_precision<Decimal256>());
         } else if (is_plus_minus) {
-            precision = std::min(plus_minus_precision, max_decimal_precision<Decimal128I>());
+            precision = std::min(plus_minus_precision, max_decimal_precision<Decimal256>());
         }
         return create_decimal(precision, scale, false);
     }
@@ -359,6 +369,9 @@ inline UInt32 get_decimal_scale(const IDataType& data_type, UInt32 default_value
     if (auto* decimal_type = check_decimal<Decimal128I>(data_type)) {
         return decimal_type->get_scale();
     }
+    if (auto* decimal_type = check_decimal<Decimal256>(data_type)) {
+        return decimal_type->get_scale();
+    }
     return default_value;
 }
 
@@ -374,6 +387,8 @@ template <>
 inline constexpr bool IsDataTypeDecimal<DataTypeDecimal<Decimal128>> = true;
 template <>
 inline constexpr bool IsDataTypeDecimal<DataTypeDecimal<Decimal128I>> = true;
+template <>
+inline constexpr bool IsDataTypeDecimal<DataTypeDecimal<Decimal256>> = true;
 
 template <typename DataType>
 constexpr bool IsDataTypeDecimalV2 = false;
@@ -385,6 +400,11 @@ constexpr bool IsDataTypeDecimal128I = false;
 template <>
 inline constexpr bool IsDataTypeDecimal128I<DataTypeDecimal<Decimal128I>> = true;
 
+template <typename DataType>
+constexpr bool IsDataTypeDecimal256 = false; // primary template: false for any other type
+template <> // the single specialization that evaluates to true
+inline constexpr bool IsDataTypeDecimal256<DataTypeDecimal<Decimal256>> = true;
+
 template <typename DataType>
 constexpr bool IsDataTypeDecimalOrNumber =
         IsDataTypeDecimal<DataType> || IsDataTypeNumber<DataType>;
@@ -418,7 +438,8 @@ ToDataType::FieldType convert_decimals(const typename FromDataType::FieldType& v
         }
     } else {
         converted_value =
-                value / DataTypeDecimal<MaxFieldType>::get_scale_multiplier(scale_from - scale_to);
+                static_cast<MaxFieldType>(value) /
+                DataTypeDecimal<MaxFieldType>::get_scale_multiplier(scale_from - scale_to);
     }
 
     if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType)) {
@@ -461,8 +482,9 @@ void convert_decimal_cols(
                 DataTypeDecimal<MaxFieldType>::get_scale_multiplier(scale_to - scale_from);
         MaxNativeType res;
         for (size_t i = 0; i < sz; i++) {
-            if (std::is_same_v<MaxNativeType, Int128>) {
-                if (common::mul_overflow(static_cast<MaxNativeType>(vec_from[i]), multiplier,
+            if constexpr (std::is_same_v<MaxNativeType, Int128> ||
+                          std::is_same_v<MaxNativeType, wide::Int256>) {
+                if (common::mul_overflow(static_cast<MaxNativeType>(vec_from[i].value), multiplier,
                                          res)) {
                     if (overflow_flag) {
                         overflow_flag[i] = 1;
@@ -470,10 +492,10 @@ void convert_decimal_cols(
                     vec_to[i] = res < 0 ? type_limit<ToFieldType>::min()
                                         : type_limit<ToFieldType>::max();
                 } else {
-                    vec_to[i] = res;
+                    vec_to[i] = ToFieldType(res);
                 }
             } else {
-                vec_to[i] = vec_from[i] * multiplier;
+                vec_to[i] = ToFieldType(vec_from[i].value * multiplier);
             }
         }
     } else {
@@ -481,9 +503,9 @@ void convert_decimal_cols(
                 DataTypeDecimal<MaxFieldType>::get_scale_multiplier(scale_from - scale_to);
         for (size_t i = 0; i < sz; i++) {
             if (vec_from[i] >= FromFieldType(0)) {
-                vec_to[i] = (vec_from[i] + multiplier / 2) / multiplier;
+                vec_to[i] = ToFieldType((vec_from[i].value + multiplier / 2) / multiplier);
             } else {
-                vec_to[i] = (vec_from[i] - multiplier / 2) / multiplier;
+                vec_to[i] = ToFieldType((vec_from[i].value - multiplier / 2) / multiplier);
             }
         }
     }
@@ -516,7 +538,8 @@ ToDataType::FieldType convert_from_decimal(const typename FromDataType::FieldTyp
         if constexpr (IsDecimalV2<FromFieldType>) {
             return binary_cast<int128_t, DecimalV2Value>(value);
         } else {
-            return static_cast<ToFieldType>(value) / FromDataType::get_scale_multiplier(scale);
+            return static_cast<ToFieldType>(value.value) /
+                   FromDataType::get_scale_multiplier(scale).value;
         }
     } else {
         FromFieldType converted_value =
@@ -566,7 +589,7 @@ ToDataType::FieldType convert_to_decimal(const typename FromDataType::FieldType&
             VLOG_DEBUG << "Decimal convert overflow. Float is out of Decimal range";
             return type_limit<ToFieldType>::max();
         }
-        return out;
+        return typename ToDataType::FieldType(out);
     } else {
         if constexpr (std::is_same_v<FromFieldType, UInt64>) {
             if (value > static_cast<UInt64>(std::numeric_limits<Int64>::max())) {
@@ -580,14 +603,16 @@ ToDataType::FieldType convert_to_decimal(const typename FromDataType::FieldType&
 template <typename T>
     requires IsDecimalNumber<T>
 typename T::NativeType max_decimal_value(UInt32 precision) {
-    return type_limit<T>::max() / DataTypeDecimal<T>::get_scale_multiplier(
-                                          (UInt32)(max_decimal_precision<T>() - precision));
+    return type_limit<T>::max().value / DataTypeDecimal<T>::get_scale_multiplier(
+                                                (UInt32)(max_decimal_precision<T>() - precision))
+                                                .value; // both operands unwrapped via .value
 }
 
 template <typename T>
     requires IsDecimalNumber<T>
 typename T::NativeType min_decimal_value(UInt32 precision) {
-    return type_limit<T>::min() / DataTypeDecimal<T>::get_scale_multiplier(
-                                          (UInt32)(max_decimal_precision<T>() - precision));
+    return type_limit<T>::min().value / DataTypeDecimal<T>::get_scale_multiplier(
+                                                (UInt32)(max_decimal_precision<T>() - precision))
+                                                .value; // both operands unwrapped via .value
 }
 } // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp
index 4ab836141b..b5700cb7f0 100644
--- a/be/src/vec/data_types/data_type_factory.cpp
+++ b/be/src/vec/data_types/data_type_factory.cpp
@@ -187,6 +187,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo
     case TYPE_DECIMAL32:
     case TYPE_DECIMAL64:
     case TYPE_DECIMAL128I:
+    case TYPE_DECIMAL256:
         nested = vectorized::create_decimal(col_desc.precision, col_desc.scale, false);
         break;
     // Just Mock A NULL Type in Vec Exec Engine
@@ -302,6 +303,10 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool
         nested = std::make_shared<DataTypeDecimal<Decimal128I>>(BeConsts::MAX_DECIMAL128_PRECISION,
                                                                 0);
         break;
+    case TypeIndex::Decimal256:
+        nested = std::make_shared<DataTypeDecimal<Decimal256>>(BeConsts::MAX_DECIMAL256_PRECISION,
+                                                               0);
+        break;
     case TypeIndex::JSONB:
         nested = std::make_shared<vectorized::DataTypeJsonb>();
         break;
@@ -394,6 +399,7 @@ DataTypePtr DataTypeFactory::_create_primitive_data_type(const FieldType& type,
     case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
     case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
     case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
         result = vectorized::create_decimal(precision, scale, false);
         break;
     default:
@@ -479,6 +485,10 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) {
         nested = std::make_shared<DataTypeDecimal<Decimal128I>>(pcolumn.decimal_param().precision(),
                                                                 pcolumn.decimal_param().scale());
         break;
+    case PGenericType::DECIMAL256:
+        nested = std::make_shared<DataTypeDecimal<Decimal256>>(pcolumn.decimal_param().precision(),
+                                                               pcolumn.decimal_param().scale());
+        break;
     case PGenericType::BITMAP:
         nested = std::make_shared<DataTypeBitMap>();
         break;
diff --git a/be/src/vec/data_types/get_least_supertype.cpp b/be/src/vec/data_types/get_least_supertype.cpp
index be9dd5c05c..9db1271450 100644
--- a/be/src/vec/data_types/get_least_supertype.cpp
+++ b/be/src/vec/data_types/get_least_supertype.cpp
@@ -358,10 +358,12 @@ void get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compati
         UInt32 have_decimal64 = type_ids.count(TypeIndex::Decimal64);
         UInt32 have_decimal128 = type_ids.count(TypeIndex::Decimal128);
         UInt32 have_decimal128i = type_ids.count(TypeIndex::Decimal128I);
+        UInt32 have_decimal256 = type_ids.count(TypeIndex::Decimal256);
 
-        if (have_decimal32 || have_decimal64 || have_decimal128 || have_decimal128i) {
-            UInt32 num_supported =
-                    have_decimal32 + have_decimal64 + have_decimal128 + have_decimal128i;
+        if (have_decimal32 || have_decimal64 || have_decimal128 || have_decimal128i ||
+            have_decimal256) {
+            UInt32 num_supported = have_decimal32 + have_decimal64 + have_decimal128 +
+                                   have_decimal128i + have_decimal256;
 
             std::vector<TypeIndex> int_ids = {
                     TypeIndex::Int8,  TypeIndex::UInt8,  TypeIndex::Int16, TypeIndex::UInt16,
@@ -401,7 +403,7 @@ void get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compati
                     min_precision = DataTypeDecimal<Decimal64>::max_precision();
             }
 
-            if (min_precision > DataTypeDecimal<Decimal128>::max_precision()) {
+            if (min_precision > DataTypeDecimal<Decimal256>::max_precision()) {
                 LOG(INFO) << fmt::format("{} because the least supertype is Decimal({},{})",
                                          get_exception_message_prefix(types), min_precision,
                                          max_scale);
@@ -412,6 +414,11 @@ void get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compati
                                        doris::ErrorCode::INVALID_ARGUMENT);
             }
 
+            if (have_decimal256 || min_precision > DataTypeDecimal<Decimal128I>::max_precision()) {
+                *type = std::make_shared<DataTypeDecimal<Decimal256>>(
+                        DataTypeDecimal<Decimal256>::max_precision(), max_scale);
+                return;
+            }
             if (have_decimal128 || min_precision > DataTypeDecimal<Decimal64>::max_precision()) {
                 *type = std::make_shared<DataTypeDecimal<Decimal128>>(
                         DataTypeDecimal<Decimal128>::max_precision(), max_scale);
diff --git a/be/src/vec/data_types/number_traits.h b/be/src/vec/data_types/number_traits.h
index 8b87e55d93..2d05d65681 100644
--- a/be/src/vec/data_types/number_traits.h
+++ b/be/src/vec/data_types/number_traits.h
@@ -26,6 +26,7 @@
 #include "vec/columns/column_vector.h"
 #include "vec/common/uint128.h"
 #include "vec/core/types.h"
+#include "vec/core/wide_integer.h"
 
 namespace doris::vectorized {
 
@@ -76,6 +77,10 @@ struct Construct<false, false, 16> {
     using Type = Int128;
 };
 template <>
+struct Construct<false, false, 32> {
+    using Type = wide::Int256; // 32 is presumably a byte width (16 maps to Int128) -- confirm
+};
+template <>
 struct Construct<false, true, 1> {
     using Type = Float32;
 };
@@ -112,6 +117,10 @@ struct Construct<true, false, 16> {
     using Type = Int128;
 };
 template <>
+struct Construct<true, false, 32> {
+    using Type = wide::Int256; // 32 is presumably a byte width (16 maps to Int128) -- confirm
+};
+template <>
 struct Construct<true, true, 1> {
     using Type = Float32;
 };
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index e6628d8c5b..dae309119b 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -111,6 +111,7 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const
             checkArrowStatus(builder.Append(value), column.get_name(),
                              array_builder->type()->name());
         }
+        // TODO: decimal256
     } else if constexpr (std::is_same_v<T, Decimal128I>) {
         std::shared_ptr<arrow::DataType> s_decimal_ptr =
                 std::make_shared<arrow::Decimal128Type>(38, col.get_scale());
@@ -277,5 +278,6 @@ template class DataTypeDecimalSerDe<Decimal32>;
 template class DataTypeDecimalSerDe<Decimal64>;
 template class DataTypeDecimalSerDe<Decimal128>;
 template class DataTypeDecimalSerDe<Decimal128I>;
+template class DataTypeDecimalSerDe<Decimal256>;
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h
index 5085d40361..4843a6b90e 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.h
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h
@@ -28,6 +28,7 @@
 #include "common/status.h"
 #include "data_type_serde.h"
 #include "olap/olap_common.h"
+#include "runtime/define_primitive_type.h"
 #include "util/jsonb_document.h"
 #include "util/jsonb_writer.h"
 #include "vec/columns/column.h"
@@ -60,6 +61,9 @@ public:
         if constexpr (std::is_same_v<TypeId<T>, TypeId<Decimal128>>) {
             return TYPE_DECIMALV2;
         }
+        if constexpr (std::is_same_v<TypeId<T>, TypeId<Decimal256>>) {
+            return TYPE_DECIMAL256;
+        }
         LOG(FATAL) << "__builtin_unreachable";
         __builtin_unreachable();
     }
@@ -128,6 +132,8 @@ Status DataTypeDecimalSerDe<T>::write_column_to_pb(const IColumn& column, PValue
         ptype->set_id(PGenericType::DECIMAL128);
     } else if constexpr (std::is_same_v<T, Decimal128I>) {
         ptype->set_id(PGenericType::DECIMAL128I);
+    } else if constexpr (std::is_same_v<T, Decimal256>) {
+        ptype->set_id(PGenericType::DECIMAL256);
     } else if constexpr (std::is_same_v<T, Decimal<Int32>>) {
         ptype->set_id(PGenericType::INT32);
     } else if constexpr (std::is_same_v<T, Decimal<Int64>>) {
@@ -143,10 +149,12 @@ Status DataTypeDecimalSerDe<T>::write_column_to_pb(const IColumn& column, PValue
     return Status::OK();
 }
 
+// TODO: decimal256
 template <typename T>
 Status DataTypeDecimalSerDe<T>::read_column_from_pb(IColumn& column, const PValues& arg) const {
     if constexpr (std::is_same_v<T, Decimal<Int128>> || std::is_same_v<T, Decimal128I> ||
-                  std::is_same_v<T, Decimal<Int16>> || std::is_same_v<T, Decimal<Int32>>) {
+                  std::is_same_v<T, Decimal256> || std::is_same_v<T, Decimal<Int16>> ||
+                  std::is_same_v<T, Decimal<Int32>>) {
         column.resize(arg.bytes_value_size());
         auto& data = reinterpret_cast<ColumnDecimal<T>&>(column).get_data();
         for (int i = 0; i < arg.bytes_value_size(); ++i) {
@@ -164,6 +172,7 @@ void DataTypeDecimalSerDe<T>::write_one_cell_to_jsonb(const IColumn& column, Jso
                                                       int row_num) const {
     StringRef data_ref = column.get_data_at(row_num);
     result.writeKey(col_id);
+    // TODO: decimal256
     if constexpr (std::is_same_v<T, Decimal<Int128>>) {
         Decimal128::NativeType val =
                 *reinterpret_cast<const Decimal128::NativeType*>(data_ref.data);
@@ -188,6 +197,7 @@ template <typename T>
 void DataTypeDecimalSerDe<T>::read_one_cell_from_jsonb(IColumn& column,
                                                        const JsonbValue* arg) const {
     auto& col = reinterpret_cast<ColumnDecimal<T>&>(column);
+    // TODO: decimal256
     if constexpr (std::is_same_v<T, Decimal<Int128>>) {
         col.insert_value(static_cast<const JsonbInt128Val*>(arg)->val());
     } else if constexpr (std::is_same_v<T, Decimal128I>) {
diff --git a/be/src/vec/exec/format/parquet/byte_array_dict_decoder.cpp b/be/src/vec/exec/format/parquet/byte_array_dict_decoder.cpp
index 1e09890a98..6f5f36a33a 100644
--- a/be/src/vec/exec/format/parquet/byte_array_dict_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/byte_array_dict_decoder.cpp
@@ -169,6 +169,7 @@ Status ByteArrayDictDecoder::_decode_values(MutableColumnPtr& doris_column, Data
         return _decode_binary_decimal<Int128, has_filter>(doris_column, data_type, select_vector);
     case TypeIndex::Decimal128I:
         return _decode_binary_decimal<Int128, has_filter>(doris_column, data_type, select_vector);
+    // TODO: decimal256
     default:
         break;
     }
diff --git a/be/src/vec/exec/format/parquet/byte_array_plain_decoder.cpp b/be/src/vec/exec/format/parquet/byte_array_plain_decoder.cpp
index 9a032b540b..e91f9f1db9 100644
--- a/be/src/vec/exec/format/parquet/byte_array_plain_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/byte_array_plain_decoder.cpp
@@ -118,6 +118,7 @@ Status ByteArrayPlainDecoder::_decode_values(MutableColumnPtr& doris_column, Dat
         return _decode_binary_decimal<Int128, has_filter>(doris_column, data_type, select_vector);
     case TypeIndex::Decimal128I:
         return _decode_binary_decimal<Int128, has_filter>(doris_column, data_type, select_vector);
+    // TODO: decimal256
     default:
         break;
     }
diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
index a30c2dff3d..35880cfcdd 100644
--- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
+++ b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
@@ -150,6 +150,7 @@ public:
                                                                             select_vector);
             }
             break;
+            // TODO: decimal256
         case TypeIndex::String:
             [[fallthrough]];
         case TypeIndex::FixedString:
@@ -512,6 +513,7 @@ public:
                                                                   select_vector);
             }
             break;
+            // TODO: decimal256
         case TypeIndex::String:
             [[fallthrough]];
         case TypeIndex::FixedString:
diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
index af464c1554..8e6f6ebb67 100644
--- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
@@ -173,6 +173,7 @@ Status FixLengthPlainDecoder::_decode_values(MutableColumnPtr& doris_column, Dat
                                                                         select_vector);
         }
         break;
+    // TODO: decimal256
     case TypeIndex::String:
         [[fallthrough]];
     case TypeIndex::FixedString:
diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp
index 16ef362b96..c4ba02a3df 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -399,6 +399,7 @@ Status VScanNode::_normalize_conjuncts() {
     M(DECIMAL32)                    \
     M(DECIMAL64)                    \
     M(DECIMAL128I)                  \
+    M(DECIMAL256)                   \
     M(DECIMALV2)                    \
     M(BOOLEAN)
             APPLY_FOR_PRIMITIVE_TYPE(M)
@@ -1217,7 +1218,8 @@ Status VScanNode::_change_value_range(ColumnValueRange<PrimitiveType>& temp_rang
                          (PrimitiveType == TYPE_SMALLINT) || (PrimitiveType == TYPE_INT) ||
                          (PrimitiveType == TYPE_BIGINT) || (PrimitiveType == TYPE_LARGEINT) ||
                          (PrimitiveType == TYPE_DECIMAL32) || (PrimitiveType == TYPE_DECIMAL64) ||
-                         (PrimitiveType == TYPE_DECIMAL128I) || (PrimitiveType == TYPE_STRING) ||
+                         (PrimitiveType == TYPE_DECIMAL128I) ||
+                         (PrimitiveType == TYPE_DECIMAL256) || (PrimitiveType == TYPE_STRING) ||
                          (PrimitiveType == TYPE_BOOLEAN) || (PrimitiveType == TYPE_DATEV2)) {
         if constexpr (IsFixed) {
             func(temp_range,
diff --git a/be/src/vec/exec/vjdbc_connector.cpp b/be/src/vec/exec/vjdbc_connector.cpp
index 8209cb3a0b..6c0857f237 100644
--- a/be/src/vec/exec/vjdbc_connector.cpp
+++ b/be/src/vec/exec/vjdbc_connector.cpp
@@ -335,7 +335,8 @@ Status JdbcConnector::_check_type(SlotDescriptor* slot_desc, const std::string&
     case TYPE_DECIMALV2:
     case TYPE_DECIMAL32:
     case TYPE_DECIMAL64:
-    case TYPE_DECIMAL128I: {
+    case TYPE_DECIMAL128I:
+    case TYPE_DECIMAL256: {
         if (type_str != "java.math.BigDecimal") {
             return Status::InternalError(error_msg);
         }
diff --git a/be/src/vec/exprs/vectorized_agg_fn.cpp b/be/src/vec/exprs/vectorized_agg_fn.cpp
index 28c4c97b39..06a776efac 100644
--- a/be/src/vec/exprs/vectorized_agg_fn.cpp
+++ b/be/src/vec/exprs/vectorized_agg_fn.cpp
@@ -203,7 +203,7 @@ Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc,
     } else {
         _function = AggregateFunctionSimpleFactory::instance().get(
                 _fn.name.function_name, argument_types, _data_type->is_nullable(),
-                state->be_exec_version());
+                state->be_exec_version(), state->enable_decima256());
     }
     if (_function == nullptr) {
         return Status::InternalError("Agg Function {} is not implemented", _fn.signature);
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index b483642f9c..109ef5e77d 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -125,6 +125,11 @@ TExprNode create_texpr_node_from(const void* data, const PrimitiveType& type, in
                 create_texpr_literal_node<TYPE_DECIMAL128I>(data, &node, precision, scale));
         break;
     }
+    case TYPE_DECIMAL256: {
+        static_cast<void>(
+                create_texpr_literal_node<TYPE_DECIMAL256>(data, &node, precision, scale));
+        break;
+    }
     case TYPE_CHAR: {
         static_cast<void>(create_texpr_literal_node<TYPE_CHAR>(data, &node));
         break;
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
index ad5af0aa5b..64d389bd00 100644
--- a/be/src/vec/exprs/vexpr.h
+++ b/be/src/vec/exprs/vexpr.h
@@ -39,6 +39,7 @@
 #include "vec/columns/column.h"
 #include "vec/core/block.h"
 #include "vec/core/column_with_type_and_name.h"
+#include "vec/core/wide_integer.h"
 #include "vec/data_types/data_type.h"
 #include "vec/exprs/vexpr_fwd.h"
 #include "vec/functions/function.h"
@@ -362,6 +363,13 @@ Status create_texpr_literal_node(const void* data, TExprNode* node, int precisio
         decimal_literal.__set_value(origin_value->to_string(scale));
         (*node).__set_decimal_literal(decimal_literal);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL128I, precision, scale));
+    } else if constexpr (T == TYPE_DECIMAL256) {
+        const auto* origin_value = reinterpret_cast<const vectorized::Decimal<wide::Int256>*>(data);
+        (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
+        TDecimalLiteral decimal_literal;
+        decimal_literal.__set_value(origin_value->to_string(scale));
+        (*node).__set_decimal_literal(decimal_literal);
+        (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL256, precision, scale));
     } else if constexpr (T == TYPE_FLOAT) {
         auto origin_value = reinterpret_cast<const float*>(data);
         (*node).__set_node_type(TExprNodeType::FLOAT_LITERAL);
diff --git a/be/src/vec/functions/array/function_array_apply.cpp b/be/src/vec/functions/array/function_array_apply.cpp
index d05ba904f3..0e9076e65e 100644
--- a/be/src/vec/functions/array/function_array_apply.cpp
+++ b/be/src/vec/functions/array/function_array_apply.cpp
@@ -210,6 +210,8 @@ private:
             *dst = _apply_internal<Decimal128, OP>(src_column, src_offsets, cmp);  \
         } else if (which.is_decimal128i()) {                                       \
             *dst = _apply_internal<Decimal128I, OP>(src_column, src_offsets, cmp); \
+        } else if (which.is_decimal256()) {                                        \
+            *dst = _apply_internal<Decimal256, OP>(src_column, src_offsets, cmp);  \
         } else {                                                                   \
             LOG(FATAL) << "unsupported type " << nested_type->get_name();          \
         }                                                                          \
diff --git a/be/src/vec/functions/array/function_array_difference.h b/be/src/vec/functions/array/function_array_difference.h
index 956cd51fa2..cc2efa64bd 100644
--- a/be/src/vec/functions/array/function_array_difference.h
+++ b/be/src/vec/functions/array/function_array_difference.h
@@ -233,6 +233,9 @@ private:
         } else if (which_type.is_decimal128()) {
             res = _execute_number_expanded<Decimal128, Decimal128>(offsets, *nested_column,
                                                                    nested_null_map);
+        } else if (which_type.is_decimal256()) {
+            res = _execute_number_expanded<Decimal256, Decimal256>(offsets, *nested_column,
+                                                                   nested_null_map);
         } else {
             return nullptr;
         }
diff --git a/be/src/vec/functions/array/function_array_distinct.h b/be/src/vec/functions/array/function_array_distinct.h
index dff894d115..7e5e0a7372 100644
--- a/be/src/vec/functions/array/function_array_distinct.h
+++ b/be/src/vec/functions/array/function_array_distinct.h
@@ -304,6 +304,9 @@ private:
         } else if (which.is_decimal128i()) {
             res = _execute_number<ColumnDecimal128I>(src_column, src_offsets, dest_column,
                                                      dest_offsets, src_null_map, dest_null_map);
+        } else if (which.is_decimal256()) {
+            res = _execute_number<ColumnDecimal256>(src_column, src_offsets, dest_column,
+                                                    dest_offsets, src_null_map, dest_null_map);
         } else if (which.is_decimal128()) {
             res = _execute_number<ColumnDecimal128>(src_column, src_offsets, dest_column,
                                                     dest_offsets, src_null_map, dest_null_map);
diff --git a/be/src/vec/functions/array/function_array_element.h b/be/src/vec/functions/array/function_array_element.h
index 5c7627261f..53c7b9df3d 100644
--- a/be/src/vec/functions/array/function_array_element.h
+++ b/be/src/vec/functions/array/function_array_element.h
@@ -398,6 +398,9 @@ private:
         } else if (which_type.is_decimal128()) {
             res = _execute_number<ColumnDecimal128>(offsets, *nested_column, src_null_map, *idx_col,
                                                     nested_null_map, dst_null_map);
+        } else if (which_type.is_decimal256()) {
+            res = _execute_number<ColumnDecimal256>(offsets, *nested_column, src_null_map, *idx_col,
+                                                    nested_null_map, dst_null_map);
         } else if (which_type.is_string_or_fixed_string()) {
             res = _execute_string(offsets, *nested_column, src_null_map, *idx_col, nested_null_map,
                                   dst_null_map);
diff --git a/be/src/vec/functions/array/function_array_enumerate_uniq.cpp b/be/src/vec/functions/array/function_array_enumerate_uniq.cpp
index ead65b0e1e..f3bbf3c57a 100644
--- a/be/src/vec/functions/array/function_array_enumerate_uniq.cpp
+++ b/be/src/vec/functions/array/function_array_enumerate_uniq.cpp
@@ -198,6 +198,8 @@ public:
                 _execute_number<ColumnDecimal64>(data_columns, *offsets, null_map, dst_values);
             } else if (which.is_decimal128i()) {
                 _execute_number<ColumnDecimal128I>(data_columns, *offsets, null_map, dst_values);
+            } else if (which.is_decimal256()) {
+                _execute_number<ColumnDecimal256>(data_columns, *offsets, null_map, dst_values);
             } else if (which.is_date_time_v2()) {
                 _execute_number<ColumnDateTimeV2>(data_columns, *offsets, null_map, dst_values);
             } else if (which.is_decimal128()) {
diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h
index a7208df68e..23f251e68c 100644
--- a/be/src/vec/functions/array/function_array_index.h
+++ b/be/src/vec/functions/array/function_array_index.h
@@ -357,6 +357,10 @@ private:
                 return_column = _execute_number_expanded<ColumnDecimal128>(
                         offsets, nested_null_map, *nested_column, *right_column,
                         right_nested_null_map, array_null_map);
+            } else if (left_which_type.is_decimal256()) {
+                return_column = _execute_number_expanded<ColumnDecimal256>(
+                        offsets, nested_null_map, *nested_column, *right_column,
+                        right_nested_null_map, array_null_map);
             }
         } else if ((is_date_or_datetime(right_type) || is_date_v2_or_datetime_v2(right_type)) &&
                    (is_date_or_datetime(left_element_type) ||
diff --git a/be/src/vec/functions/array/function_array_join.h b/be/src/vec/functions/array/function_array_join.h
index d822c45a41..b982964914 100644
--- a/be/src/vec/functions/array/function_array_join.h
+++ b/be/src/vec/functions/array/function_array_join.h
@@ -251,6 +251,9 @@ private:
             res = _execute_number<ColumnDecimal128I>(src_column, src_offsets, src_null_map, sep_str,
                                                      null_replace_str, nested_type,
                                                      dest_column_ptr);
+        } else if (which.is_decimal256()) {
+            res = _execute_number<ColumnDecimal256>(src_column, src_offsets, src_null_map, sep_str,
+                                                    null_replace_str, nested_type, dest_column_ptr);
         } else if (which.is_decimal128()) {
             res = _execute_number<ColumnDecimal128>(src_column, src_offsets, src_null_map, sep_str,
                                                     null_replace_str, nested_type, dest_column_ptr);
diff --git a/be/src/vec/functions/array/function_array_remove.h b/be/src/vec/functions/array/function_array_remove.h
index d60426d430..e1560d06d2 100644
--- a/be/src/vec/functions/array/function_array_remove.h
+++ b/be/src/vec/functions/array/function_array_remove.h
@@ -329,6 +329,9 @@ private:
             } else if (left_which_type.is_decimal128()) {
                 res = _execute_number_expanded<ColumnDecimal128>(offsets, *nested_column,
                                                                  *right_column, nested_null_map);
+            } else if (left_which_type.is_decimal256()) {
+                res = _execute_number_expanded<ColumnDecimal256>(offsets, *nested_column,
+                                                                 *right_column, nested_null_map);
             }
         } else if (is_date_or_datetime(right_type) && is_date_or_datetime(left_element_type)) {
             if (left_which_type.is_date()) {
diff --git a/be/src/vec/functions/array/function_arrays_overlap.h b/be/src/vec/functions/array/function_arrays_overlap.h
index d8c4862ca8..fb138d3edb 100644
--- a/be/src/vec/functions/array/function_arrays_overlap.h
+++ b/be/src/vec/functions/array/function_arrays_overlap.h
@@ -225,6 +225,10 @@ public:
             ret = _execute_internal<ColumnDecimal128>(left_exec_data, right_exec_data,
                                                       dst_null_map_data,
                                                       dst_nested_col->get_data().data());
+        } else if (left_which_type.is_decimal256()) {
+            ret = _execute_internal<ColumnDecimal256>(left_exec_data, right_exec_data,
+                                                      dst_null_map_data,
+                                                      dst_nested_col->get_data().data());
         }
 
         if (ret.ok()) {
diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h
index aeea5d1df0..5f481aeef3 100644
--- a/be/src/vec/functions/function.h
+++ b/be/src/vec/functions/function.h
@@ -682,11 +682,12 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum
     M(Float32, ColumnFloat32)          \
     M(Float64, ColumnFloat64)
 
-#define DECIMAL_TYPE_TO_COLUMN_TYPE(M)       \
-    M(Decimal32, ColumnDecimal<Decimal32>)   \
-    M(Decimal64, ColumnDecimal<Decimal64>)   \
-    M(Decimal128, ColumnDecimal<Decimal128>) \
-    M(Decimal128I, ColumnDecimal<Decimal128I>)
+#define DECIMAL_TYPE_TO_COLUMN_TYPE(M)         \
+    M(Decimal32, ColumnDecimal<Decimal32>)     \
+    M(Decimal64, ColumnDecimal<Decimal64>)     \
+    M(Decimal128, ColumnDecimal<Decimal128>)   \
+    M(Decimal128I, ColumnDecimal<Decimal128I>) \
+    M(Decimal256, ColumnDecimal<Decimal256>)
 
 #define STRING_TYPE_TO_COLUMN_TYPE(M) \
     M(String, ColumnString)           \
diff --git a/be/src/vec/functions/function_binary_arithmetic.h b/be/src/vec/functions/function_binary_arithmetic.h
index 122d5b01e9..7198a08bb2 100644
--- a/be/src/vec/functions/function_binary_arithmetic.h
+++ b/be/src/vec/functions/function_binary_arithmetic.h
@@ -79,7 +79,7 @@ struct OperationTraits {
             std::is_same_v<Op, DivideIntegralImpl<T, T>>;
     static constexpr bool can_overflow =
             (is_plus_minus || is_multiply) &&
-            (IsDecimalV2<OpA> || IsDecimalV2<OpB> || IsDecimal128I<OpA> || IsDecimal128I<OpB>);
+            (IsDecimalV2<OpA> || IsDecimalV2<OpB> || IsDecimal256<OpA> || IsDecimal256<OpB>);
     static constexpr bool has_variadic_argument =
             !std::is_void_v<decltype(has_variadic_argument_types(std::declval<Op>()))>;
 };
@@ -239,7 +239,7 @@ struct DecimalBinaryOperation {
             Op::vector_vector(a, b, c, size);
         } else {
             for (size_t i = 0; i < size; i++) {
-                c[i] = apply(a[i], b[i]);
+                c[i] = typename ArrayC::value_type(apply(a[i], b[i]));
             }
         }
     }
@@ -251,11 +251,20 @@ struct DecimalBinaryOperation {
         if constexpr (IsDecimalV2<B> || IsDecimalV2<A>) {
             /// default: use it if no return before
             for (size_t i = 0; i < size; ++i) {
-                c[i] = apply(a[i], b[i], null_map[i]);
+                c[i] = typename ArrayC::value_type(apply(a[i], b[i], null_map[i]));
             }
         } else if constexpr (OpTraits::is_division && (IsDecimalNumber<B> || IsDecimalNumber<A>)) {
             for (size_t i = 0; i < size; ++i) {
-                c[i] = apply_scaled_div(a[i], b[i], null_map[i]);
+                if constexpr (IsDecimalNumber<B> && IsDecimalNumber<A>) {
+                    c[i] = typename ArrayC::value_type(
+                            apply_scaled_div(a[i].value, b[i].value, null_map[i]));
+                } else if constexpr (IsDecimalNumber<A>) {
+                    c[i] = typename ArrayC::value_type(
+                            apply_scaled_div(a[i].value, b[i], null_map[i]));
+                } else {
+                    c[i] = typename ArrayC::value_type(
+                            apply_scaled_div(a[i], b[i].value, null_map[i]));
+                }
             }
         } else if constexpr ((OpTraits::is_multiply || OpTraits::is_plus_minus) &&
                              (IsDecimalNumber<B> || IsDecimalNumber<A>)) {
@@ -264,7 +273,7 @@ struct DecimalBinaryOperation {
             }
         } else {
             for (size_t i = 0; i < size; ++i) {
-                c[i] = apply(a[i], b[i], null_map[i]);
+                c[i] = typename ArrayC::value_type(apply(a[i], b[i], null_map[i]));
             }
         }
     }
@@ -273,14 +282,14 @@ struct DecimalBinaryOperation {
                                 typename ArrayC::value_type* c, size_t size) {
         if constexpr (OpTraits::is_division && IsDecimalNumber<B>) {
             for (size_t i = 0; i < size; ++i) {
-                c[i] = apply_scaled_div(a[i], b);
+                c[i] = typename ArrayC::value_type(apply_scaled_div(a[i], b));
             }
             return;
         }
 
         /// default: use it if no return before
         for (size_t i = 0; i < size; ++i) {
-            c[i] = apply(a[i], b);
+            c[i] = typename ArrayC::value_type(apply(a[i], b));
         }
     }
 
@@ -288,7 +297,7 @@ struct DecimalBinaryOperation {
                                 typename ArrayC::value_type* c, NullMap& null_map, size_t size) {
         if constexpr (OpTraits::is_division && IsDecimalNumber<B>) {
             for (size_t i = 0; i < size; ++i) {
-                c[i] = apply_scaled_div(a[i], b, null_map[i]);
+                c[i] = typename ArrayC::value_type(apply_scaled_div(a[i], b.value, null_map[i]));
             }
         } else if constexpr ((OpTraits::is_multiply || OpTraits::is_plus_minus) &&
                              (IsDecimalNumber<B> || IsDecimalNumber<A>)) {
@@ -297,7 +306,7 @@ struct DecimalBinaryOperation {
             }
         } else {
             for (size_t i = 0; i < size; ++i) {
-                c[i] = apply(a[i], b, null_map[i]);
+                c[i] = typename ArrayC::value_type(apply(a[i], b, null_map[i]));
             }
         }
     }
@@ -307,11 +316,12 @@ struct DecimalBinaryOperation {
         if constexpr (IsDecimalV2<A> || IsDecimalV2<B>) {
             DecimalV2Value da(a);
             for (size_t i = 0; i < size; ++i) {
-                c[i] = Op::template apply(da, DecimalV2Value(b[i])).value();
+                c[i] = typename ArrayC::value_type(
+                        Op::template apply(da, DecimalV2Value(b[i])).value());
             }
         } else {
             for (size_t i = 0; i < size; ++i) {
-                c[i] = apply(a, b[i]);
+                c[i] = typename ArrayC::value_type(apply(a, b[i]));
             }
         }
     }
@@ -320,7 +330,7 @@ struct DecimalBinaryOperation {
                                 typename ArrayC::value_type* c, NullMap& null_map, size_t size) {
         if constexpr (OpTraits::is_division && IsDecimalNumber<B>) {
             for (size_t i = 0; i < size; ++i) {
-                c[i] = apply_scaled_div(a, b[i], null_map[i]);
+                c[i] = typename ArrayC::value_type(apply_scaled_div(a, b[i].value, null_map[i]));
             }
         } else if constexpr ((OpTraits::is_multiply || OpTraits::is_plus_minus) &&
                              (IsDecimalNumber<B> || IsDecimalNumber<A>)) {
@@ -329,23 +339,27 @@ struct DecimalBinaryOperation {
             }
         } else {
             for (size_t i = 0; i < size; ++i) {
-                c[i] = apply(a, b[i], null_map[i]);
+                c[i] = typename ArrayC::value_type(apply(a, b[i], null_map[i]));
             }
         }
     }
 
-    static ResultType constant_constant(A a, B b) { return apply(a, b); }
+    static ResultType constant_constant(A a, B b) { return ResultType(apply(a, b)); }
 
     static ResultType constant_constant(A a, B b, UInt8& is_null) {
         if constexpr (OpTraits::is_division && IsDecimalNumber<B>) {
-            return apply_scaled_div(a, b, is_null);
+            if constexpr (IsDecimalNumber<A>) {
+                return ResultType(apply_scaled_div(a.value, b.value, is_null));
+            } else {
+                return ResultType(apply_scaled_div(a, b.value, is_null));
+            }
         } else if constexpr ((OpTraits::is_multiply || OpTraits::is_plus_minus) &&
                              (IsDecimalNumber<B> || IsDecimalNumber<A>)) {
             NativeResultType res;
             is_null = apply_op_safely(a, b, res);
-            return res;
+            return ResultType(res);
         } else {
-            return apply(a, b, is_null);
+            return ResultType(apply(a, b, is_null));
         }
     }
 
@@ -459,7 +473,7 @@ private:
                 NativeResultType res;
                 // TODO handle overflow gracefully
                 if (Op::template apply<NativeResultType>(a, b, res)) {
-                    res = type_limit<ResultType>::max();
+                    res = type_limit<ResultType>::max().value;
                 }
                 return res;
             } else {
@@ -475,7 +489,7 @@ private:
             DecimalV2Value l(a);
             DecimalV2Value r(b);
             auto ans = Op::template apply(l, r, is_null);
-            NativeResultType result;
+            NativeResultType result {};
             memcpy(&result, &ans, std::min(sizeof(result), sizeof(ans)));
             return result;
         } else {
@@ -483,32 +497,6 @@ private:
         }
     }
 
-    static NativeResultType apply_scaled(NativeResultType a, NativeResultType b) {
-        if constexpr (OpTraits::is_plus_minus) {
-            NativeResultType res;
-
-            if constexpr (check_overflow) {
-                bool overflow = false;
-
-                if constexpr (OpTraits::can_overflow) {
-                    overflow |= Op::template apply<NativeResultType>(a, b, res);
-                } else {
-                    res = Op::template apply<NativeResultType>(a, b);
-                }
-
-                // TODO handle overflow gracefully
-                if (overflow) {
-                    LOG(WARNING) << "Decimal math overflow";
-                    res = type_limit<ResultType>::max();
-                }
-            } else {
-                res = apply(a, b);
-            }
-
-            return res;
-        }
-    }
-
     static NativeResultType apply_scaled_div(NativeResultType a, NativeResultType b,
                                              UInt8& is_null) {
         if constexpr (OpTraits::is_division) {
@@ -559,6 +547,15 @@ inline constexpr bool IsIntegral<DataTypeInt128> = true;
 template <typename A, typename B>
 constexpr bool UseLeftDecimal = false;
 template <>
+inline constexpr bool UseLeftDecimal<DataTypeDecimal<Decimal256>, DataTypeDecimal<Decimal32>> =
+        true;
+template <>
+inline constexpr bool UseLeftDecimal<DataTypeDecimal<Decimal256>, DataTypeDecimal<Decimal64>> =
+        true;
+template <>
+inline constexpr bool UseLeftDecimal<DataTypeDecimal<Decimal256>, DataTypeDecimal<Decimal128I>> =
+        true;
+template <>
 inline constexpr bool UseLeftDecimal<DataTypeDecimal<Decimal128I>, DataTypeDecimal<Decimal32>> =
         true;
 template <>
@@ -725,8 +722,8 @@ class FunctionBinaryArithmetic : public IFunction {
         return cast_type_to_either<DataTypeUInt8, DataTypeInt8, DataTypeInt16, DataTypeInt32,
                                    DataTypeInt64, DataTypeInt128, DataTypeFloat32, DataTypeFloat64,
                                    DataTypeDecimal<Decimal32>, DataTypeDecimal<Decimal64>,
-                                   DataTypeDecimal<Decimal128>, DataTypeDecimal<Decimal128I>>(
-                type, std::forward<F>(f));
+                                   DataTypeDecimal<Decimal128>, DataTypeDecimal<Decimal128I>,
+                                   DataTypeDecimal<Decimal256>>(type, std::forward<F>(f));
     }
 
     template <typename F>
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index 5f0c7d2a3d..55fc8d178b 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -817,6 +817,9 @@ struct NameToDecimal128 {
 struct NameToDecimal128I {
     static constexpr auto name = "toDecimal128I";
 };
+struct NameToDecimal256 { // Name tag handed to FunctionConvert for the Decimal256 conversion
+    static constexpr auto name = "toDecimal256"; // surfaced as FunctionConvert::name
+};
 struct NameToUInt8 {
     static constexpr auto name = "toUInt8";
 };
@@ -930,6 +933,12 @@ StringParser::ParseResult try_parse_decimal_impl(typename DataType::FieldType& x
         UInt32 precision = ((PrecisionScaleArg)additions).precision;
         return try_read_decimal_text<TYPE_DECIMAL128I>(x, rb, precision, scale);
     }
+
+    if constexpr (IsDataTypeDecimal256<DataType>) {
+        UInt32 scale = ((PrecisionScaleArg)additions).scale;
+        UInt32 precision = ((PrecisionScaleArg)additions).precision;
+        return try_read_decimal_text<TYPE_DECIMAL256>(x, rb, precision, scale);
+    }
 }
 
 /// Monotonicity.
@@ -1094,9 +1103,6 @@ public:
     using Monotonic = MonotonicityImpl;
 
     static constexpr auto name = Name::name;
-    static constexpr bool to_decimal =
-            std::is_same_v<Name, NameToDecimal32> || std::is_same_v<Name, NameToDecimal64> ||
-            std::is_same_v<Name, NameToDecimal128> || std::is_same_v<Name, NameToDecimal128I>;
 
     static FunctionPtr create() { return std::make_shared<FunctionConvert>(); }
 
@@ -1203,6 +1209,8 @@ using FunctionToDecimal128 =
         FunctionConvert<DataTypeDecimal<Decimal128>, NameToDecimal128, UnknownMonotonicity>;
 using FunctionToDecimal128I =
         FunctionConvert<DataTypeDecimal<Decimal128I>, NameToDecimal128I, UnknownMonotonicity>;
+using FunctionToDecimal256 =
+        FunctionConvert<DataTypeDecimal<Decimal256>, NameToDecimal256, UnknownMonotonicity>;
 using FunctionToDate = FunctionConvert<DataTypeDate, NameToDate, UnknownMonotonicity>;
 using FunctionToDateTime = FunctionConvert<DataTypeDateTime, NameToDateTime, UnknownMonotonicity>;
 using FunctionToDateV2 = FunctionConvert<DataTypeDateV2, NameToDate, UnknownMonotonicity>;
@@ -1273,6 +1281,10 @@ struct FunctionTo<DataTypeDecimal<Decimal128I>> {
     using Type = FunctionToDecimal128I;
 };
 template <>
+struct FunctionTo<DataTypeDecimal<Decimal256>> {
+    using Type = FunctionToDecimal256; // FunctionConvert alias for DataTypeDecimal<Decimal256>
+};
+template <>
 struct FunctionTo<DataTypeDate> {
     using Type = FunctionToDate;
 };
@@ -1430,6 +1442,9 @@ struct ConvertImpl<DataTypeString, DataTypeDecimal<Decimal128>, Name>
 template <typename Name>
 struct ConvertImpl<DataTypeString, DataTypeDecimal<Decimal128I>, Name>
         : ConvertThroughParsing<DataTypeString, DataTypeDecimal<Decimal128I>, Name> {};
+template <typename Name>
+struct ConvertImpl<DataTypeString, DataTypeDecimal<Decimal256>, Name>
+        : ConvertThroughParsing<DataTypeString, DataTypeDecimal<Decimal256>, Name> {};
 
 template <typename ToDataType, typename Name>
 class FunctionConvertFromString : public IFunction {
@@ -2093,7 +2108,8 @@ private:
             if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal32>> ||
                           std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>> ||
                           std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>> ||
-                          std::is_same_v<ToDataType, DataTypeDecimal<Decimal128I>>) {
+                          std::is_same_v<ToDataType, DataTypeDecimal<Decimal128I>> ||
+                          std::is_same_v<ToDataType, DataTypeDecimal<Decimal256>>) {
                 ret = create_decimal_wrapper(from_type,
                                              check_and_get_data_type<ToDataType>(to_type.get()));
                 return true;
diff --git a/be/src/vec/functions/function_multi_same_args.h b/be/src/vec/functions/function_multi_same_args.h
index a43b31c05f..a0cd4e01ed 100644
--- a/be/src/vec/functions/function_multi_same_args.h
+++ b/be/src/vec/functions/function_multi_same_args.h
@@ -18,7 +18,6 @@
 #pragma once
 
 #include "udf/udf.h"
-#include "vec/data_types/get_least_supertype.h"
 #include "vec/functions/function_helpers.h"
 #include "vec/functions/simple_function_factory.h"
 #include "vec/utils/template_helpers.hpp"
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index 69c2e3a528..aa76dafd20 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -2423,12 +2423,33 @@ struct MoneyFormatDecimalImpl {
                     frac_part = frac_part * multiplier;
                 }
 
-                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
+                StringRef str = MoneyFormat::do_money_format<__int128, 53>(
                         context, decimal128_column->get_whole_part(i), frac_part);
 
                 result_column->insert_data(str.data, str.size);
             }
         }
+        // TODO: support decimal256 (draft below is disabled; it still formats via int64_t).
+        /* else if (auto* decimal256_column =
+                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
+            const UInt32 scale = decimal256_column->get_scale();
+            const auto multiplier =
+                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
+            for (size_t i = 0; i < input_rows_count; i++) {
+                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
+                if (scale > 2) {
+                    int delta = ((frac_part % multiplier) << 1) > multiplier;
+                    frac_part = Decimal256(frac_part / multiplier + delta);
+                } else if (scale < 2) {
+                    frac_part = Decimal256(frac_part * multiplier);
+                }
+
+                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
+                        context, decimal256_column->get_whole_part(i), frac_part);
+
+                result_column->insert_data(str.data, str.size);
+            }
+        }*/
     }
 };
 
diff --git a/be/src/vec/functions/function_unary_arithmetic.h b/be/src/vec/functions/function_unary_arithmetic.h
index 63376f93d7..51aabb9a80 100644
--- a/be/src/vec/functions/function_unary_arithmetic.h
+++ b/be/src/vec/functions/function_unary_arithmetic.h
@@ -72,8 +72,8 @@ class FunctionUnaryArithmetic : public IFunction {
                                    DataTypeInt8, DataTypeInt16, DataTypeInt32, DataTypeInt64,
                                    DataTypeInt128, DataTypeFloat32, DataTypeFloat64,
                                    DataTypeDecimal<Decimal32>, DataTypeDecimal<Decimal64>,
-                                   DataTypeDecimal<Decimal128>, DataTypeDecimal<Decimal128I>>(
-                type, std::forward<F>(f));
+                                   DataTypeDecimal<Decimal128>, DataTypeDecimal<Decimal128I>,
+                                   DataTypeDecimal<Decimal256>>(type, std::forward<F>(f));
     }
 
 public:
diff --git a/be/src/vec/functions/function_width_bucket.cpp b/be/src/vec/functions/function_width_bucket.cpp
index 1daf3ed5ea..40c08a950e 100644
--- a/be/src/vec/functions/function_width_bucket.cpp
+++ b/be/src/vec/functions/function_width_bucket.cpp
@@ -148,6 +148,9 @@ private:
         } else if (which.is_decimal128i()) {
             _execute<ColumnDecimal128I>(expr_column, min_value_column, max_value_column,
                                         num_buckets, nested_column_column);
+        } else if (which.is_decimal256()) {
+            _execute<ColumnDecimal256>(expr_column, min_value_column, max_value_column, num_buckets,
+                                       nested_column_column);
         } else if (which.is_date()) {
             _execute<ColumnDate>(expr_column, min_value_column, max_value_column, num_buckets,
                                  nested_column_column);
diff --git a/be/src/vec/functions/functions_comparison.h b/be/src/vec/functions/functions_comparison.h
index 0bf03310b9..3b58f21c40 100644
--- a/be/src/vec/functions/functions_comparison.h
+++ b/be/src/vec/functions/functions_comparison.h
@@ -35,7 +35,6 @@
 #include "vec/core/decimal_comparison.h"
 #include "vec/data_types/data_type_number.h"
 #include "vec/data_types/data_type_string.h"
-#include "vec/data_types/get_least_supertype.h"
 #include "vec/functions/function.h"
 #include "vec/functions/function_helpers.h"
 #include "vec/functions/functions_logical.h"
diff --git a/be/src/vec/functions/if.cpp b/be/src/vec/functions/if.cpp
index 1664c0719e..9b14abce2a 100644
--- a/be/src/vec/functions/if.cpp
+++ b/be/src/vec/functions/if.cpp
@@ -46,7 +46,6 @@
 #include "vec/data_types/data_type.h"
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_number.h"
-#include "vec/data_types/get_least_supertype.h"
 #include "vec/functions/function.h"
 #include "vec/functions/function_helpers.h"
 #include "vec/functions/simple_function_factory.h"
diff --git a/be/src/vec/functions/least_greast.cpp b/be/src/vec/functions/least_greast.cpp
index 90f8fa99cf..be35504d83 100644
--- a/be/src/vec/functions/least_greast.cpp
+++ b/be/src/vec/functions/least_greast.cpp
@@ -138,7 +138,8 @@ private:
             }
         } else if constexpr (std::is_same_v<ColumnType, ColumnDecimal32> ||
                              std::is_same_v<ColumnType, ColumnDecimal64> ||
-                             std::is_same_v<ColumnType, ColumnDecimal128I>) {
+                             std::is_same_v<ColumnType, ColumnDecimal128I> ||
+                             std::is_same_v<ColumnType, ColumnDecimal256>) {
             for (size_t i = 0; i < input_rows_count; ++i) {
                 using type = std::decay_t<decltype(result_raw_data[0].value)>;
                 result_raw_data[i] =
@@ -243,7 +244,8 @@ private:
             }
         } else if constexpr (std::is_same_v<ColumnType, ColumnDecimal32> ||
                              std::is_same_v<ColumnType, ColumnDecimal64> ||
-                             std::is_same_v<ColumnType, ColumnDecimal128I>) {
+                             std::is_same_v<ColumnType, ColumnDecimal128I> ||
+                             std::is_same_v<ColumnType, ColumnDecimal256>) {
             for (size_t i = 0; i < input_rows_count; ++i) {
                 using type = std::decay_t<decltype(first_raw_data[0].value)>;
                 res_data[i] |= (!res_data[i] *
diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp
index 7519976752..e15c3926a9 100644
--- a/be/src/vec/olap/olap_data_convertor.cpp
+++ b/be/src/vec/olap/olap_data_convertor.cpp
@@ -132,6 +132,9 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co
     case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: {
         return std::make_unique<OlapColumnDataConvertorDecimalV3<Decimal128I>>();
     }
+    case FieldType::OLAP_FIELD_TYPE_DECIMAL256: {
+        return std::make_unique<OlapColumnDataConvertorDecimalV3<Decimal256>>();
+    }
     case FieldType::OLAP_FIELD_TYPE_JSONB: {
         return std::make_unique<OlapColumnDataConvertorVarChar>(true);
     }
diff --git a/be/src/vec/sink/vtablet_block_convertor.cpp b/be/src/vec/sink/vtablet_block_convertor.cpp
index d1f9174d74..18484d6405 100644
--- a/be/src/vec/sink/vtablet_block_convertor.cpp
+++ b/be/src/vec/sink/vtablet_block_convertor.cpp
@@ -137,14 +137,16 @@ DecimalType OlapTableBlockConvertor::_get_decimalv3_min_or_max(const TypeDescrip
         pmap = IsMin ? &_min_decimal32_val : &_max_decimal32_val;
     } else if constexpr (std::is_same_v<DecimalType, vectorized::Decimal64>) {
         pmap = IsMin ? &_min_decimal64_val : &_max_decimal64_val;
-    } else {
+    } else if constexpr (std::is_same_v<DecimalType, vectorized::Decimal128I>) {
         pmap = IsMin ? &_min_decimal128_val : &_max_decimal128_val;
+    } else {
+        pmap = IsMin ? &_min_decimal256_val : &_max_decimal256_val;
     }
 
     // found
     auto iter = pmap->find(type.precision);
     if (iter != pmap->end()) {
-        return iter->second;
+        return DecimalType(iter->second);
     }
 
     typename DecimalType::NativeType value;
@@ -154,7 +156,7 @@ DecimalType OlapTableBlockConvertor::_get_decimalv3_min_or_max(const TypeDescrip
         value = vectorized::max_decimal_value<DecimalType>(type.precision);
     }
     pmap->emplace(type.precision, value);
-    return value;
+    return DecimalType(value);
 }
 
 Status OlapTableBlockConvertor::_validate_column(RuntimeState* state, const TypeDescriptor& type,
@@ -336,6 +338,10 @@ Status OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
         CHECK_VALIDATION_FOR_DECIMALV3(vectorized::Decimal128I);
         break;
     }
+    case TYPE_DECIMAL256: {
+        CHECK_VALIDATION_FOR_DECIMALV3(vectorized::Decimal256);
+        break;
+    }
 #undef CHECK_VALIDATION_FOR_DECIMALV3
     case TYPE_ARRAY: {
         const auto* column_array =
diff --git a/be/src/vec/sink/vtablet_block_convertor.h b/be/src/vec/sink/vtablet_block_convertor.h
index 27440c628b..3ee3d58265 100644
--- a/be/src/vec/sink/vtablet_block_convertor.h
+++ b/be/src/vec/sink/vtablet_block_convertor.h
@@ -93,6 +93,8 @@ private:
     std::map<int, int64_t> _min_decimal64_val;
     std::map<int, int128_t> _max_decimal128_val;
     std::map<int, int128_t> _min_decimal128_val;
+    std::map<int, wide::Int256> _max_decimal256_val; // keyed by precision; cached max value
+    std::map<int, wide::Int256> _min_decimal256_val; // keyed by precision; cached min value
 
     std::vector<char> _filter_map;
 
diff --git a/be/src/vec/sink/writer/vmysql_table_writer.cpp b/be/src/vec/sink/writer/vmysql_table_writer.cpp
index 6eebbfbed6..a7eca340c7 100644
--- a/be/src/vec/sink/writer/vmysql_table_writer.cpp
+++ b/be/src/vec/sink/writer/vmysql_table_writer.cpp
@@ -205,7 +205,8 @@ Status VMysqlTableWriter::_insert_row(vectorized::Block& block, size_t row) {
         }
         case TYPE_DECIMAL32:
         case TYPE_DECIMAL64:
-        case TYPE_DECIMAL128I: {
+        case TYPE_DECIMAL128I:
+        case TYPE_DECIMAL256: {
             auto val = type_ptr->to_string(*column, row);
             fmt::format_to(_insert_stmt_buffer, "{}", val);
             break;
diff --git a/be/test/vec/data_types/decimal_test.cpp b/be/test/vec/data_types/decimal_test.cpp
new file mode 100644
index 0000000000..0f4b950201
--- /dev/null
+++ b/be/test/vec/data_types/decimal_test.cpp
@@ -0,0 +1,212 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <functional>
+#include <memory>
+
+#include "gtest/gtest_pred_impl.h"
+#include "runtime/raw_value.h"
+#include "runtime/type_limit.h"
+#include "util/string_parser.hpp"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type_decimal.h"
+namespace doris::vectorized {
+
+TEST(DecimalTest, Decimal256) { // to_string() at several scales, then +, - and / on the limits
+    // 9999999999999999999999999999999999999999999999999999999999999999999999999999
+    Decimal256 dec1(type_limit<vectorized::Decimal256>::max());
+    auto des_str = dec1.to_string(10); // argument is used as the scale: 10 fractional digits
+    EXPECT_EQ(des_str,
+              "999999999999999999999999999999999999999999999999999999999999999999.9999999999");
+    des_str = dec1.to_string(0); // scale 0: no decimal point is printed
+    EXPECT_EQ(des_str,
+              "9999999999999999999999999999999999999999999999999999999999999999999999999999");
+    des_str = dec1.to_string(76); // all digits land after the point; integer part prints as 0
+    EXPECT_EQ(des_str,
+              "0.9999999999999999999999999999999999999999999999999999999999999999999999999999");
+
+    auto dec2 = type_limit<vectorized::Decimal256>::min(); // expectations mirror max with '-'
+    des_str = dec2.to_string(10);
+    EXPECT_EQ(des_str,
+              "-999999999999999999999999999999999999999999999999999999999999999999.9999999999");
+    des_str = dec2.to_string(0);
+    EXPECT_EQ(des_str,
+              "-9999999999999999999999999999999999999999999999999999999999999999999999999999");
+    des_str = dec2.to_string(76);
+    EXPECT_EQ(des_str,
+              "-0.9999999999999999999999999999999999999999999999999999999999999999999999999999");
+
+    // plus: max + min is expected to cancel out to zero
+    Decimal256 dec3 = dec1 + dec2;
+    des_str = dec3.to_string(10); // zero is still padded out to the requested scale
+    EXPECT_EQ(des_str, "0.0000000000");
+    des_str = dec3.to_string(0);
+    EXPECT_EQ(des_str, "0");
+    des_str = dec3.to_string(76);
+    EXPECT_EQ(des_str,
+              "0.0000000000000000000000000000000000000000000000000000000000000000000000000000");
+
+    // minus: max - max is expected to be zero
+    dec2 = type_limit<vectorized::Decimal256>::max();
+    dec3 = dec1 - dec2;
+    des_str = dec3.to_string(10);
+    EXPECT_EQ(des_str, "0.0000000000");
+
+    // multiply: placeholder only, nothing is asserted for multiplication in this test
+
+    // divide: max / 10, with the quotient printed at scale 1
+    dec1 = type_limit<vectorized::Decimal256>::max();
+    dec2 = vectorized::Decimal256(10);
+    dec3 = dec1 / dec2;
+    des_str = dec3.to_string(1);
+    EXPECT_EQ(des_str,
+              "99999999999999999999999999999999999999999999999999999999999999999999999999.9");
+
+    // overflow: placeholder only, nothing is asserted for overflow in this test
+}
+
+TEST(DecimalTest, compare) { // ==, !=, >, >=, <, <= between the type limits and small values
+    Decimal256 dec_max(type_limit<vectorized::Decimal256>::max());
+    Decimal256 dec_min(type_limit<vectorized::Decimal256>::min());
+
+    Decimal256 dec3 = vectorized::Decimal256(10);
+    Decimal256 dec4 = vectorized::Decimal256(9);
+    Decimal256 dec5 = vectorized::Decimal256(-10);
+
+    Decimal256 dec_max2(type_limit<vectorized::Decimal256>::max()); // *_2: equal-valued copies
+    Decimal256 dec_min2(type_limit<vectorized::Decimal256>::min());
+
+    Decimal256 dec3_2 = vectorized::Decimal256(10);
+    Decimal256 dec4_2 = vectorized::Decimal256(9);
+    Decimal256 dec5_2 = vectorized::Decimal256(-10);
+
+    EXPECT_EQ(dec_max, dec_max2); // equality: separately built objects holding the same value
+    EXPECT_EQ(dec_min, dec_min2);
+    EXPECT_EQ(dec3, dec3_2);
+    EXPECT_EQ(dec4, dec4_2);
+    EXPECT_EQ(dec5, dec5_2);
+
+    EXPECT_NE(dec_max, dec_min); // inequality: includes values differing only in sign (10/-10)
+    EXPECT_NE(dec_max, dec3);
+    EXPECT_NE(dec_max, dec5);
+    EXPECT_NE(dec3, dec4);
+    EXPECT_NE(dec3, dec5);
+
+    EXPECT_GT(dec_max, dec_min); // strictly greater
+    EXPECT_GT(dec_max, dec3);
+    EXPECT_GT(dec_max, dec4);
+    EXPECT_GT(dec_max, dec5);
+    EXPECT_GT(dec3, dec4);
+    EXPECT_GT(dec3, dec5);
+
+    EXPECT_GE(dec_max, dec_max2); // greater-or-equal: covers the equal case via the copies
+    EXPECT_GE(dec_max, dec_min);
+    EXPECT_GE(dec_max, dec3);
+    EXPECT_GE(dec_max, dec4);
+    EXPECT_GE(dec_max, dec5);
+    EXPECT_GE(dec3, dec4);
+    EXPECT_GE(dec3, dec3_2);
+    EXPECT_GE(dec3, dec5);
+    EXPECT_GE(dec5, dec5_2);
+
+    EXPECT_LT(dec_min, dec_max); // strictly less
+    EXPECT_LT(dec_min, dec3);
+    EXPECT_LT(dec_min, dec4);
+    EXPECT_LT(dec_min, dec5);
+    EXPECT_LT(dec4, dec3);
+    EXPECT_LT(dec5, dec3);
+    EXPECT_LT(dec5, dec4);
+
+    EXPECT_LE(dec_min, dec_min); // less-or-equal: same object on both sides (reflexive case)
+    EXPECT_LE(dec_min, dec_max);
+    EXPECT_LE(dec_min, dec3);
+    EXPECT_LE(dec_min, dec4);
+    EXPECT_LE(dec_min, dec5);
+    EXPECT_LE(dec4, dec3);
+    EXPECT_LE(dec5, dec3);
+    EXPECT_LE(dec5, dec4);
+}
+
+TEST(DecimalTest, string_parser) { // parsing the textual max value must yield max()'s raw value
+    Decimal256 dec_max(type_limit<vectorized::Decimal256>::max());
+    std::string dec_str( // same text that to_string(10) is expected to print for max()
+            "999999999999999999999999999999999999999999999999999999999999999999.9999999999");
+
+    StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
+    wide::Int256 value = StringParser::string_to_decimal<TYPE_DECIMAL256>(
+            dec_str.data(), dec_str.size(), 76, 10, &result); // 76, 10: presumably precision, scale
+    EXPECT_EQ(result, StringParser::PARSE_SUCCESS);
+    EXPECT_EQ(value, dec_max.value); // compared on the underlying wide::Int256, not the wrapper
+}
+TEST(DecimalTest, crc32) { // pins RawValue::zlib_crc32 results for four Decimal256 column rows
+    PrimitiveType type = PrimitiveType::TYPE_DECIMAL256;
+    DataTypeDecimal<vectorized::Decimal256> data_type(76, 10);
+    auto col = data_type.create_column();
+    Decimal256 dec_max(type_limit<vectorized::Decimal256>::max());
+    Decimal256 dec_min(type_limit<vectorized::Decimal256>::min());
+    Decimal256 dec3 = vectorized::Decimal256(1);
+    Decimal256 dec4 = vectorized::Decimal256(-1);
+    auto& decimal_data = // downcast to the concrete decimal column to reach its data container
+            ((vectorized::ColumnDecimal<vectorized::Decimal256>*)col.get())->get_data();
+    decimal_data.push_back(dec_max); // row 0
+    decimal_data.push_back(dec_min); // row 1
+    decimal_data.push_back(dec3);    // row 2
+    decimal_data.push_back(dec4);    // row 3
+
+    auto column_value = col->get_data_at(0);
+    uint32_t hash_val = 0; // fed back in as the last argument (presumably the CRC seed)
+    hash_val = RawValue::zlib_crc32(column_value.data, column_value.size, type, hash_val);
+    EXPECT_EQ(hash_val, 1277249500); // hard-coded expectation for max()
+
+    column_value = col->get_data_at(1);
+    hash_val = 0; // reset so every row is hashed starting from the same initial value
+    hash_val = RawValue::zlib_crc32(column_value.data, column_value.size, type, hash_val);
+    EXPECT_EQ(hash_val, 1537064144); // hard-coded expectation for min()
+
+    column_value = col->get_data_at(2);
+    hash_val = 0;
+    hash_val = RawValue::zlib_crc32(column_value.data, column_value.size, type, hash_val);
+    EXPECT_EQ(hash_val, 3905966087); // hard-coded expectation for Decimal256(1)
+
+    column_value = col->get_data_at(3);
+    hash_val = 0;
+    hash_val = RawValue::zlib_crc32(column_value.data, column_value.size, type, hash_val);
+    EXPECT_EQ(hash_val, 4285311755); // hard-coded expectation for Decimal256(-1)
+}
+TEST(DecimalTest, hash) { // pins std::hash<Decimal256> results for the limits and +/-12345
+    Decimal256 dec_max(type_limit<vectorized::Decimal256>::max());
+    Decimal256 dec_min(type_limit<vectorized::Decimal256>::min());
+    Decimal256 dec3 = vectorized::Decimal256(12345);
+    Decimal256 dec4 = vectorized::Decimal256(-12345);
+
+    {
+        auto hash_op = std::hash<vectorized::Decimal256>();
+        auto hash_val = hash_op(dec_max);
+        EXPECT_EQ(hash_val, 11093810651088735436ULL); // hard-coded expectation for max()
+        hash_val = hash_op(dec_min);
+        EXPECT_EQ(hash_val, 11093810651088735437ULL); // hard-coded expectation for min()
+        hash_val = hash_op(dec3);
+        EXPECT_EQ(hash_val, 12345); // a small positive value is expected to hash to itself
+        hash_val = hash_op(dec4);
+        EXPECT_EQ(hash_val, 12344); // -12345 is expected to hash to 12344, not 12345
+    }
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/PrimitiveType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/PrimitiveType.java
index 78a60239f2..cfae49e2b5 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/PrimitiveType.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/PrimitiveType.java
@@ -54,6 +54,7 @@ public enum PrimitiveType {
     DECIMAL32("DECIMAL32", 4, TPrimitiveType.DECIMAL32, true),
     DECIMAL64("DECIMAL64", 8, TPrimitiveType.DECIMAL64, true),
     DECIMAL128("DECIMAL128", 16, TPrimitiveType.DECIMAL128I, true),
+    DECIMAL256("DECIMAL256", 32, TPrimitiveType.DECIMAL256, false),
     TIME("TIME", 8, TPrimitiveType.TIME, false),
     // these following types are stored as object binary in BE.
     HLL("HLL", 16, TPrimitiveType.HLL, true),
@@ -94,6 +95,7 @@ public enum PrimitiveType {
         builder.add(DECIMAL32);
         builder.add(DECIMAL64);
         builder.add(DECIMAL128);
+        builder.add(DECIMAL256);
         builder.add(DATETIMEV2);
         typeWithPrecision = builder.build();
     }
@@ -123,6 +125,7 @@ public enum PrimitiveType {
         builder.put(NULL_TYPE, DECIMAL32);
         builder.put(NULL_TYPE, DECIMAL64);
         builder.put(NULL_TYPE, DECIMAL128);
+        builder.put(NULL_TYPE, DECIMAL256);
         builder.put(NULL_TYPE, CHAR);
         builder.put(NULL_TYPE, VARCHAR);
         builder.put(NULL_TYPE, STRING);
@@ -148,6 +151,7 @@ public enum PrimitiveType {
         builder.put(BOOLEAN, DECIMAL32);
         builder.put(BOOLEAN, DECIMAL64);
         builder.put(BOOLEAN, DECIMAL128);
+        builder.put(BOOLEAN, DECIMAL256);
         builder.put(BOOLEAN, VARCHAR);
         builder.put(BOOLEAN, STRING);
         // Tinyint
@@ -167,6 +171,7 @@ public enum PrimitiveType {
         builder.put(TINYINT, DECIMAL32);
         builder.put(TINYINT, DECIMAL64);
         builder.put(TINYINT, DECIMAL128);
+        builder.put(TINYINT, DECIMAL256);
         builder.put(TINYINT, VARCHAR);
         builder.put(TINYINT, STRING);
         builder.put(TINYINT, TIME);
@@ -188,6 +193,7 @@ public enum PrimitiveType {
         builder.put(SMALLINT, DECIMAL32);
         builder.put(SMALLINT, DECIMAL64);
         builder.put(SMALLINT, DECIMAL128);
+        builder.put(SMALLINT, DECIMAL256);
         builder.put(SMALLINT, VARCHAR);
         builder.put(SMALLINT, STRING);
         builder.put(SMALLINT, TIME);
@@ -209,6 +215,7 @@ public enum PrimitiveType {
         builder.put(INT, DECIMAL32);
         builder.put(INT, DECIMAL64);
         builder.put(INT, DECIMAL128);
+        builder.put(INT, DECIMAL256);
         builder.put(INT, VARCHAR);
         builder.put(INT, STRING);
         builder.put(INT, TIME);
@@ -230,6 +237,7 @@ public enum PrimitiveType {
         builder.put(BIGINT, DECIMAL32);
         builder.put(BIGINT, DECIMAL64);
         builder.put(BIGINT, DECIMAL128);
+        builder.put(BIGINT, DECIMAL256);
         builder.put(BIGINT, VARCHAR);
         builder.put(BIGINT, STRING);
         builder.put(BIGINT, TIME);
@@ -251,6 +259,7 @@ public enum PrimitiveType {
         builder.put(LARGEINT, DECIMAL32);
         builder.put(LARGEINT, DECIMAL64);
         builder.put(LARGEINT, DECIMAL128);
+        builder.put(LARGEINT, DECIMAL256);
         builder.put(LARGEINT, VARCHAR);
         builder.put(LARGEINT, STRING);
         builder.put(LARGEINT, TIME);
@@ -272,6 +281,7 @@ public enum PrimitiveType {
         builder.put(FLOAT, DECIMAL32);
         builder.put(FLOAT, DECIMAL64);
         builder.put(FLOAT, DECIMAL128);
+        builder.put(FLOAT, DECIMAL256);
         builder.put(FLOAT, VARCHAR);
         builder.put(FLOAT, STRING);
         builder.put(FLOAT, TIME);
@@ -293,6 +303,7 @@ public enum PrimitiveType {
         builder.put(DOUBLE, DECIMAL32);
         builder.put(DOUBLE, DECIMAL64);
         builder.put(DOUBLE, DECIMAL128);
+        builder.put(DOUBLE, DECIMAL256);
         builder.put(DOUBLE, VARCHAR);
         builder.put(DOUBLE, STRING);
         builder.put(DOUBLE, TIME);
@@ -314,6 +325,7 @@ public enum PrimitiveType {
         builder.put(DATE, DECIMAL32);
         builder.put(DATE, DECIMAL64);
         builder.put(DATE, DECIMAL128);
+        builder.put(DATE, DECIMAL256);
         builder.put(DATE, VARCHAR);
         builder.put(DATE, STRING);
         // Datetime
@@ -333,6 +345,7 @@ public enum PrimitiveType {
         builder.put(DATETIME, DECIMAL32);
         builder.put(DATETIME, DECIMAL64);
         builder.put(DATETIME, DECIMAL128);
+        builder.put(DATETIME, DECIMAL256);
         builder.put(DATETIME, VARCHAR);
         builder.put(DATETIME, STRING);
         // DateV2
@@ -352,6 +365,7 @@ public enum PrimitiveType {
         builder.put(DATEV2, DECIMAL32);
         builder.put(DATEV2, DECIMAL64);
         builder.put(DATEV2, DECIMAL128);
+        builder.put(DATEV2, DECIMAL256);
         builder.put(DATEV2, VARCHAR);
         builder.put(DATEV2, STRING);
         // DatetimeV2
@@ -371,6 +385,7 @@ public enum PrimitiveType {
         builder.put(DATETIMEV2, DECIMAL32);
         builder.put(DATETIMEV2, DECIMAL64);
         builder.put(DATETIMEV2, DECIMAL128);
+        builder.put(DATETIMEV2, DECIMAL256);
         builder.put(DATETIMEV2, VARCHAR);
         builder.put(DATETIMEV2, STRING);
         // Char
@@ -391,6 +406,7 @@ public enum PrimitiveType {
         builder.put(CHAR, DECIMAL32);
         builder.put(CHAR, DECIMAL64);
         builder.put(CHAR, DECIMAL128);
+        builder.put(CHAR, DECIMAL256);
         builder.put(CHAR, VARCHAR);
         builder.put(CHAR, STRING);
         builder.put(CHAR, TIME);
@@ -412,6 +428,7 @@ public enum PrimitiveType {
         builder.put(VARCHAR, DECIMAL32);
         builder.put(VARCHAR, DECIMAL64);
         builder.put(VARCHAR, DECIMAL128);
+        builder.put(VARCHAR, DECIMAL256);
         builder.put(VARCHAR, VARCHAR);
         builder.put(VARCHAR, JSONB);
         builder.put(VARCHAR, VARIANT);
@@ -436,6 +453,7 @@ public enum PrimitiveType {
         builder.put(STRING, DECIMAL32);
         builder.put(STRING, DECIMAL64);
         builder.put(STRING, DECIMAL128);
+        builder.put(STRING, DECIMAL256);
         builder.put(STRING, VARCHAR);
         builder.put(STRING, JSONB);
         builder.put(STRING, VARIANT);
@@ -456,6 +474,7 @@ public enum PrimitiveType {
         builder.put(DECIMALV2, DECIMAL32);
         builder.put(DECIMALV2, DECIMAL64);
         builder.put(DECIMALV2, DECIMAL128);
+        builder.put(DECIMALV2, DECIMAL256);
         builder.put(DECIMALV2, VARCHAR);
         builder.put(DECIMALV2, STRING);
 
@@ -471,6 +490,7 @@ public enum PrimitiveType {
         builder.put(DECIMAL32, DECIMAL32);
         builder.put(DECIMAL32, DECIMAL64);
         builder.put(DECIMAL32, DECIMAL128);
+        builder.put(DECIMAL32, DECIMAL256);
         builder.put(DECIMAL32, VARCHAR);
         builder.put(DECIMAL32, STRING);
 
@@ -486,6 +506,7 @@ public enum PrimitiveType {
         builder.put(DECIMAL64, DECIMAL32);
         builder.put(DECIMAL64, DECIMAL64);
         builder.put(DECIMAL64, DECIMAL128);
+        builder.put(DECIMAL64, DECIMAL256);
         builder.put(DECIMAL64, VARCHAR);
         builder.put(DECIMAL64, STRING);
 
@@ -501,9 +522,27 @@ public enum PrimitiveType {
         builder.put(DECIMAL128, DECIMAL32);
         builder.put(DECIMAL128, DECIMAL64);
         builder.put(DECIMAL128, DECIMAL128);
+        builder.put(DECIMAL128, DECIMAL256);
         builder.put(DECIMAL128, VARCHAR);
         builder.put(DECIMAL128, STRING);
 
+        // decimal256
+        builder.put(DECIMAL256, BOOLEAN);
+        builder.put(DECIMAL256, TINYINT);
+        builder.put(DECIMAL256, SMALLINT);
+        builder.put(DECIMAL256, INT);
+        builder.put(DECIMAL256, BIGINT);
+        builder.put(DECIMAL256, LARGEINT);
+        builder.put(DECIMAL256, FLOAT);
+        builder.put(DECIMAL256, DOUBLE);
+        builder.put(DECIMAL256, DECIMALV2);
+        builder.put(DECIMAL256, DECIMAL32);
+        builder.put(DECIMAL256, DECIMAL64);
+        builder.put(DECIMAL256, DECIMAL128);
+        builder.put(DECIMAL256, DECIMAL256);
+        builder.put(DECIMAL256, VARCHAR);
+        builder.put(DECIMAL256, STRING);
+
         // JSONB
         builder.put(JSONB, BOOLEAN);
         builder.put(JSONB, TINYINT);
@@ -517,6 +556,8 @@ public enum PrimitiveType {
         builder.put(JSONB, DECIMAL32);
         builder.put(JSONB, DECIMAL64);
         builder.put(JSONB, DECIMAL128);
+        // TODO: support and test decimal256?
+        // builder.put(JSONB, DECIMAL256);
         builder.put(JSONB, VARCHAR);
         builder.put(JSONB, STRING);
         builder.put(JSONB, VARIANT);
@@ -575,6 +616,7 @@ public enum PrimitiveType {
         numericTypes.add(DECIMAL32);
         numericTypes.add(DECIMAL64);
         numericTypes.add(DECIMAL128);
+        numericTypes.add(DECIMAL256);
 
         supportedTypes = Lists.newArrayList();
         supportedTypes.add(NULL_TYPE);
@@ -602,6 +644,7 @@ public enum PrimitiveType {
         supportedTypes.add(DECIMAL32);
         supportedTypes.add(DECIMAL64);
         supportedTypes.add(DECIMAL128);
+        supportedTypes.add(DECIMAL256);
         supportedTypes.add(BITMAP);
         supportedTypes.add(ARRAY);
         supportedTypes.add(MAP);
@@ -685,6 +728,8 @@ public enum PrimitiveType {
                 return DECIMAL64;
             case DECIMAL128I:
                 return DECIMAL128;
+            case DECIMAL256:
+                return DECIMAL256;
             case TIME:
                 return TIME;
             case TIMEV2:
@@ -767,7 +812,7 @@ public enum PrimitiveType {
     }
 
     public boolean isDecimalV3Type() {
-        return this == DECIMAL32 || this == DECIMAL64 || this == DECIMAL128;
+        return this == DECIMAL32 || this == DECIMAL64 || this == DECIMAL128 || this == DECIMAL256;
     }
 
     public boolean isNumericType() {
@@ -876,6 +921,7 @@ public enum PrimitiveType {
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
                 return MysqlColType.MYSQL_TYPE_NEWDECIMAL;
             case STRING:
                 return MysqlColType.MYSQL_TYPE_BLOB;
@@ -913,6 +959,8 @@ public enum PrimitiveType {
                 return 8;
             case DECIMAL128:
                 return 16;
+            case DECIMAL256:
+                return 32;
             default:
                 return this.getSlotSize();
         }
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
index 540f8821f5..7271429b66 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
@@ -72,14 +72,15 @@ public class ScalarType extends Type {
     public static final int MAX_JSONB_LENGTH = 0x7fffffff - 4;
 
     // Hive, mysql, sql server standard.
-    public static final int MAX_PRECISION = 38;
     public static final int MAX_DECIMALV2_PRECISION = 27;
     public static final int MAX_DECIMALV2_SCALE = 9;
     public static final int MAX_DECIMAL32_PRECISION = 9;
     public static final int MAX_DECIMAL64_PRECISION = 18;
     public static final int MAX_DECIMAL128_PRECISION = 38;
+    public static final int MAX_DECIMAL256_PRECISION = 76;
     public static final int DEFAULT_MIN_AVG_DECIMAL128_SCALE = 4;
     public static final int MAX_DATETIMEV2_SCALE = 6;
+    public static final int MAX_PRECISION = MAX_DECIMAL256_PRECISION;
 
     private long byteSize = -1;
 
@@ -138,6 +139,7 @@ public class ScalarType extends Type {
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
                 return createDecimalV3Type(precision, scale);
             case DECIMALV2:
                 return createDecimalType(precision, scale);
@@ -210,6 +212,8 @@ public class ScalarType extends Type {
                 return DEFAULT_DECIMAL64;
             case DECIMAL128:
                 return DEFAULT_DECIMAL128;
+            case DECIMAL256:
+                return DEFAULT_DECIMAL256;
             case DECIMALV2:
                 return DEFAULT_DECIMALV2;
             case LARGEINT:
@@ -385,8 +389,10 @@ public class ScalarType extends Type {
             return PrimitiveType.DECIMAL32;
         } else if (precision <= MAX_DECIMAL64_PRECISION) {
             return PrimitiveType.DECIMAL64;
-        } else {
+        } else if (precision <= MAX_DECIMAL128_PRECISION) {
             return PrimitiveType.DECIMAL128;
+        } else {
+            return PrimitiveType.DECIMAL256;
         }
     }
 
@@ -469,22 +475,6 @@ public class ScalarType extends Type {
         }
     }
 
-    /**
-     * create a wider decimal type.
-     */
-    public static ScalarType createWiderDecimalV3Type(int precision, int scale) {
-        ScalarType type = new ScalarType(getSuitableDecimalType(precision, false));
-        if (precision <= MAX_DECIMAL32_PRECISION) {
-            type.precision = MAX_DECIMAL32_PRECISION;
-        } else if (precision <= MAX_DECIMAL64_PRECISION) {
-            type.precision = MAX_DECIMAL64_PRECISION;
-        } else {
-            type.precision = MAX_DECIMAL128_PRECISION;
-        }
-        type.scale = scale;
-        return type;
-    }
-
     public static ScalarType createVarcharType(int len) {
         // length checked in analysis
         ScalarType type = new ScalarType(PrimitiveType.VARCHAR);
@@ -611,6 +601,7 @@ public class ScalarType extends Type {
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
                 String typeName = "decimalv3";
                 if (Strings.isNullOrEmpty(precisionStr)) {
                     stringBuilder.append(typeName).append("(").append(precision)
@@ -701,6 +692,7 @@ public class ScalarType extends Type {
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
             case DATETIMEV2: {
                 Preconditions.checkArgument(precision >= scale,
                         String.format("given precision %d is out of scale bound %d", precision, scale));
@@ -724,14 +716,16 @@ public class ScalarType extends Type {
     public int decimalPrecision() {
         Preconditions.checkState(type == PrimitiveType.DECIMALV2 || type == PrimitiveType.DATETIMEV2
                 || type == PrimitiveType.TIMEV2 || type == PrimitiveType.DECIMAL32
-                || type == PrimitiveType.DECIMAL64 || type == PrimitiveType.DECIMAL128);
+                || type == PrimitiveType.DECIMAL64 || type == PrimitiveType.DECIMAL128
+                || type == PrimitiveType.DECIMAL256);
         return precision;
     }
 
     public int decimalScale() {
         Preconditions.checkState(type == PrimitiveType.DECIMALV2 || type == PrimitiveType.DATETIMEV2
                 || type == PrimitiveType.TIMEV2 || type == PrimitiveType.DECIMAL32
-                || type == PrimitiveType.DECIMAL64 || type == PrimitiveType.DECIMAL128);
+                || type == PrimitiveType.DECIMAL64 || type == PrimitiveType.DECIMAL128
+                || type == PrimitiveType.DECIMAL256);
         return scale;
     }
 
@@ -923,53 +917,6 @@ public class ScalarType extends Type {
         return true;
     }
 
-    public Type getMaxResolutionType() {
-        if (isIntegerType()) {
-            return ScalarType.BIGINT;
-            // Timestamps get summed as DOUBLE for AVG.
-        } else if (isFloatingPointType()) {
-            return ScalarType.DOUBLE;
-        } else if (isNull()) {
-            return ScalarType.NULL;
-        } else if (isDecimalV2()) {
-            return createDecimalTypeInternal(MAX_PRECISION, scale, true);
-        } else if (getPrimitiveType() == PrimitiveType.DECIMAL32) {
-            return createDecimalTypeInternal(MAX_DECIMAL32_PRECISION, scale, false);
-        } else if (getPrimitiveType() == PrimitiveType.DECIMAL64) {
-            return createDecimalTypeInternal(MAX_DECIMAL64_PRECISION, scale, false);
-        } else if (getPrimitiveType() == PrimitiveType.DECIMAL128) {
-            return createDecimalTypeInternal(MAX_DECIMAL128_PRECISION, scale, false);
-        } else if (isLargeIntType()) {
-            return ScalarType.LARGEINT;
-        } else if (isDatetimeV2()) {
-            return createDatetimeV2Type(6);
-        } else if (isTimeV2()) {
-            return createTimeV2Type(6);
-        } else {
-            return ScalarType.INVALID;
-        }
-    }
-
-    public ScalarType getNextResolutionType() {
-        Preconditions.checkState(isNumericType() || isNull());
-        if (type == PrimitiveType.DOUBLE || type == PrimitiveType.BIGINT || isNull()) {
-            return this;
-        } else if (type == PrimitiveType.DECIMALV2) {
-            return createDecimalTypeInternal(MAX_PRECISION, scale, true);
-        } else if (type == PrimitiveType.DECIMAL32) {
-            return createDecimalTypeInternal(MAX_DECIMAL64_PRECISION, scale, false);
-        } else if (type == PrimitiveType.DECIMAL64) {
-            return createDecimalTypeInternal(MAX_DECIMAL128_PRECISION, scale, false);
-        } else if (type == PrimitiveType.DECIMAL128) {
-            return createDecimalTypeInternal(MAX_DECIMAL128_PRECISION, scale, false);
-        } else if (type == PrimitiveType.DATETIMEV2) {
-            return createDatetimeV2Type(6);
-        } else if (type == PrimitiveType.TIMEV2) {
-            return createTimeV2Type(6);
-        }
-        return createType(PrimitiveType.values()[type.ordinal() + 1]);
-    }
-
     /**
      * Returns the smallest decimal type that can safely store this type. Returns
      * INVALID if this type cannot be stored as a decimal.
@@ -989,9 +936,9 @@ public class ScalarType extends Type {
             case BIGINT:
                 return createDecimalType(19);
             case FLOAT:
-                return createDecimalTypeInternal(MAX_PRECISION, 9, false);
+                return createDecimalTypeInternal(MAX_DECIMAL128_PRECISION, 9, false);
             case DOUBLE:
-                return createDecimalTypeInternal(MAX_PRECISION, 17, false);
+                return createDecimalTypeInternal(MAX_DECIMAL128_PRECISION, 17, false);
             default:
                 return ScalarType.INVALID;
         }
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
index ad498773d4..64d14350ae 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
@@ -87,6 +87,10 @@ public abstract class Type {
     public static final ScalarType DEFAULT_DECIMAL128 =
             ScalarType.createDecimalType(PrimitiveType.DECIMAL128, ScalarType.MAX_DECIMAL128_PRECISION,
                     ScalarType.DEFAULT_SCALE);
+
+    public static final ScalarType DEFAULT_DECIMAL256 =
+            ScalarType.createDecimalType(PrimitiveType.DECIMAL256, ScalarType.MAX_DECIMAL256_PRECISION,
+                    ScalarType.DEFAULT_SCALE);
     public static final ScalarType DEFAULT_DECIMALV3 = DEFAULT_DECIMAL32;
     public static final ScalarType DEFAULT_DATETIMEV2 = ScalarType.createDatetimeV2Type(0);
     public static final ScalarType DATETIMEV2 = DEFAULT_DATETIMEV2;
@@ -96,6 +100,7 @@ public abstract class Type {
     public static final ScalarType DECIMAL32 = DEFAULT_DECIMAL32;
     public static final ScalarType DECIMAL64 = DEFAULT_DECIMAL64;
     public static final ScalarType DECIMAL128 = DEFAULT_DECIMAL128;
+    public static final ScalarType DECIMAL256 = DEFAULT_DECIMAL256;
     public static final ScalarType JSONB = new ScalarType(PrimitiveType.JSONB);
     // (ScalarType) ScalarType.createDecimalTypeInternal(-1, -1);
     public static final ScalarType DEFAULT_VARCHAR = ScalarType.createVarcharType(-1);
@@ -149,6 +154,7 @@ public abstract class Type {
         numericTypes.add(DECIMAL32);
         numericTypes.add(DECIMAL64);
         numericTypes.add(DECIMAL128);
+        numericTypes.add(DECIMAL256);
 
         numericDateTimeTypes = Lists.newArrayList();
         numericDateTimeTypes.add(DATE);
@@ -391,7 +397,7 @@ public abstract class Type {
 
     public boolean isDecimalV3() {
         return isScalarType(PrimitiveType.DECIMAL32) || isScalarType(PrimitiveType.DECIMAL64)
-                || isScalarType(PrimitiveType.DECIMAL128);
+                || isScalarType(PrimitiveType.DECIMAL128) || isScalarType(PrimitiveType.DECIMAL256);
     }
 
     public boolean isDatetimeV2() {
@@ -975,7 +981,8 @@ public abstract class Type {
                             scalarType.getScale());
                 } else if (scalarType.getType() == TPrimitiveType.DECIMAL32
                         || scalarType.getType() == TPrimitiveType.DECIMAL64
-                        || scalarType.getType() == TPrimitiveType.DECIMAL128I) {
+                        || scalarType.getType() == TPrimitiveType.DECIMAL128I
+                        || scalarType.getType() == TPrimitiveType.DECIMAL256) {
                     Preconditions.checkState(scalarType.isSetPrecision()
                             && scalarType.isSetScale());
                     type = ScalarType.createDecimalV3Type(scalarType.getPrecision(),
@@ -1130,6 +1137,7 @@ public abstract class Type {
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
             case DATETIMEV2:
             case TIMEV2:
                 return t.decimalPrecision();
@@ -1166,6 +1174,7 @@ public abstract class Type {
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
                 return t.decimalScale();
             default:
                 return null;
@@ -1200,6 +1209,7 @@ public abstract class Type {
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
                 return 10;
             default:
                 // everything else (including boolean and string) is null
@@ -1267,6 +1277,7 @@ public abstract class Type {
         compatibilityMatrix[BOOLEAN.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[BOOLEAN.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[BOOLEAN.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[BOOLEAN.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[BOOLEAN.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
 
         // TINYINT
@@ -1288,6 +1299,7 @@ public abstract class Type {
         compatibilityMatrix[TINYINT.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL32;
         compatibilityMatrix[TINYINT.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[TINYINT.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[TINYINT.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[TINYINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[TINYINT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE;
         compatibilityMatrix[TINYINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1315,6 +1327,7 @@ public abstract class Type {
         compatibilityMatrix[SMALLINT.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL32;
         compatibilityMatrix[SMALLINT.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[SMALLINT.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[SMALLINT.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[SMALLINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[SMALLINT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE;
         compatibilityMatrix[SMALLINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1345,6 +1358,7 @@ public abstract class Type {
         compatibilityMatrix[INT.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL32;
         compatibilityMatrix[INT.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[INT.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[INT.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[INT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[INT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE;
         compatibilityMatrix[INT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1376,6 +1390,7 @@ public abstract class Type {
         compatibilityMatrix[BIGINT.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[BIGINT.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[BIGINT.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[BIGINT.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[BIGINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[BIGINT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE;
         compatibilityMatrix[BIGINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1399,6 +1414,7 @@ public abstract class Type {
         compatibilityMatrix[LARGEINT.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL128;
         compatibilityMatrix[LARGEINT.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL128;
         compatibilityMatrix[LARGEINT.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[LARGEINT.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[LARGEINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[LARGEINT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE;
         compatibilityMatrix[LARGEINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1421,6 +1437,7 @@ public abstract class Type {
         compatibilityMatrix[FLOAT.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL128;
         compatibilityMatrix[FLOAT.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL128;
         compatibilityMatrix[FLOAT.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[FLOAT.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[FLOAT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[FLOAT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE;
         compatibilityMatrix[FLOAT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1439,6 +1456,7 @@ public abstract class Type {
         compatibilityMatrix[DOUBLE.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL128;
         compatibilityMatrix[DOUBLE.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL128;
         compatibilityMatrix[DOUBLE.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[DOUBLE.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[DOUBLE.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DOUBLE.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE;
         compatibilityMatrix[DOUBLE.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1461,6 +1479,7 @@ public abstract class Type {
         compatibilityMatrix[DATE.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL32;
         compatibilityMatrix[DATE.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[DATE.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[DATE.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[DATE.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DATE.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DATE.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1481,6 +1500,7 @@ public abstract class Type {
         compatibilityMatrix[DATEV2.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL32;
         compatibilityMatrix[DATEV2.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[DATEV2.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[DATEV2.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[DATEV2.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DATEV2.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DATEV2.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1500,6 +1520,7 @@ public abstract class Type {
         compatibilityMatrix[DATETIME.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[DATETIME.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[DATETIME.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[DATETIME.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[DATETIME.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DATETIME.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DATETIME.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1519,6 +1540,7 @@ public abstract class Type {
         compatibilityMatrix[DATETIMEV2.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[DATETIMEV2.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[DATETIMEV2.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[DATETIMEV2.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[DATETIMEV2.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DATETIMEV2.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DATETIMEV2.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1538,6 +1560,7 @@ public abstract class Type {
         compatibilityMatrix[CHAR.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[CHAR.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[CHAR.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[CHAR.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[CHAR.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[CHAR.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[CHAR.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1553,6 +1576,7 @@ public abstract class Type {
         compatibilityMatrix[VARCHAR.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[VARCHAR.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[VARCHAR.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[VARCHAR.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[VARCHAR.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[VARCHAR.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[VARCHAR.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1576,6 +1600,7 @@ public abstract class Type {
         compatibilityMatrix[STRING.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[STRING.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[STRING.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[STRING.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[STRING.ordinal()][JSONB.ordinal()] = PrimitiveType.STRING;
         compatibilityMatrix[STRING.ordinal()][VARIANT.ordinal()] = PrimitiveType.STRING;
         compatibilityMatrix[STRING.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1585,6 +1610,7 @@ public abstract class Type {
         compatibilityMatrix[JSONB.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[JSONB.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[JSONB.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[JSONB.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[JSONB.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[JSONB.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[JSONB.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1601,6 +1627,7 @@ public abstract class Type {
         compatibilityMatrix[VARIANT.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[VARIANT.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[VARIANT.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[VARIANT.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[VARIANT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[VARIANT.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[VARIANT.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1627,6 +1654,7 @@ public abstract class Type {
         compatibilityMatrix[DECIMALV2.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DECIMALV2.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DECIMALV2.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[DECIMALV2.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DECIMALV2.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
 
         // DECIMAL32
@@ -1642,6 +1670,7 @@ public abstract class Type {
         compatibilityMatrix[DECIMAL32.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DECIMAL32.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[DECIMAL32.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[DECIMAL32.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[DECIMAL32.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
 
         // DECIMAL64
@@ -1657,6 +1686,7 @@ public abstract class Type {
         compatibilityMatrix[DECIMAL64.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DECIMAL64.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL64;
         compatibilityMatrix[DECIMAL64.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[DECIMAL64.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[DECIMAL64.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
 
         // DECIMAL128
@@ -1672,8 +1702,24 @@ public abstract class Type {
         compatibilityMatrix[DECIMAL128.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[DECIMAL128.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL128;
         compatibilityMatrix[DECIMAL128.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL128;
+        compatibilityMatrix[DECIMAL128.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.DECIMAL256;
         compatibilityMatrix[DECIMAL128.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
 
+        // DECIMAL256
+        compatibilityMatrix[DECIMAL256.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[DECIMAL256.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[DECIMAL256.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[DECIMAL256.ordinal()][DATEV2.ordinal()] = PrimitiveType.DECIMAL256;
+        compatibilityMatrix[DECIMAL256.ordinal()][DATETIMEV2.ordinal()] = PrimitiveType.DECIMAL256;
+        compatibilityMatrix[DECIMAL256.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[DECIMAL256.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[DECIMAL256.ordinal()][QUANTILE_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[DECIMAL256.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[DECIMAL256.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.DECIMAL256;
+        compatibilityMatrix[DECIMAL256.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.DECIMAL256;
+        compatibilityMatrix[DECIMAL256.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.DECIMAL256;
+        compatibilityMatrix[DECIMAL256.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
+
         // HLL
         compatibilityMatrix[HLL.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[HLL.ordinal()][TIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1687,6 +1733,7 @@ public abstract class Type {
         compatibilityMatrix[HLL.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[HLL.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[HLL.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[HLL.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[HLL.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
 
 
@@ -1702,6 +1749,7 @@ public abstract class Type {
         compatibilityMatrix[BITMAP.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[BITMAP.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[BITMAP.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[BITMAP.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[BITMAP.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
 
         //QUANTILE_STATE
@@ -1713,6 +1761,7 @@ public abstract class Type {
         compatibilityMatrix[QUANTILE_STATE.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[QUANTILE_STATE.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[QUANTILE_STATE.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[QUANTILE_STATE.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[QUANTILE_STATE.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
 
         //AGG_STATE
@@ -1724,6 +1773,7 @@ public abstract class Type {
         compatibilityMatrix[AGG_STATE.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[AGG_STATE.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[AGG_STATE.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[AGG_STATE.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
 
         // TIME why here not???
         compatibilityMatrix[TIME.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1731,6 +1781,7 @@ public abstract class Type {
         compatibilityMatrix[TIME.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[TIME.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[TIME.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[TIME.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[TIME.ordinal()][DATEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[TIME.ordinal()][DATETIMEV2.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[TIME.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
@@ -1740,6 +1791,7 @@ public abstract class Type {
         compatibilityMatrix[TIMEV2.ordinal()][DECIMAL32.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[TIMEV2.ordinal()][DECIMAL64.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[TIMEV2.ordinal()][DECIMAL128.ordinal()] = PrimitiveType.INVALID_TYPE;
+        compatibilityMatrix[TIMEV2.ordinal()][DECIMAL256.ordinal()] = PrimitiveType.INVALID_TYPE;
         compatibilityMatrix[TIMEV2.ordinal()][AGG_STATE.ordinal()] = PrimitiveType.INVALID_TYPE;
 
         // Check all of the necessary entries that should be filled.
@@ -1801,6 +1853,8 @@ public abstract class Type {
                 return DECIMAL64;
             case DECIMAL128:
                 return DECIMAL128;
+            case DECIMAL256:
+                return DECIMAL256;
             case STRING:
                 return STRING;
             case JSONB:
@@ -1948,11 +2002,6 @@ public abstract class Type {
         }
     }
 
-    public Type getMaxResolutionType() {
-        Preconditions.checkState(true, "must implemented");
-        return null;
-    }
-
     public Type getNumResultType() {
         switch (getPrimitiveType()) {
             case BOOLEAN:
@@ -1984,6 +2033,8 @@ public abstract class Type {
                 return Type.DECIMAL64;
             case DECIMAL128:
                 return Type.DECIMAL128;
+            case DECIMAL256:
+                return Type.DECIMAL256;
             default:
                 return Type.INVALID;
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
index 747f948c37..bf5c75c58f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
@@ -514,6 +514,7 @@ public class CastExpr extends Expr {
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
                 // normal decimal
                 if (targetType.getPrecision() != 0) {
                     newTargetType = targetType;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
index 238f1471a9..83fdbf792d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
@@ -484,6 +484,7 @@ public class ColumnDef {
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
                 DecimalLiteral decimalLiteral = new DecimalLiteral(defaultValue);
                 decimalLiteral.checkPrecisionAndScale(scalarType.getScalarPrecision(), scalarType.getScalarScale());
                 break;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java
index d8749ad6cc..fe03afd02c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java
@@ -2528,6 +2528,8 @@ public abstract class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
             return Type.DECIMAL64;
         } else if (originType.getPrimitiveType() == PrimitiveType.DECIMAL128) {
             return Type.DECIMAL128;
+        } else if (originType.getPrimitiveType() == PrimitiveType.DECIMAL256) {
+            return Type.DECIMAL256;
         } else if (originType.getPrimitiveType() == PrimitiveType.DATETIMEV2) {
             return Type.DATETIMEV2;
         } else if (originType.getPrimitiveType() == PrimitiveType.DATEV2) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java
index babcc564c0..bf24955970 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java
@@ -75,6 +75,7 @@ public abstract class LiteralExpr extends Expr implements Comparable<LiteralExpr
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
                 literalExpr = new DecimalLiteral(value);
                 break;
             case CHAR:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StringLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StringLiteral.java
index 66747e0002..3119781c97 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StringLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StringLiteral.java
@@ -245,6 +245,7 @@ public class StringLiteral extends LiteralExpr {
                 case DECIMAL32:
                 case DECIMAL64:
                 case DECIMAL128:
+                case DECIMAL256:
                     try {
                         DecimalLiteral res = new DecimalLiteral(new BigDecimal(value).stripTrailingZeros());
                         res.setType(targetType);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
index e00af4c676..333047ec01 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
@@ -29,6 +29,8 @@ import org.apache.doris.catalog.StructType;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.Config;
+import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.qe.SessionVariable;
 import org.apache.doris.thrift.TColumnDesc;
 import org.apache.doris.thrift.TPrimitiveType;
 
@@ -301,6 +303,34 @@ public class TypeDef implements ParseNode {
                 }
                 break;
             }
+            case DECIMAL256: {
+                // Only accepted when the session enables both Nereids and decimal256; no session means disabled.
+                // The context lookup is null-guarded so a missing session yields AnalysisException, not an NPE.
+                ConnectContext ctx = ConnectContext.get();
+                SessionVariable sessionVariable = ctx == null ? null : ctx.getSessionVariable();
+                int precision = scalarType.decimalPrecision();
+                if (sessionVariable == null || !sessionVariable.isEnableNereidsPlanner()
+                        || !sessionVariable.enableDecimal256()) {
+                    throw new AnalysisException(
+                            "Column of type Decimal256 with precision " + precision + " is not supported.");
+                }
+                int scale = scalarType.decimalScale();
+                if (precision < 1 || precision > ScalarType.MAX_DECIMAL256_PRECISION) {
+                    throw new AnalysisException("Precision of decimal256 must between 1 and 76."
+                            + " Precision was set to: " + precision + ".");
+                }
+                // scale >= 0
+                if (scale < 0) {
+                    throw new AnalysisException("Scale of decimal must not be less than 0." + " Scale was set to: "
+                            + scale + ".");
+                }
+                // scale <= precision (a scale equal to the precision is accepted)
+                if (scale > precision) {
+                    throw new AnalysisException("Scale of decimal must be smaller than precision."
+                            + " Scale is " + scale + " and precision is " + precision);
+                }
+                break;
+            }
             case TIMEV2:
             case DATETIMEV2: {
                 int precision = scalarType.decimalPrecision();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/AliasFunction.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/AliasFunction.java
index c2f6d466f2..882689dbfa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AliasFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AliasFunction.java
@@ -173,6 +173,7 @@ public class AliasFunction extends Function {
                     case DECIMAL32:
                     case DECIMAL64:
                     case DECIMAL128:
+                    case DECIMAL256:
                     case DECIMALV2:
                         if (!Strings.isNullOrEmpty(scalarType.getScalarPrecisionStr())) {
                             typeDefParams.add(scalarType.getScalarPrecisionStr());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
index d9a0ed51ba..58b1e1dc5d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
@@ -898,6 +898,7 @@ public class Column implements Writable, GsonPostProcessable {
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
                 sb.append(String.format(typeStringMap.get(dataType), getPrecision(), getScale()));
                 break;
             case ARRAY:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
index 7fd02926b7..3b0676118f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
@@ -87,6 +87,7 @@ public class Util {
         TYPE_STRING_MAP.put(PrimitiveType.DECIMAL32, "decimal(%d, %d)");
         TYPE_STRING_MAP.put(PrimitiveType.DECIMAL64, "decimal(%d, %d)");
         TYPE_STRING_MAP.put(PrimitiveType.DECIMAL128, "decimal(%d, %d)");
+        TYPE_STRING_MAP.put(PrimitiveType.DECIMAL256, "decimal(%d, %d)");
         TYPE_STRING_MAP.put(PrimitiveType.HLL, "varchar(%d)");
         TYPE_STRING_MAP.put(PrimitiveType.BOOLEAN, "bool");
         TYPE_STRING_MAP.put(PrimitiveType.BITMAP, "bitmap");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlSerializer.java b/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlSerializer.java
index 59375ec4b6..228f3891ce 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlSerializer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/mysql/MysqlSerializer.java
@@ -265,7 +265,8 @@ public class MysqlSerializer {
             case DECIMALV2:
             case DECIMAL32:
             case DECIMAL64:
-            case DECIMAL128: {
+            case DECIMAL128:
+            case DECIMAL256: {
                 // https://github.com/mysql/mysql-connector-j/blob/release/5.1/src/com/mysql/jdbc/ResultSetMetaData.java
                 // in function: int getPrecision(int column)
                 // f.getDecimals() > 0 ? clampedGetLength(f) - 1 + f.getPrecisionAdjustFactor()
@@ -296,6 +297,7 @@ public class MysqlSerializer {
             case DECIMAL32:
             case DECIMAL64:
             case DECIMAL128:
+            case DECIMAL256:
                 return ((ScalarType) type).decimalScale();
             case FLOAT:
             case DOUBLE:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/NotSupportedException.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/NotSupportedException.java
new file mode 100644
index 0000000000..bb707b6562
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/NotSupportedException.java
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.exceptions;
+
+/**
+ * Unchecked exception whose message is the supplied text prefixed with "Not Supported: ".
+ */
+public class NotSupportedException extends RuntimeException {
+    public NotSupportedException(String msg) {
+        super(String.format("Not Supported: %s", msg));
+    }
+}
+
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
index 6ed045a300..689bfc442b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
@@ -209,8 +209,9 @@ public class FoldConstantRuleOnBE extends AbstractExpressionRewriteRule {
                                     type = DateTimeV2Type.of(pScalarType.getScale());
                                 } else if (primitiveType == PrimitiveType.DECIMAL32
                                         || primitiveType == PrimitiveType.DECIMAL64
-                                        || primitiveType == PrimitiveType.DECIMAL128) {
-                                    type = DecimalV3Type.createDecimalV3Type(
+                                        || primitiveType == PrimitiveType.DECIMAL128
+                                        || primitiveType == PrimitiveType.DECIMAL256) {
+                                    type = DecimalV3Type.createDecimalV3TypeLooseCheck(
                                             pScalarType.getPrecision(), pScalarType.getScale());
                                 } else {
                                     type = DataType.fromCatalogType(ScalarType.createType(
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyDecimalV3Comparison.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyDecimalV3Comparison.java
index fc1ee0cb91..6b0426adaa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyDecimalV3Comparison.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyDecimalV3Comparison.java
@@ -71,7 +71,7 @@ public class SimplifyDecimalV3Comparison extends AbstractExpressionRewriteRule {
         if (scale <= leftType.getScale() && precision - scale <= leftType.getPrecision() - leftType.getScale()) {
             // precision and scale of literal all smaller than left, we don't need the cast
             DecimalV3Literal newRight = new DecimalV3Literal(
-                    DecimalV3Type.createDecimalV3Type(leftType.getPrecision(), leftType.getScale()),
+                    DecimalV3Type.createDecimalV3TypeLooseCheck(leftType.getPrecision(), leftType.getScale()),
                     trailingZerosValue);
             return cp.withChildren(castChild, newRight);
         } else {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Divide.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Divide.java
index eaa3c1ad2a..002849bb81 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Divide.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Divide.java
@@ -68,9 +68,9 @@ public class Divide extends BinaryArithmetic implements AlwaysNullable {
     @Override
     public DecimalV3Type getDataTypeForDecimalV3(DecimalV3Type t1, DecimalV3Type t2) {
         int retPercision = t1.getPrecision() + t2.getScale() + Config.div_precision_increment;
-        Preconditions.checkState(retPercision <= DecimalV3Type.MAX_DECIMAL128_PRECISION,
+        Preconditions.checkState(retPercision <= DecimalV3Type.MAX_DECIMAL256_PRECISION,
                 "target precision " + retPercision + " larger than precision "
-                        + DecimalV3Type.MAX_DECIMAL128_PRECISION + " in Divide return type");
+                        + DecimalV3Type.MAX_DECIMAL256_PRECISION + " in Divide return type");
         int retScale = t1.getScale() + t2.getScale()
                 + Config.div_precision_increment;
         int targetPercision = retPercision;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Multiply.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Multiply.java
index a42c3fa5c2..ebf984362b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Multiply.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Multiply.java
@@ -22,6 +22,7 @@ import org.apache.doris.nereids.trees.expressions.functions.CheckOverflowNullabl
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.DataType;
 import org.apache.doris.nereids.types.DecimalV3Type;
+import org.apache.doris.qe.ConnectContext;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -52,7 +53,14 @@ public class Multiply extends BinaryArithmetic implements CheckOverflowNullable
         int retPercision = t1.getPrecision() + t2.getPrecision();
         int retScale = t1.getScale() + t2.getScale();
         if (retPercision > DecimalV3Type.MAX_DECIMAL128_PRECISION) {
-            retPercision = DecimalV3Type.MAX_DECIMAL128_PRECISION;
+            boolean enableDecimal256 = ConnectContext.get().getSessionVariable().enableDecimal256();
+            if (enableDecimal256) {
+                if (retPercision > DecimalV3Type.MAX_DECIMAL256_PRECISION) {
+                    retPercision = DecimalV3Type.MAX_DECIMAL256_PRECISION;
+                }
+            } else {
+                retPercision = DecimalV3Type.MAX_DECIMAL128_PRECISION;
+            }
         }
         Preconditions.checkState(retPercision >= retScale,
                 "scale " + retScale + " larger than precision " + retPercision
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/ComputePrecisionForSum.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/ComputePrecisionForSum.java
index 1409a1d559..7480c510ba 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/ComputePrecisionForSum.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/ComputePrecisionForSum.java
@@ -20,6 +20,7 @@ package org.apache.doris.nereids.trees.expressions.functions;
 import org.apache.doris.catalog.FunctionSignature;
 import org.apache.doris.nereids.types.DataType;
 import org.apache.doris.nereids.types.DecimalV3Type;
+import org.apache.doris.qe.ConnectContext;
 
 /** ComputePrecisionForSum */
 public interface ComputePrecisionForSum extends ComputePrecision {
@@ -28,9 +29,12 @@ public interface ComputePrecisionForSum extends ComputePrecision {
         DataType argumentType = getArgumentType(0);
         if (signature.getArgType(0) instanceof DecimalV3Type) {
             DecimalV3Type decimalV3Type = DecimalV3Type.forType(argumentType);
+            boolean enableDecimal256 = ConnectContext.get().getSessionVariable().enableDecimal256();
             return signature.withArgumentType(0, decimalV3Type)
                     .withReturnType(DecimalV3Type.createDecimalV3Type(
-                            DecimalV3Type.MAX_DECIMAL128_PRECISION, decimalV3Type.getScale()));
+                            enableDecimal256 ? DecimalV3Type.MAX_DECIMAL256_PRECISION
+                                    : DecimalV3Type.MAX_DECIMAL128_PRECISION,
+                            decimalV3Type.getScale()));
         } else {
             return signature;
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Avg.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Avg.java
index 6eea3d9e41..cc75c5767c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Avg.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Avg.java
@@ -35,6 +35,7 @@ import org.apache.doris.nereids.types.IntegerType;
 import org.apache.doris.nereids.types.LargeIntType;
 import org.apache.doris.nereids.types.SmallIntType;
 import org.apache.doris.nereids.types.TinyIntType;
+import org.apache.doris.qe.ConnectContext;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -88,6 +89,7 @@ public class Avg extends NullableAggregateFunction
     public FunctionSignature computePrecision(FunctionSignature signature) {
         DataType argumentType = getArgumentType(0);
         if (signature.getArgType(0) instanceof DecimalV3Type) {
+            boolean enableDecimal256 = ConnectContext.get().getSessionVariable().enableDecimal256();
             DecimalV3Type decimalV3Type = DecimalV3Type.forType(argumentType);
             // DecimalV3 scale lower than DEFAULT_MIN_AVG_DECIMAL128_SCALE should do cast
             int precision = decimalV3Type.getPrecision();
@@ -95,14 +97,22 @@ public class Avg extends NullableAggregateFunction
             if (decimalV3Type.getScale() < ScalarType.DEFAULT_MIN_AVG_DECIMAL128_SCALE) {
                 scale = ScalarType.DEFAULT_MIN_AVG_DECIMAL128_SCALE;
                 precision = precision - decimalV3Type.getScale() + scale;
-                if (precision > DecimalV3Type.MAX_DECIMAL128_PRECISION) {
-                    precision = DecimalV3Type.MAX_DECIMAL128_PRECISION;
+                if (enableDecimal256) {
+                    if (precision > DecimalV3Type.MAX_DECIMAL256_PRECISION) {
+                        precision = DecimalV3Type.MAX_DECIMAL256_PRECISION;
+                    }
+                } else {
+                    if (precision > DecimalV3Type.MAX_DECIMAL128_PRECISION) {
+                        precision = DecimalV3Type.MAX_DECIMAL128_PRECISION;
+                    }
                 }
             }
             decimalV3Type = DecimalV3Type.createDecimalV3Type(precision, scale);
             return signature.withArgumentType(0, decimalV3Type)
                     .withReturnType(DecimalV3Type.createDecimalV3Type(
-                    DecimalV3Type.MAX_DECIMAL128_PRECISION, decimalV3Type.getScale()
+                            enableDecimal256 ? DecimalV3Type.MAX_DECIMAL256_PRECISION
+                                    : DecimalV3Type.MAX_DECIMAL128_PRECISION,
+                            decimalV3Type.getScale()
             ));
         } else {
             return signature;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/NumericArithmetic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/NumericArithmetic.java
index 28e34af5f0..228c8be3d0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/NumericArithmetic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/NumericArithmetic.java
@@ -555,7 +555,7 @@ public class NumericArithmetic {
         DecimalV3Type t2 = (DecimalV3Type) second.getDataType();
         int precision = t1.getPrecision() + t2.getPrecision();
         int scale = t1.getScale() + t2.getScale();
-        return new DecimalV3Literal(DecimalV3Type.createDecimalV3Type(precision, scale), result);
+        return new DecimalV3Literal(DecimalV3Type.createDecimalV3TypeLooseCheck(precision, scale), result);
     }
 
     /**
@@ -590,7 +590,7 @@ public class NumericArithmetic {
         DecimalV3Type t1 = (DecimalV3Type) first.getDataType();
         DecimalV3Type t2 = (DecimalV3Type) second.getDataType();
         BigDecimal result = first.getValue().divide(second.getValue());
-        return new DecimalV3Literal(DecimalV3Type.createDecimalV3Type(
+        return new DecimalV3Literal(DecimalV3Type.createDecimalV3TypeLooseCheck(
                 t1.getPrecision(), t1.getScale() - t2.getScale()), result);
     }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java
index 3a96bc5b44..741d2c3c4c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DecimalV3Literal.java
@@ -41,7 +41,7 @@ public class DecimalV3Literal extends Literal {
      * Constructor for DecimalV3Literal
      */
     public DecimalV3Literal(DecimalV3Type dataType, BigDecimal value) {
-        super(DecimalV3Type.createDecimalV3Type(dataType.getPrecision(), dataType.getScale()));
+        super(DecimalV3Type.createDecimalV3TypeLooseCheck(dataType.getPrecision(), dataType.getScale()));
         Objects.requireNonNull(value, "value not be null");
         DecimalLiteral.checkPrecisionAndScale(dataType.getPrecision(), dataType.getScale(), value);
         BigDecimal adjustedValue = value.scale() < 0 ? value
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
index 2ef2f911de..c55e8dfc66 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
@@ -333,7 +333,7 @@ public abstract class DataType {
             ScalarType scalarType = (ScalarType) type;
             int precision = scalarType.getScalarPrecision();
             int scale = scalarType.getScalarScale();
-            return DecimalV3Type.createDecimalV3Type(precision, scale);
+            return DecimalV3Type.createDecimalV3TypeNoCheck(precision, scale);
         } else if (type.isDecimalV2()) {
             ScalarType scalarType = (ScalarType) type;
             int precision = scalarType.getScalarPrecision();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DecimalV3Type.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DecimalV3Type.java
index 7501d3b8db..7aa3da9470 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DecimalV3Type.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DecimalV3Type.java
@@ -20,7 +20,9 @@ package org.apache.doris.nereids.types;
 import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.nereids.annotation.Developing;
+import org.apache.doris.nereids.exceptions.NotSupportedException;
 import org.apache.doris.nereids.types.coercion.FractionalType;
+import org.apache.doris.qe.ConnectContext;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableMap;
@@ -37,6 +39,7 @@ public class DecimalV3Type extends FractionalType {
     public static final int MAX_DECIMAL32_PRECISION = 9;
     public static final int MAX_DECIMAL64_PRECISION = 18;
     public static final int MAX_DECIMAL128_PRECISION = 38;
+    public static final int MAX_DECIMAL256_PRECISION = 76;
 
     public static final DecimalV3Type WILDCARD = new DecimalV3Type(-1, -1);
     public static final DecimalV3Type SYSTEM_DEFAULT = new DecimalV3Type(MAX_DECIMAL128_PRECISION, DEFAULT_SCALE);
@@ -99,18 +102,69 @@ public class DecimalV3Type extends FractionalType {
 
-    /** createDecimalV3Type. */
+    /**
+     * Whether the current session has enable_decimal256 switched on.
+     * A thread with no bound ConnectContext is treated as "disabled" rather than failing with a
+     * NullPointerException (assumes ConnectContext.get() returns null in that case).
+     */
+    private static boolean isDecimal256Enabled() {
+        ConnectContext connectContext = ConnectContext.get();
+        return connectContext != null && connectContext.getSessionVariable().enableDecimal256();
+    }
+
+    /**
+     * Create a DecimalV3Type after validating precision and scale.
+     *
+     * @param precision total digit count, must be in (0, MAX_DECIMAL256_PRECISION]
+     * @param scale fractional digit count, must be in [0, precision]
+     * @return a new DecimalV3Type(precision, scale)
+     * @throws IllegalArgumentException if precision or scale is out of range
+     * @throws NotSupportedException if precision exceeds MAX_DECIMAL128_PRECISION while
+     *         enable_decimal256 is off
+     */
     public static DecimalV3Type createDecimalV3Type(int precision, int scale) {
-        Preconditions.checkArgument(precision > 0 && precision <= MAX_DECIMAL128_PRECISION,
-                "precision should in (0, " + MAX_DECIMAL128_PRECISION + "], but real precision is " + precision);
+        Preconditions.checkArgument(precision > 0 && precision <= MAX_DECIMAL256_PRECISION,
+                "precision should in (0, " + MAX_DECIMAL256_PRECISION + "], but real precision is " + precision);
+        Preconditions.checkArgument(scale >= 0, "scale should not smaller than 0, but real scale is " + scale);
+        // Concatenate the whole message: a second string argument would be treated as a template
+        // argument and rendered as " [, scale is N]".
+        Preconditions.checkArgument(precision >= scale, "precision should not smaller than scale,"
+                + " but precision is " + precision + ", scale is " + scale);
+        if (precision > MAX_DECIMAL128_PRECISION && !isDecimal256Enabled()) {
+            throw new NotSupportedException("Datatype DecimalV3 with precision " + precision
+                    + ", which is greater than 38 is disabled by default. set enable_decimal256 = true to enable it.");
+        }
+        return new DecimalV3Type(precision, scale);
+    }
+
+    /**
+     * Create the DecimalV3Type whose precision and scale are derived from the given value.
+     * Delegates to createDecimalV3TypeLooseCheck.
+     */
+    public static DecimalV3Type createDecimalV3Type(BigDecimal bigDecimal) {
+        int precision = org.apache.doris.analysis.DecimalLiteral.getBigDecimalPrecision(bigDecimal);
+        int scale = org.apache.doris.analysis.DecimalLiteral.getBigDecimalScale(bigDecimal);
+        return createDecimalV3TypeLooseCheck(precision, scale);
+    }
+
+    /**
+     * create DecimalV3Type, not throwing NotSupportedException.
+     * The precision upper bound is MAX_DECIMAL256_PRECISION when enable_decimal256 is on and
+     * MAX_DECIMAL128_PRECISION otherwise; violations raise IllegalArgumentException.
+     */
+    public static DecimalV3Type createDecimalV3TypeLooseCheck(int precision, int scale) {
+        int maxPrecision = isDecimal256Enabled() ? MAX_DECIMAL256_PRECISION : MAX_DECIMAL128_PRECISION;
+        Preconditions.checkArgument(precision > 0 && precision <= maxPrecision,
+                "precision should in (0, " + maxPrecision + "], but real precision is " + precision);
         Preconditions.checkArgument(scale >= 0, "scale should not smaller than 0, but real scale is " + scale);
         Preconditions.checkArgument(precision >= scale, "precision should not smaller than scale,"
-                + " but precision is " + precision, ", scale is " + scale);
+                + " but precision is " + precision + ", scale is " + scale);
         return new DecimalV3Type(precision, scale);
     }
 
-    public static DecimalV3Type createDecimalV3Type(BigDecimal bigDecimal) {
-        int precision = org.apache.doris.analysis.DecimalLiteral.getBigDecimalPrecision(bigDecimal);
-        int scale = org.apache.doris.analysis.DecimalLiteral.getBigDecimalScale(bigDecimal);
-        return createDecimalV3Type(precision, scale);
+    /**
+     * create DecimalV3Type, without checking precision and scale, e.g. for DataType.fromCatalogType
+     */
+    public static DecimalV3Type createDecimalV3TypeNoCheck(int precision, int scale) {
+        return new DecimalV3Type(precision, scale);
     }
 
     public static DataType widerDecimalV3Type(DecimalV3Type left, DecimalV3Type right, boolean overflowToDouble) {
@@ -124,7 +158,9 @@ public class DecimalV3Type extends FractionalType {
             boolean overflowToDouble) {
         int scale = Math.max(leftScale, rightScale);
         int range = Math.max(leftPrecision - leftScale, rightPrecision - rightScale);
-        if (range + scale > MAX_DECIMAL128_PRECISION && overflowToDouble) {
+        boolean enableDecimal256 = ConnectContext.get().getSessionVariable().enableDecimal256();
+        if (range + scale > (enableDecimal256 ? MAX_DECIMAL256_PRECISION : MAX_DECIMAL128_PRECISION)
+                && overflowToDouble) {
             return DoubleType.INSTANCE;
         }
         return DecimalV3Type.createDecimalV3Type(range + scale, scale);
@@ -193,8 +229,15 @@ public class DecimalV3Type extends FractionalType {
             return 4;
         } else if (precision <= MAX_DECIMAL64_PRECISION) {
             return 8;
-        } else {
+        } else if (precision <= MAX_DECIMAL128_PRECISION) {
             return 16;
+        } else {
+            boolean enableDecimal256 = ConnectContext.get().getSessionVariable().enableDecimal256();
+            if (enableDecimal256) {
+                return 32;
+            } else {
+                return 16;
+            }
         }
     }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java
index 296cac55bc..5640a8c034 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java
@@ -50,6 +50,7 @@ import org.apache.doris.mysql.MysqlPacket;
 import org.apache.doris.mysql.MysqlProto;
 import org.apache.doris.mysql.MysqlSerializer;
 import org.apache.doris.mysql.MysqlServerStatusFlag;
+import org.apache.doris.nereids.exceptions.NotSupportedException;
 import org.apache.doris.nereids.glue.LogicalPlanAdapter;
 import org.apache.doris.nereids.minidump.MinidumpUtils;
 import org.apache.doris.nereids.parser.NereidsParser;
@@ -306,6 +307,10 @@ public class ConnectProcessor {
         if (mysqlCommand == MysqlCommand.COM_QUERY && ctx.getSessionVariable().isEnableNereidsPlanner()) {
             try {
                 stmts = new NereidsParser().parseSQL(originStmt);
+            } catch (NotSupportedException e) {
+                // Parse sql failed, audit it and return
+                handleQueryException(e, originStmt, null, null);
+                return;
             } catch (Exception e) {
                 // TODO: We should catch all exception here until we support all query syntax.
                 LOG.debug("Nereids parse sql failed. Reason: {}. Statement: \"{}\".",
@@ -390,6 +395,11 @@ public class ConnectProcessor {
             ctx.getState().setError(((UserException) throwable).getMysqlErrorCode(), throwable.getMessage());
             // set it as ANALYSIS_ERR so that it won't be treated as a query failure.
             ctx.getState().setErrType(QueryState.ErrType.ANALYSIS_ERR);
+        } else if (throwable instanceof NotSupportedException) {
+            LOG.warn("Process one query failed because.", throwable);
+            ctx.getState().setError(ErrorCode.ERR_NOT_SUPPORTED_YET, throwable.getMessage());
+            // set it as ANALYSIS_ERR so that it won't be treated as a query failure.
+            ctx.getState().setErrType(QueryState.ErrType.ANALYSIS_ERR);
         } else {
             // Catch all throwable.
             // If reach here, maybe palo bug.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 70882c3bb6..d467b3131d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -423,6 +423,8 @@ public class SessionVariable implements Serializable, Writable {
 
     public static final String FASTER_FLOAT_CONVERT = "faster_float_convert";
 
+    public static final String ENABLE_DECIMAL256 = "enable_decimal256";
+
     public static final List<String> DEBUG_VARIABLES = ImmutableList.of(
             SKIP_DELETE_PREDICATE,
             SKIP_DELETE_BITMAP,
@@ -1261,6 +1263,10 @@ public class SessionVariable implements Serializable, Writable {
                     "the plan node type which is ignored in 'explain shape plan' command"})
     public String ignoreShapePlanNodes = "";
 
+    @VariableMgr.VarAttr(name = ENABLE_DECIMAL256, needForward = true, description = { "控制是否在计算过程中使用Decimal256类型",
+            "Set to true to enable Decimal256 type" })
+    public boolean enableDecimal256 = false;
+
     // If this fe is in fuzzy mode, then will use initFuzzyModeVariables to generate some variables,
     // not the default value set in the code.
     public void initFuzzyModeVariables() {
@@ -2395,6 +2401,8 @@ public class SessionVariable implements Serializable, Writable {
 
         tResult.setFasterFloatConvert(fasterFloatConvert);
 
+        tResult.setEnableDecimal256(enableNereidsPlanner && enableDecimal256);
+
         return tResult;
     }
 
@@ -2724,6 +2732,10 @@ public class SessionVariable implements Serializable, Writable {
         return connectContext.getSessionVariable().enableAggState;
     }
 
+    public boolean enableDecimal256() {
+        return enableDecimal256;
+    }
+
     public void checkAnalyzeTimeFormat(String time) {
         try {
             DateTimeFormatter timeFormatter = DateTimeFormatter.ofPattern("HH:mm:ss");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/cache/PartitionRange.java b/fe/fe-core/src/main/java/org/apache/doris/qe/cache/PartitionRange.java
index 0c36cb69e7..0500814ed5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/cache/PartitionRange.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/cache/PartitionRange.java
@@ -189,6 +189,7 @@ public class PartitionRange {
                 case DECIMAL32:
                 case DECIMAL64:
                 case DECIMAL128:
+                case DECIMAL256:
                 case CHAR:
                 case VARCHAR:
                 case STRING:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
index 3bd22cdbf2..509e78ffb8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
@@ -404,7 +404,8 @@ public class FoldConstantsRule implements ExprRewriteRule {
                                 type = ScalarType.createDatetimeV2Type(scalarType.getScale());
                             } else if (ttype == TPrimitiveType.DECIMAL32
                                     || ttype == TPrimitiveType.DECIMAL64
-                                    || ttype == TPrimitiveType.DECIMAL128I) {
+                                    || ttype == TPrimitiveType.DECIMAL128I
+                                    || ttype == TPrimitiveType.DECIMAL256) {
                                 type = ScalarType.createDecimalV3Type(scalarType.getPrecision(),
                                         scalarType.getScale());
                             } else {
diff --git a/gensrc/proto/internal_service.proto b/gensrc/proto/internal_service.proto
index 90fa7017b0..8f0dc34e9d 100644
--- a/gensrc/proto/internal_service.proto
+++ b/gensrc/proto/internal_service.proto
@@ -475,6 +475,7 @@ enum PColumnType {
     COLUMN_TYPE_DECIMAL32 = 17;
     COLUMN_TYPE_DECIMAL64 = 18;
     COLUMN_TYPE_DECIMAL128I = 19;
+    COLUMN_TYPE_DECIMAL256 = 20;
 }
 
 message PMinMaxFilter {
diff --git a/gensrc/proto/types.proto b/gensrc/proto/types.proto
index 0bc9f46fa1..240b68c89d 100644
--- a/gensrc/proto/types.proto
+++ b/gensrc/proto/types.proto
@@ -112,6 +112,7 @@ message PGenericType {
         TIME = 35;
         AGG_STATE = 36;
         TIMEV2 = 37;
+        DECIMAL256 = 38;
         UNKNOWN = 999;
     }
     required TypeId id = 2;
diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift
index a56e4f98a4..03d026daba 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -249,6 +249,8 @@ struct TQueryOptions {
   86: optional i32 analyze_timeout = 43200;
 
   87: optional bool faster_float_convert = false;
+
+  88: optional bool enable_decimal256 = false;
 }
 
 
diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift
index f6a138976c..75266e3ced 100644
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@@ -94,7 +94,8 @@ enum TPrimitiveType {
   UNSUPPORTED,
   VARIANT,
   LAMBDA_FUNCTION,
-  AGG_STATE
+  AGG_STATE,
+  DECIMAL256
 }
 
 enum TTypeNodeType {
diff --git a/regression-test/data/datatype_p0/decimalv3/test_arithmetic_expressions.out b/regression-test/data/datatype_p0/decimalv3/test_arithmetic_expressions.out
index 9596f55a66..72db6bf6f2 100644
--- a/regression-test/data/datatype_p0/decimalv3/test_arithmetic_expressions.out
+++ b/regression-test/data/datatype_p0/decimalv3/test_arithmetic_expressions.out
@@ -35,27 +35,118 @@
 -- !select_all --
 999999.999	999999.999	999999.999	999999.999	999999.999	999999.999	999999.999	999999.999	999999.999	999999.999	999999.999
 
--- !select --
+-- !select_mix_calc_0 --
 2999999.997
 
--- !select --
+-- !select_mix_calc_1 --
 2999999994000.000003
 
--- !select --
+-- !select_mix_calc_2 --
 3.0000000
 
--- !select --
+-- !select_mix_calc_3 --
 10999999.989
 
--- !select --
-1	629.0287029333568	629.0287029333568
-2	722.8102124296118	722.8102124296118
+-- !select_div_mix_v2_v3 --
+1	629.028702933357	629.028702933357
+2	722.8102124296119	722.8102124296119
 3	724.2919760003956	724.2919760003956
-4	688.8901831550861	688.8901831550861
+4	688.8901831550862	688.8901831550862
 
--- !select --
+-- !select_mod --
 92594283.129196000	1	0.129196000	0.129196000
 107684988.257976000	3	0.257976000	0.257976000
 76891560.464178000	5	0.464178000	0.464178000
 277170831.851350000	7	0.851350000	0.851350000
 
+-- !decimal128_select_all --
+1.000000	99999999999999999999999999999999.999999	99999999999999999999999999999999.999999
+2.000000	49999999999999999999999999999999.999999	49999999999999999999999999999999.999999
+3.000000	33333333333333333333333333333333.333333	33333333333333333333333333333333.333333
+4.444444	2.222222	3.333333
+
+-- !decimal128_select_all_2 --
+999999.999	999999.999	999999.999	999999.999	999999.999	999999.999	999999.999	999999.999	999999.999	999999.999	999999.999
+
+-- !decimal128_cast256_cast --
+3.333333	3.3333330000
+33333333333333333333333333333333.333333	33333333333333333333333333333333.3333330000
+49999999999999999999999999999999.999999	49999999999999999999999999999999.9999990000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.9999990000
+
+-- !decimal128_cast256_calc_0 --
+6.666665999999999
+3.3333333333333334E31
+5.0E31
+1.0E32
+
+-- !decimal128_cast256_calc_1 --
+-2.222222
+3.3333333333333334E31
+5.0E31
+1.0E32
+
+-- !decimal128_cast256_calc_2 --
+9.876541234568
+99999999999999999999999999999999.999998000000
+99999999999999999999999999999999.999999000000
+99999999999999999999999999999999.999999000000
+
+-- !decimal128_cast256_calc_4 --
+2.222222	4.444444	0.5
+33333333333333333333333333333333.333333	3.000000	1.1111111111111112E31
+49999999999999999999999999999999.999999	2.000000	2.5E31
+99999999999999999999999999999999.999999	1.000000	1.0E32
+
+-- !decimal128_cast256_calc_5 --
+2.222222	4.444444	2.222222
+33333333333333333333333333333333.333333	3.000000	0.333333
+49999999999999999999999999999999.999999	2.000000	1.999999
+99999999999999999999999999999999.999999	1.000000	0.999999
+
+-- !decimal128_cast256_calc_6 --
+3.333333000000
+9.876541234568
+33333333333333333333333333333333.333333000000
+49999999999999999999999999999999.999999000000
+99999999999999999999999999999999.999998000000
+99999999999999999999999999999999.999999000000
+99999999999999999999999999999999.999999000000
+99999999999999999999999999999999.999999000000
+
+-- !decimal128_cast256_calc_7 --
+32.921800823046255144
+9999999999999999999999999999999999999999999999999999999999.999999999999999999
+9999999999999999999999999999999999999999999999999999999999.999999999999999999
+9999999999999999999999999999999999999999999999999999999999.999999999999999999
+
+-- !decimal128_cast256_calc_8 --
+1083.844969432329082604616506562346460736
+9999999999999999999999999999999999999999.999999999999999999999999999999999999
+9999999999999999999999999999999999999999.999999999999999999999999999999999999
+9999999999999999999999999999999999999999.999999999999999999999999999999999999
+
+-- !decimal128_cast256_mixed_calc_0 --
+2999999.9970
+
+-- !decimal128_cast256_mixed_calc_1 --
+2999999994000.0000030
+
+-- !decimal128_cast256_mixed_calc_2 --
+3.00000000
+
+-- !decimal128_cast256_mixed_calc_3 --
+10999999.9890
+
+-- !decimal128_cast256_mixed_calc_4 --
+2999999.997
+
+-- !decimal128_cast256_mixed_calc_5 --
+2999999994000.000003000
+
+-- !decimal128_cast256_mixed_calc_6 --
+3.0
+
+-- !decimal128_cast256_mixed_calc_7 --
+1.0999999989E7
+
diff --git a/regression-test/data/datatype_p0/decimalv3/test_decimalv3.out b/regression-test/data/datatype_p0/decimalv3/test_decimalv3.out
index 0482cc4164..026424e76f 100644
--- a/regression-test/data/datatype_p0/decimalv3/test_decimalv3.out
+++ b/regression-test/data/datatype_p0/decimalv3/test_decimalv3.out
@@ -23,3 +23,69 @@
 -- !aEb_test6 --
 1234450000
 
+-- !decimal256_cast_0 --
+999999.999999
+
+-- !decimal256_cast_1 --
+9999999999999999999999999999999999999999999999999999999999999999999999.999999
+
+-- !decimal256_const_0 --
+1.4E-45
+
+-- !decimal256_const_1 --
+1.4E-80
+
+-- !decimal256_const_2 --
+1.4E-45
+
+-- !decimal256_const_3 --
+1.4E-80
+
+-- !decimal256_const_4 --
+1.4E-45
+
+-- !decimal256_const_5 --
+1.4E-80
+
+-- !decimal256_const_6 --
+1.4E-45
+
+-- !decimal256_const_7 --
+1.4E-80
+
+-- !decimal256_const_8 --
+1.4E-45
+
+-- !decimal256_const_9 --
+1.4E-80
+
+-- !decimal256_const_10 --
+1.4E-45
+
+-- !decimal256_const_11 --
+1.4E-80
+
+-- !decimal256_cast_from_str_0 --
+1	9999999999999999999999999999999999999999999999999999999999999999999999.999999	9999999999999999999999999999999999999999999999999999999999999999999999.999999
+2	-9999999999999999999999999999999999999999999999999999999999999999999999.999999	-9999999999999999999999999999999999999999999999999999999999999999999999.999999
+3	0.999999	0.999999
+4	-0.999999	-0.999999
+
+-- !decimal256_cast_dec_0 --
+1.000000	99999999999999999999999999999999.999999	99999999999999999999999999999999.999999
+2.000000	-99999999999999999999999999999999.999999	-99999999999999999999999999999999.999999
+3.000000	1234567890.123456	1234567890.123456
+4.000000	-1234567890.123456	-1234567890.123456
+
+-- !decimal256_cast_dec_1 --
+1.000000	99999999999999999999999999999999.999999	99999999999999999999999999999999.9999990000
+2.000000	-99999999999999999999999999999999.999999	-99999999999999999999999999999999.9999990000
+3.000000	1234567890.123456	1234567890.1234560000
+4.000000	-1234567890.123456	-1234567890.1234560000
+
+-- !decimal256_cast_dec_2 --
+1.000000	99999999999999999999999999999999.999999	99999999999999999999999999999999.999999
+2.000000	-99999999999999999999999999999999.999999	-99999999999999999999999999999999.999999
+3.000000	1234567890.123456	1234567890.123456
+4.000000	-1234567890.123456	-1234567890.123456
+
diff --git a/regression-test/data/datatype_p0/decimalv3/test_predicate.out b/regression-test/data/datatype_p0/decimalv3/test_predicate.out
index 99787dfd4b..ab074a78d7 100644
--- a/regression-test/data/datatype_p0/decimalv3/test_predicate.out
+++ b/regression-test/data/datatype_p0/decimalv3/test_predicate.out
@@ -11,3 +11,80 @@ true
 1.200000000000000000	1.200000000000000000	1.300000000000000000
 1.500000000000000000	1.200000000000000000	1.300000000000000000
 
+-- !select4 --
+true
+
+-- !select5 --
+1
+1
+1
+
+-- !select6 --
+1.200000000000000000	1.200000000000000000	1.300000000000000000
+1.500000000000000000	1.200000000000000000	1.300000000000000000
+
+-- !select256_1 --
+true
+
+-- !select256_2 --
+false
+
+-- !select256_3 --
+true
+
+-- !select256_4 --
+false
+
+-- !select256_5 --
+true
+
+-- !select256_6 --
+false
+
+-- !select256_7 --
+true
+
+-- !select256_8 --
+false
+
+-- !select256_9 --
+true
+
+-- !select256_10 --
+false
+
+-- !select256_11 --
+true
+
+-- !select256_12 --
+false
+
+-- !decimal256_select_all --
+1.000000	99999999999999999999999999999999.999999	99999999999999999999999999999999.999999
+2.000000	49999999999999999999999999999999.999999	49999999999999999999999999999999.999999
+3.000000	33333333333333333333333333333333.333333	33333333333333333333333333333333.333333
+4.444444	2.222222	3.333333
+
+-- !decimal256_predicate_0 --
+1.000000	99999999999999999999999999999999.999999	99999999999999999999999999999999.999999
+2.000000	49999999999999999999999999999999.999999	49999999999999999999999999999999.999999
+
+-- !decimal256_predicate_1 --
+1.000000	99999999999999999999999999999999.999999	99999999999999999999999999999999.999999
+
+-- !decimal256_predicate_2 --
+3.000000	33333333333333333333333333333333.333333	33333333333333333333333333333333.333333
+4.444444	2.222222	3.333333
+
+-- !decimal256_predicate_3 --
+3.000000	33333333333333333333333333333333.333333	33333333333333333333333333333333.333333
+4.444444	2.222222	3.333333
+
+-- !decimal256_predicate_4 --
+1.000000	99999999999999999999999999999999.999999	99999999999999999999999999999999.999999
+
+-- !decimal256_predicate_5 --
+2.000000	49999999999999999999999999999999.999999	49999999999999999999999999999999.999999
+3.000000	33333333333333333333333333333333.333333	33333333333333333333333333333333.333333
+4.444444	2.222222	3.333333
+
diff --git a/regression-test/data/query_p0/aggregate/aggregate_decimal256.out b/regression-test/data/query_p0/aggregate/aggregate_decimal256.out
new file mode 100644
index 0000000000..95d30e8337
--- /dev/null
+++ b/regression-test/data/query_p0/aggregate/aggregate_decimal256.out
@@ -0,0 +1,97 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql_sum_1 --
+1	199999999999999999999999999999999.999998
+
+-- !sql_sum_2 --
+2	199999999999999999999999999999999.999998
+
+-- !sql_sum_3 --
+1	199999999999999999999999999999999.999998
+2	199999999999999999999999999999999.999998
+
+-- !sql_sum_4 --
+1	199999999999999999999999999999999.999998
+2	199999999999999999999999999999999.999998
+
+-- !sql_sum_5 --
+-999999.200002	3
+999999.200002	3
+99999999999999999999999999999999.999999	6
+
+-- !sql_sum_6 --
+1.000000	-999999.200002	2
+1.000000	999999.200002	2
+1.000000	99999999999999999999999999999999.999999	2
+11.000000	-999999.200002	4
+11.000000	999999.200002	4
+11.000000	99999999999999999999999999999999.999999	4
+
+-- !sql_sum_7 --
+72.000000
+
+-- !sql_sum_8 --
+399999999999999999999999999999999.999996
+
+-- !sql_sum_9 --
+72.000000	399999999999999999999999999999999.999996
+
+-- !sql_avg_1 --
+1	49999999999999999999999999999999.999999
+
+-- !sql_avg_2 --
+2	49999999999999999999999999999999.999999
+
+-- !sql_avg_3 --
+1	49999999999999999999999999999999.999999
+2	49999999999999999999999999999999.999999
+
+-- !sql_avg_4 --
+1	49999999999999999999999999999999.999999
+2	49999999999999999999999999999999.999999
+
+-- !sql_avg_5 --
+6.000000
+
+-- !sql_avg_6 --
+49999999999999999999999999999999.999999
+
+-- !sql_avg_7 --
+6.000000	49999999999999999999999999999999.999999
+
+-- !sql_max_1 --
+1	99999999999999999999999999999999.999999
+2	99999999999999999999999999999999.999999
+
+-- !sql_max_2 --
+11.000000
+
+-- !sql_max_3 --
+99999999999999999999999999999999.999999
+
+-- !sql_max_4 --
+11.000000	99999999999999999999999999999999.999999
+
+-- !sql_min_1 --
+1	-999999.200002
+2	-999999.200002
+
+-- !sql_min_2 --
+1.000000
+
+-- !sql_min_3 --
+-999999.200002
+
+-- !sql_min_4 --
+1.000000	-999999.200002
+
+-- !sql_count_1 --
+1	4
+2	4
+
+-- !sql_count_2 --
+1	4	4
+2	4	4
+
+-- !sql_count_3 --
+8	8
+
diff --git a/regression-test/data/query_p0/join/test_join_decimal256.out b/regression-test/data/query_p0/join/test_join_decimal256.out
new file mode 100644
index 0000000000..a5312a52bd
--- /dev/null
+++ b/regression-test/data/query_p0/join/test_join_decimal256.out
@@ -0,0 +1,41 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !join_1 --
+-0.000001	-0.000001	10	-10.000000	11	-110.000000
+-0.000001	-0.000001	10	-10.000000	11	-10.000000
+-0.000001	-0.000001	10	-10.000000	111	-110.000000
+-0.000001	-0.000001	10	-10.000000	111	-10.000000
+-0.000001	-0.000001	110	-110.000000	11	-110.000000
+-0.000001	-0.000001	110	-110.000000	11	-10.000000
+-0.000001	-0.000001	110	-110.000000	111	-110.000000
+-0.000001	-0.000001	110	-110.000000	111	-10.000000
+0.000001	0.000001	10	10.000000	11	10.000000
+0.000001	0.000001	10	10.000000	11	110.000000
+0.000001	0.000001	10	10.000000	111	10.000000
+0.000001	0.000001	10	10.000000	111	110.000000
+0.000001	0.000001	110	110.000000	11	10.000000
+0.000001	0.000001	110	110.000000	11	110.000000
+0.000001	0.000001	110	110.000000	111	10.000000
+0.000001	0.000001	110	110.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	110.000000
+
+-- !join_2 --
+-110.000000	-0.000001	-110.000000	-0.000001	110	11
+-110.000000	-0.000001	-110.000000	-0.000001	110	111
+-10.000000	-0.000001	-10.000000	-0.000001	10	11
+-10.000000	-0.000001	-10.000000	-0.000001	10	111
+10.000000	0.000001	10.000000	0.000001	10	11
+10.000000	0.000001	10.000000	0.000001	10	111
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	11
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	111
+110.000000	0.000001	110.000000	0.000001	110	11
+110.000000	0.000001	110.000000	0.000001	110	111
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	11
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	111
+
diff --git a/regression-test/data/query_p0/join/test_runtime_filter_decimal256.out b/regression-test/data/query_p0/join/test_runtime_filter_decimal256.out
new file mode 100644
index 0000000000..05f187e938
--- /dev/null
+++ b/regression-test/data/query_p0/join/test_runtime_filter_decimal256.out
@@ -0,0 +1,201 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !rf_in_1 --
+-0.000001	-0.000001	10	-10.000000	11	-110.000000
+-0.000001	-0.000001	10	-10.000000	11	-10.000000
+-0.000001	-0.000001	10	-10.000000	111	-110.000000
+-0.000001	-0.000001	10	-10.000000	111	-10.000000
+-0.000001	-0.000001	110	-110.000000	11	-110.000000
+-0.000001	-0.000001	110	-110.000000	11	-10.000000
+-0.000001	-0.000001	110	-110.000000	111	-110.000000
+-0.000001	-0.000001	110	-110.000000	111	-10.000000
+0.000001	0.000001	10	10.000000	11	10.000000
+0.000001	0.000001	10	10.000000	11	110.000000
+0.000001	0.000001	10	10.000000	111	10.000000
+0.000001	0.000001	10	10.000000	111	110.000000
+0.000001	0.000001	110	110.000000	11	10.000000
+0.000001	0.000001	110	110.000000	11	110.000000
+0.000001	0.000001	110	110.000000	111	10.000000
+0.000001	0.000001	110	110.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	110.000000
+
+-- !rf_in_2 --
+-110.000000	-0.000001	-110.000000	-0.000001	110	11
+-110.000000	-0.000001	-110.000000	-0.000001	110	111
+-10.000000	-0.000001	-10.000000	-0.000001	10	11
+-10.000000	-0.000001	-10.000000	-0.000001	10	111
+10.000000	0.000001	10.000000	0.000001	10	11
+10.000000	0.000001	10.000000	0.000001	10	111
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	11
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	111
+110.000000	0.000001	110.000000	0.000001	110	11
+110.000000	0.000001	110.000000	0.000001	110	111
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	11
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	111
+
+-- !rf_bf_1 --
+-0.000001	-0.000001	10	-10.000000	11	-110.000000
+-0.000001	-0.000001	10	-10.000000	11	-10.000000
+-0.000001	-0.000001	10	-10.000000	111	-110.000000
+-0.000001	-0.000001	10	-10.000000	111	-10.000000
+-0.000001	-0.000001	110	-110.000000	11	-110.000000
+-0.000001	-0.000001	110	-110.000000	11	-10.000000
+-0.000001	-0.000001	110	-110.000000	111	-110.000000
+-0.000001	-0.000001	110	-110.000000	111	-10.000000
+0.000001	0.000001	10	10.000000	11	10.000000
+0.000001	0.000001	10	10.000000	11	110.000000
+0.000001	0.000001	10	10.000000	111	10.000000
+0.000001	0.000001	10	10.000000	111	110.000000
+0.000001	0.000001	110	110.000000	11	10.000000
+0.000001	0.000001	110	110.000000	11	110.000000
+0.000001	0.000001	110	110.000000	111	10.000000
+0.000001	0.000001	110	110.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	110.000000
+
+-- !rf_bf_2 --
+-110.000000	-0.000001	-110.000000	-0.000001	110	11
+-110.000000	-0.000001	-110.000000	-0.000001	110	111
+-10.000000	-0.000001	-10.000000	-0.000001	10	11
+-10.000000	-0.000001	-10.000000	-0.000001	10	111
+10.000000	0.000001	10.000000	0.000001	10	11
+10.000000	0.000001	10.000000	0.000001	10	111
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	11
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	111
+110.000000	0.000001	110.000000	0.000001	110	11
+110.000000	0.000001	110.000000	0.000001	110	111
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	11
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	111
+
+-- !rf_minmax_1 --
+-0.000001	-0.000001	10	-10.000000	11	-110.000000
+-0.000001	-0.000001	10	-10.000000	11	-10.000000
+-0.000001	-0.000001	10	-10.000000	111	-110.000000
+-0.000001	-0.000001	10	-10.000000	111	-10.000000
+-0.000001	-0.000001	110	-110.000000	11	-110.000000
+-0.000001	-0.000001	110	-110.000000	11	-10.000000
+-0.000001	-0.000001	110	-110.000000	111	-110.000000
+-0.000001	-0.000001	110	-110.000000	111	-10.000000
+0.000001	0.000001	10	10.000000	11	10.000000
+0.000001	0.000001	10	10.000000	11	110.000000
+0.000001	0.000001	10	10.000000	111	10.000000
+0.000001	0.000001	10	10.000000	111	110.000000
+0.000001	0.000001	110	110.000000	11	10.000000
+0.000001	0.000001	110	110.000000	11	110.000000
+0.000001	0.000001	110	110.000000	111	10.000000
+0.000001	0.000001	110	110.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	110.000000
+
+-- !rf_minmax_2 --
+-110.000000	-0.000001	-110.000000	-0.000001	110	11
+-110.000000	-0.000001	-110.000000	-0.000001	110	111
+-10.000000	-0.000001	-10.000000	-0.000001	10	11
+-10.000000	-0.000001	-10.000000	-0.000001	10	111
+10.000000	0.000001	10.000000	0.000001	10	11
+10.000000	0.000001	10.000000	0.000001	10	111
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	11
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	111
+110.000000	0.000001	110.000000	0.000001	110	11
+110.000000	0.000001	110.000000	0.000001	110	111
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	11
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	111
+
+-- !rf_in_or_bf_1 --
+-0.000001	-0.000001	10	-10.000000	11	-110.000000
+-0.000001	-0.000001	10	-10.000000	11	-10.000000
+-0.000001	-0.000001	10	-10.000000	111	-110.000000
+-0.000001	-0.000001	10	-10.000000	111	-10.000000
+-0.000001	-0.000001	110	-110.000000	11	-110.000000
+-0.000001	-0.000001	110	-110.000000	11	-10.000000
+-0.000001	-0.000001	110	-110.000000	111	-110.000000
+-0.000001	-0.000001	110	-110.000000	111	-10.000000
+0.000001	0.000001	10	10.000000	11	10.000000
+0.000001	0.000001	10	10.000000	11	110.000000
+0.000001	0.000001	10	10.000000	111	10.000000
+0.000001	0.000001	10	10.000000	111	110.000000
+0.000001	0.000001	110	110.000000	11	10.000000
+0.000001	0.000001	110	110.000000	11	110.000000
+0.000001	0.000001	110	110.000000	111	10.000000
+0.000001	0.000001	110	110.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	110.000000
+
+-- !rf_in_or_bf_2 --
+-110.000000	-0.000001	-110.000000	-0.000001	110	11
+-110.000000	-0.000001	-110.000000	-0.000001	110	111
+-10.000000	-0.000001	-10.000000	-0.000001	10	11
+-10.000000	-0.000001	-10.000000	-0.000001	10	111
+10.000000	0.000001	10.000000	0.000001	10	11
+10.000000	0.000001	10.000000	0.000001	10	111
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	11
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	111
+110.000000	0.000001	110.000000	0.000001	110	11
+110.000000	0.000001	110.000000	0.000001	110	111
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	11
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	111
+
+-- !rf_bitmap_1 --
+-0.000001	-0.000001	10	-10.000000	11	-110.000000
+-0.000001	-0.000001	10	-10.000000	11	-10.000000
+-0.000001	-0.000001	10	-10.000000	111	-110.000000
+-0.000001	-0.000001	10	-10.000000	111	-10.000000
+-0.000001	-0.000001	110	-110.000000	11	-110.000000
+-0.000001	-0.000001	110	-110.000000	11	-10.000000
+-0.000001	-0.000001	110	-110.000000	111	-110.000000
+-0.000001	-0.000001	110	-110.000000	111	-10.000000
+0.000001	0.000001	10	10.000000	11	10.000000
+0.000001	0.000001	10	10.000000	11	110.000000
+0.000001	0.000001	10	10.000000	111	10.000000
+0.000001	0.000001	10	10.000000	111	110.000000
+0.000001	0.000001	110	110.000000	11	10.000000
+0.000001	0.000001	110	110.000000	11	110.000000
+0.000001	0.000001	110	110.000000	111	10.000000
+0.000001	0.000001	110	110.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	10	10.000000	111	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	11	110.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	10.000000
+99999999999999999999999999999999.999999	99999999999999999999999999999999.999999	110	110.000000	111	110.000000
+
+-- !rf_bitmap_2 --
+-110.000000	-0.000001	-110.000000	-0.000001	110	11
+-110.000000	-0.000001	-110.000000	-0.000001	110	111
+-10.000000	-0.000001	-10.000000	-0.000001	10	11
+-10.000000	-0.000001	-10.000000	-0.000001	10	111
+10.000000	0.000001	10.000000	0.000001	10	11
+10.000000	0.000001	10.000000	0.000001	10	111
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	11
+10.000000	99999999999999999999999999999999.999999	10.000000	99999999999999999999999999999999.999999	10	111
+110.000000	0.000001	110.000000	0.000001	110	11
+110.000000	0.000001	110.000000	0.000001	110	111
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	11
+110.000000	99999999999999999999999999999999.999999	110.000000	99999999999999999999999999999999.999999	110	111
+
diff --git a/regression-test/suites/datatype_p0/decimalv3/test_arithmetic_expressions.groovy b/regression-test/suites/datatype_p0/decimalv3/test_arithmetic_expressions.groovy
index 1d7fbb39b0..cfac94774f 100644
--- a/regression-test/suites/datatype_p0/decimalv3/test_arithmetic_expressions.groovy
+++ b/regression-test/suites/datatype_p0/decimalv3/test_arithmetic_expressions.groovy
@@ -18,6 +18,7 @@
 suite("test_arithmetic_expressions") {
 
     def table1 = "test_arithmetic_expressions"
+    sql "set enable_decimal256 = false;"
 
     sql "drop table if exists ${table1}"
 
@@ -60,10 +61,12 @@ suite("test_arithmetic_expressions") {
     """
     qt_select_all "select * from ${table1} order by a"
 
-    qt_select "select a + b + c from ${table1};"
-    qt_select "select (a + b + c) * d from ${table1};"
-    qt_select "select (a + b + c) / d from ${table1};"
-    qt_select "select a + b + c + d + e + f + g + h + i + j + k from ${table1};"
+    // TODO: the results of the following mixed-calculation queries are wrong and need to be fixed
+    qt_select_mix_calc_0 "select a + b + c from ${table1};"
+    qt_select_mix_calc_1 "select (a + b + c) * d from ${table1};"
+    qt_select_mix_calc_2 "select (a + b + c) / d from ${table1};"
+    qt_select_mix_calc_3 "select a + b + c + d + e + f + g + h + i + j + k from ${table1};"
+
     sql "drop table if exists ${table1}"
 
     def table2 = "test_arithmetic_expressions"
@@ -85,7 +88,13 @@ suite("test_arithmetic_expressions") {
     sql """ insert into ${table2} values (3,76891560.464178000,76891560.464178000,106161.0000000000); """
     sql """ insert into ${table2} values (4,277170831.851350000,277170831.851350000,402344.0000000000); """
 
-    qt_select """ select id, fz/fm as dec,fzv3/fm as decv3 from ${table2} ORDER BY id; """
+    // For reference, PostgreSQL 16:
+    // select 92594283.129196000 / 147202.0000000000;
+    // 629.0287029333568837
+    // For reference, MySQL 8.0:
+    // select 92594283.129196000 / 147202.0000000000;
+    // 629.0287029333569
+    qt_select_div_mix_v2_v3 """ select id, fz/fm as dec,fzv3/fm as decv3 from ${table2} ORDER BY id; """
     sql "drop table if exists ${table2}"
 
     def table3 = "test_mod_expressions"
@@ -107,6 +116,259 @@ suite("test_arithmetic_expressions") {
     sql """ insert into ${table3} values (3,76891560.464178000,5,5); """
     sql """ insert into ${table3} values (4,277170831.851350000,7,7); """
 
-    qt_select """ select v1, v2, v1 % v2, v1 % v3 from ${table3} ORDER BY id; """
+    qt_select_mod """ select v1, v2, v1 % v2, v1 % v3 from ${table3} ORDER BY id; """
     sql "drop table if exists ${table3}"
+
+    // decimal64
+    sql "DROP TABLE IF EXISTS `test_arithmetic_expressions_64`";
+    sql """
+    CREATE TABLE IF NOT EXISTS `test_arithmetic_expressions_64` (
+      `k1` decimalv3(18, 6) NULL COMMENT "",
+      `k2` decimalv3(18, 6) NULL COMMENT "",
+      `k3` decimalv3(18, 6) NULL COMMENT ""
+    ) ENGINE=OLAP
+    COMMENT "OLAP"
+    DISTRIBUTED BY HASH(`k1`, `k2`, `k3`) BUCKETS 8
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1"
+    );
+    """
+    sql """insert into test_arithmetic_expressions_64 values(1, 999999999999.999999, 999999999999.999999),
+            (2, 499999999999.999999, 499999999999.999999),
+            (3, 333333333333.333333, 333333333333.333333),
+            (4, 4, 4);"""
+    // TODO: fix decimal cast
+    // sql "select k3, CAST(k3 AS DECIMALV3(18, 10)) from test_arithmetic_expressions_64;"
+/*
+mysql [test]>select k3, CAST(k3 AS DECIMALV3(18, 10)) from test_arithmetic_expressions_64;
++---------------------+-------------------------------+
+| k3                  | cast(k3 as DECIMALV3(18, 10)) |
++---------------------+-------------------------------+
+| 333333333333.333333 |         -552734400.8095512496 |
+| 499999999999.999999 |           93235602.4711502064 |
+| 999999999999.999999 |          186471204.9423014128 |
+|            4.000000 |                  4.0000000000 |
++---------------------+-------------------------------+
+4 rows in set (0.39 sec)
+*/
+
+    // decimal128
+    sql "DROP TABLE IF EXISTS `test_arithmetic_expressions_128_1`";
+    sql """
+    CREATE TABLE IF NOT EXISTS `test_arithmetic_expressions_128_1` (
+      `k1` decimalv3(38, 6) NULL COMMENT "",
+      `k2` decimalv3(38, 6) NULL COMMENT "",
+      `k3` decimalv3(38, 6) NULL COMMENT ""
+    ) ENGINE=OLAP
+    COMMENT "OLAP"
+    DISTRIBUTED BY HASH(`k1`, `k2`, `k3`) BUCKETS 8
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1"
+    );
+    """
+    sql """insert into test_arithmetic_expressions_128_1 values(1, 99999999999999999999999999999999.999999, 99999999999999999999999999999999.999999),
+            (2, 49999999999999999999999999999999.999999, 49999999999999999999999999999999.999999),
+            (3, 33333333333333333333333333333333.333333, 33333333333333333333333333333333.333333),
+            (4.444444, 2.222222, 3.333333);"""
+    qt_decimal128_select_all "select * from test_arithmetic_expressions_128_1 order by k1, k2;"
+    // TODO: fix decimal cast, then re-enable the query below
+    // qt_decimal128_cast "select k3, CAST(k3 AS DECIMALV3(38, 10)) from test_arithmetic_expressions_128_1 order by 1, 2;"
+    /*
+    qt_decimal128_multiply_0 "select k1 * k2 a from test_arithmetic_expressions_128_1 order by 1;"
+    qt_decimal128_arith_union "select * from (select k1 * k2 from test_arithmetic_expressions_128_1 union all select k3 from test_arithmetic_expressions_128_1) a order by 1"
+    qt_decimal128_multiply_1 "select k1 * k2 * k3 a from test_arithmetic_expressions_128_1 order by 1;"
+    qt_decimal128_multiply_2 "select k1 * k2 * k3 * k1 * k2 * k3 from test_arithmetic_expressions_128_1 order by k1"
+    qt_decimal128_multiply_div "select k1 * k2 / k3 * k1 * k2 * k3 from test_arithmetic_expressions_128_1 order by k1"
+    */
+
+    sql "DROP TABLE IF EXISTS `test_arithmetic_expressions_128_2`";
+    sql """
+    CREATE TABLE IF NOT EXISTS test_arithmetic_expressions_128_2 (
+        `a` DECIMALV3(38, 3) NOT NULL,
+        `b` DECIMALV3(38, 3) NOT NULL,
+        `c` DECIMALV3(38, 3) NOT NULL,
+        `d` DECIMALV3(38, 3) NOT NULL,
+        `e` DECIMALV3(38, 3) NOT NULL,
+        `f` DECIMALV3(38, 3) NOT NULL,
+        `g` DECIMALV3(38, 3) NOT NULL,
+        `h` DECIMALV3(38, 3) NOT NULL,
+        `i` DECIMALV3(38, 3) NOT NULL,
+        `j` DECIMALV3(38, 3) NOT NULL,
+        `k` DECIMALV3(38, 3) NOT NULL
+    ) DISTRIBUTED BY HASH(a) PROPERTIES("replication_num" = "1");
+    """
+
+    sql """
+    insert into test_arithmetic_expressions_128_2 values(999999.999,999999.999,999999.999,999999.999,999999.999,999999.999,999999.999,999999.999,999999.999,999999.999,999999.999);
+    """
+    qt_decimal128_select_all_2 "select * from test_arithmetic_expressions_128_2 order by a"
+    /*
+    qt_decimal128_mixed_calc_0 "select a + b + c from test_arithmetic_expressions_128_2;"
+    qt_decimal128_mixed_calc_1 "select (a + b + c) * d from test_arithmetic_expressions_128_2;"
+    qt_decimal128_mixed_calc_2 "select (a + b + c) / d from test_arithmetic_expressions_128_2;"
+    qt_decimal128_mixed_calc_3 "select a + b + c + d + e + f + g + h + i + j + k from test_arithmetic_expressions_128_2;"
+    */
+
+    sql "set enable_nereids_planner = true;"
+    sql "set enable_decimal256 = true;"
+    qt_decimal128_cast256_cast "select k3, CAST(k3 AS DECIMALV3(76, 10)) from test_arithmetic_expressions_128_1 order by 1, 2;"
+    qt_decimal128_cast256_calc_0 "select cast(k1 as decimalv3(76, 6)) + k2 a from test_arithmetic_expressions_128_1 order by 1;"
+    qt_decimal128_cast256_calc_1 "select cast(k2 as decimalv3(76, 6)) - k1 a from test_arithmetic_expressions_128_1 order by 1;"
+    qt_decimal128_cast256_calc_2 "select cast(k1 as decimalv3(76, 6)) * k2 a from test_arithmetic_expressions_128_1 order by 1;"
+    qt_decimal128_cast256_calc_4 "select k2, k1, cast(k2 as decimalv3(76, 6)) / k1 a from test_arithmetic_expressions_128_1 order by 1, 2;"
+    qt_decimal128_cast256_calc_5 "select k2, k1, cast(k2 as decimalv3(76, 6)) % k1 a from test_arithmetic_expressions_128_1 order by 1, 2;"
+
+    qt_decimal128_cast256_calc_6 "select * from (select cast(k1 as decimalv3(76, 6)) * k2 from test_arithmetic_expressions_128_1 union all select k3 from test_arithmetic_expressions_128_1) a order by 1"
+    // overflow
+    qt_decimal128_cast256_calc_7 "select cast(k1 as decimalv3(76, 6)) * k2 * k3 a from test_arithmetic_expressions_128_1 order by 1;"
+    qt_decimal128_cast256_calc_8 "select cast(k1 as decimalv3(76, 6)) * k2 * k3 * k1 * k2 * k3 from test_arithmetic_expressions_128_1 order by 1"
+    // qt_decimal128_cast256_calc_9 "select cast(k1 as decimalv3(76, 6)) * k2 / k3 * k1 * k2 * k3 from test_arithmetic_expressions_128_1 order by 1"
+
+    qt_decimal128_cast256_mixed_calc_0 "select cast(a as decimalv3(39, 4)) + b + c from test_arithmetic_expressions_128_2 order by 1;"
+    qt_decimal128_cast256_mixed_calc_1 "select cast((a + b + c) as decimalv3(39, 4)) * d from test_arithmetic_expressions_128_2 order by 1;"
+    qt_decimal128_cast256_mixed_calc_2 "select cast((a + b + c) as decimalv3(39, 4)) / d from test_arithmetic_expressions_128_2 order by 1;"
+    qt_decimal128_cast256_mixed_calc_3 "select cast(a as decimalv3(39, 4)) + b + c + d + e + f + g + h + i + j + k from test_arithmetic_expressions_128_2 order by 1;"
+
+    qt_decimal128_cast256_mixed_calc_4 "select cast(a as decimalv3(76, 6)) + b + c from test_arithmetic_expressions_128_2 order by 1;"
+    qt_decimal128_cast256_mixed_calc_5 "select cast((a + b + c) as decimalv3(76, 6)) * d from test_arithmetic_expressions_128_2 order by 1;"
+    qt_decimal128_cast256_mixed_calc_6 "select cast((a + b + c) as decimalv3(76, 6)) / d from test_arithmetic_expressions_128_2 order by 1;"
+    qt_decimal128_cast256_mixed_calc_7 "select cast(a as decimalv3(76, 6)) + b + c + d + e + f + g + h + i + j + k from test_arithmetic_expressions_128_2 order by 1;"
+
+/*
+mysql [test]>select k3, CAST(k3 AS DECIMALV3(38, 10)) from test_arithmetic_expressions_128_1;
++-----------------------------------------+------------------------------------------+
+| k3                                      | cast(k3 as DECIMALV3(38, 10))            |
++-----------------------------------------+------------------------------------------+
+| 33333333333333333333333333333333.333333 | -9999999999999999999999999999.9999999999 |
+| 99999999999999999999999999999999.999999 | -9999999999999999999999999999.9999999999 |
+| 49999999999999999999999999999999.999999 |  9999999999999999999999999999.9999999999 |
+|                                4.000000 |                             4.0000000000 |
++-----------------------------------------+------------------------------------------+
+4 rows in set (0.07 sec)
+*/
+
+    // decimal256
+    /*
+    mysql [regression_test_datatype_p0_decimalv3]>select CAST(k3 AS DECIMALV3(76, 19)) from test_arithmetic_expressions_256_0;
++---------------------------------------------------------------------------------+
+| cast(k3 as DECIMALV3(76, 19))                                                   |
++---------------------------------------------------------------------------------+
+|  3213777273360060490676974488410532053153505213067024283781.6774441511766597376 |
+| -1717218125670499520334383174682575559673329346810002612127.4678374963165481728 |
+| -5151654377011498561003149524047726679019988040430007836382.4035124889496445184 |
++---------------------------------------------------------------------------------+
+    */
+    /*
+    sql "DROP TABLE IF EXISTS `test_arithmetic_expressions_256_1`"
+    sql """
+    CREATE TABLE IF NOT EXISTS `test_arithmetic_expressions_256_1` (
+      `k1` decimalv3(76, 9) NULL COMMENT "",
+      `k2` decimalv3(76, 10) NULL COMMENT "",
+      `k3` decimalv3(76, 11) NULL COMMENT ""
+    ) ENGINE=OLAP
+    DISTRIBUTED BY HASH(`k1`, `k2`, `k3`) BUCKETS 8
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1"
+    );
+    """
+
+    sql """insert into test_arithmetic_expressions_256_1 values(1, 999999999999999999999999999999999999999999999999999999999999999999.9999999999, 99999999999999999999999999999999999999999999999999999999999999999.99999999999),
+            (2, 499999999999999999999999999999999999999999999999999999999999999999.9999999999, 49999999999999999999999999999999999999999999999999999999999999999.99999999999),
+            (3, 333333333333333333333333333333333333333333333333333333333333333333.3333333333, 33333333333333333333333333333333333333333333333333333333333333333.33333333333);"""
+    qt_decimal256_arith_select_all "select * from test_arithmetic_expressions_256_1 order by k1, k2, k3;"
+    qt_decimal256_arith_plus "select k1 + k2 from test_arithmetic_expressions_256_1 order by 1;"
+    qt_decimal256_arith_minus "select k2 - k1 from test_arithmetic_expressions_256_1 order by 1;"
+    qt_decimal256_arith_multiply "select k1 * k2 from test_arithmetic_expressions_256_1 order by 1;"
+    qt_decimal256_arith_div "select k2 / k1 from test_arithmetic_expressions_256_1 order by 1;"
+    qt_decimal256_arith_union "select * from (select k1 * k2 from test_arithmetic_expressions_256_1 union all select k3 from test_arithmetic_expressions_256_1) a order by 1"
+
+    qt_decimal256_multiply_1 "select k1 * k2 * k3 a from test_arithmetic_expressions_256_1 order by 1;"
+    qt_decimal256_multiply_2 "select k1 * k2 * k3 * k1 * k2 * k3 from test_arithmetic_expressions_256_1 order by k1"
+    qt_decimal256_multiply_div "select k1 * k2 / k3 * k1 * k2 * k3 from test_arithmetic_expressions_256_1 order by k1"
+
+    qt_decimal256_arith_multiply_const "select k1 * 2.0 from test_arithmetic_expressions_256_1 order by 1;"
+
+    sql "DROP TABLE IF EXISTS `test_arithmetic_expressions_256_2`";
+    sql """
+    CREATE TABLE IF NOT EXISTS test_arithmetic_expressions_256_2 (
+        `a` DECIMALV3(76, 3) NOT NULL,
+        `b` DECIMALV3(76, 3) NOT NULL,
+        `c` DECIMALV3(76, 3) NOT NULL,
+        `d` DECIMALV3(76, 3) NOT NULL,
+        `e` DECIMALV3(76, 3) NOT NULL,
+        `f` DECIMALV3(76, 3) NOT NULL,
+        `g` DECIMALV3(76, 3) NOT NULL,
+        `h` DECIMALV3(76, 3) NOT NULL,
+        `i` DECIMALV3(76, 3) NOT NULL,
+        `j` DECIMALV3(76, 3) NOT NULL,
+        `k` DECIMALV3(76, 3) NOT NULL
+    ) DISTRIBUTED BY HASH(a) PROPERTIES("replication_num" = "1");
+    """
+
+    sql """
+    insert into test_arithmetic_expressions_256_2 values(999999.999,999999.999,999999.999,999999.999,999999.999,999999.999,999999.999,999999.999,999999.999,999999.999,999999.999);
+    """
+    qt_decimal256_select_all_2 "select * from test_arithmetic_expressions_256_2 order by a"
+
+    qt_decimal256_mixed_calc_0 "select a + b + c from test_arithmetic_expressions_256_2;"
+    qt_decimal256_mixed_calc_1 "select (a + b + c) * d from test_arithmetic_expressions_256_2;"
+    qt_decimal256_mixed_calc_2 "select (a + b + c) / d from test_arithmetic_expressions_256_2;"
+    qt_decimal256_mixed_calc_3 "select a + b + c + d + e + f + g + h + i + j + k from test_arithmetic_expressions_256_2;"
+
+    sql "DROP TABLE IF EXISTS `test_arithmetic_expressions_256_3`"
+    sql """
+    CREATE TABLE IF NOT EXISTS `test_arithmetic_expressions_256_3` (
+      `k1` decimalv3(76, 0) NULL COMMENT "",
+      `k2` decimalv3(76, 1) NULL COMMENT "",
+      `k3` decimalv3(76, 2) NULL COMMENT ""
+    ) ENGINE=OLAP
+    DISTRIBUTED BY HASH(`k1`, `k2`, `k3`) BUCKETS 8
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1"
+    );
+    """
+    sql """insert into test_arithmetic_expressions_256_3 values(1, 999999999999999999999999999999999999999999999999999999999999999999999999999.9, 99999999999999999999999999999999999999999999999999999999999999999999999999.99),
+            (2, 499999999999999999999999999999999999999999999999999999999999999999999999999.9, 49999999999999999999999999999999999999999999999999999999999999999999999999.99),
+            (3, 333333333333333333333333333333333333333333333333333333333333333333333333333.3, 33333333333333333333333333333333333333333333333333333333333333333333333333.33);"""
+    qt_decimal256_arith_3 "select k1, k2, k1 * k2 a from test_arithmetic_expressions_256_3 order by k1, k2;"
+
+    sql "DROP TABLE IF EXISTS `test_arithmetic_expressions_256_4`"
+    sql """ create table test_arithmetic_expressions_256_4 (
+            id smallint,
+            fz decimal(27,9),
+            fzv3 decimalv3(76,9),
+            fm decimalv3(76,10))
+            DISTRIBUTED BY HASH(`id`) BUCKETS auto
+            PROPERTIES
+            (
+                "replication_num" = "1"
+            ); """
+
+    sql """ insert into test_arithmetic_expressions_256_4 values (1,92594283.129196000,92594283.129196000,147202.0000000000); """
+    sql """ insert into test_arithmetic_expressions_256_4 values (2,107684988.257976000,107684988.257976000,148981.0000000000); """
+    sql """ insert into test_arithmetic_expressions_256_4 values (3,76891560.464178000,76891560.464178000,106161.0000000000); """
+    sql """ insert into test_arithmetic_expressions_256_4 values (4,277170831.851350000,277170831.851350000,402344.0000000000); """
+
+    qt_decimal256_div_v2_v3 """ select id, fz/fm as dec,fzv3/fm as decv3 from test_arithmetic_expressions_256_4 ORDER BY id; """
+
+    sql "drop table if exists test_arithmetic_expressions_256_5"
+    sql """ create table test_arithmetic_expressions_256_5 (
+            id smallint,
+            v1 decimalv3(27,9),
+            v2 decimalv3(9,0),
+            v3 int )
+            DISTRIBUTED BY HASH(`id`) BUCKETS auto
+            PROPERTIES
+            (
+                "replication_num" = "1"
+            ); """
+
+    sql """ insert into test_arithmetic_expressions_256_5 values (1,92594283.129196000,1,1); """
+    sql """ insert into test_arithmetic_expressions_256_5 values (2,107684988.257976000,3,3); """
+    sql """ insert into test_arithmetic_expressions_256_5 values (3,76891560.464178000,5,5); """
+    sql """ insert into test_arithmetic_expressions_256_5 values (4,277170831.851350000,7,7); """
+
+    qt_decimal256_mod """ select v1, v2, v1 % v2, v1 % v3 from test_arithmetic_expressions_256_5 ORDER BY id; """
+    */
+
 }
diff --git a/regression-test/suites/datatype_p0/decimalv3/test_decimalv3.groovy b/regression-test/suites/datatype_p0/decimalv3/test_decimalv3.groovy
index 2b72c36867..f0535ad868 100644
--- a/regression-test/suites/datatype_p0/decimalv3/test_decimalv3.groovy
+++ b/regression-test/suites/datatype_p0/decimalv3/test_decimalv3.groovy
@@ -20,19 +20,103 @@ suite("test_decimalv3") {
     sql "CREATE DATABASE IF NOT EXISTS ${db}"
     sql "use ${db}"
     sql "drop table if exists test5"
-	sql '''CREATE  TABLE test5 (   `a` decimalv3(38,18),   `b` decimalv3(38,18) ) ENGINE=OLAP DUPLICATE KEY(`a`) COMMENT 'OLAP' DISTRIBUTED BY HASH(`a`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1" ) '''
-	sql "insert into test5 values(50,2)"
-	sql "drop view if exists test5_v"
-	sql "create view test5_v (amout) as select cast(a*b as decimalv3(38,18)) from test5"
+    sql '''CREATE  TABLE test5 (   `a` decimalv3(38,18),   `b` decimalv3(38,18) ) ENGINE=OLAP DUPLICATE KEY(`a`) COMMENT 'OLAP' DISTRIBUTED BY HASH(`a`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1" ) '''
+    sql "insert into test5 values(50,2)"
+    sql "drop view if exists test5_v"
+    sql "create view test5_v (amout) as select cast(a*b as decimalv3(38,18)) from test5"
 
-	qt_decimalv3 "select * from test5_v"
-	qt_decimalv3 "select cast(a as decimalv3(12,10)) * cast(b as decimalv3(18,10)) from test5"
+    qt_decimalv3 "select * from test5_v"
+    qt_decimalv3 "select cast(a as decimalv3(12,10)) * cast(b as decimalv3(18,10)) from test5"
+
+    /*
+    sql "drop table if exists test_decimal256;"
+    sql """ create table test_decimal256(k1 decimal(76, 6), v1 decimal(76, 6))
+                DUPLICATE KEY(`k1`, `v1`)
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+                properties("replication_num" = "1"); """
+    sql """insert into test_decimal256 values(1, 9999999999999999999999999999999999999999999999999999999999999999999999.999999),
+            (2, 4999999999999999999999999999999999999999999999999999999999999999999999.999999);"""
+    qt_decimalv3_0 "select * from test_decimal256 order by k1, v1; "
+    qt_decimalv3_1 "select * from test_decimal256 where v1 = 9999999999999999999999999999999999999999999999999999999999999999999999.999999 order by k1, v1; "
+    qt_decimalv3_2 "select * from test_decimal256 where v1 != 9999999999999999999999999999999999999999999999999999999999999999999999.999999 order by k1, v1; "
+    qt_decimalv3_3 "select * from test_decimal256 where v1 > 4999999999999999999999999999999999999999999999999999999999999999999999.999999 order by k1, v1; "
+    qt_decimalv3_4 "select * from test_decimal256 where v1 >= 4999999999999999999999999999999999999999999999999999999999999999999999.999999 order by k1, v1; "
+    qt_decimalv3_5 "select * from test_decimal256 where v1 < 9999999999999999999999999999999999999999999999999999999999999999999999.999999 order by k1, v1; "
+    qt_decimalv3_6 "select * from test_decimal256 where v1 <= 9999999999999999999999999999999999999999999999999999999999999999999999.999999 order by k1, v1; "
+	*/
+
+    sql "set experimental_enable_nereids_planner =false;"
+    qt_aEb_test1 "select 0e0;"
+    qt_aEb_test2 "select 1e-1"
+    qt_aEb_test3 "select -1e-2"
+    qt_aEb_test4 "select 10.123456e10;"
+    qt_aEb_test5 "select 123456789e-10"
+    qt_aEb_test6 "select 0.123445e10;"
+
+    sql "set enable_nereids_planner = true;"
+    sql "set enable_decimal256 = true;"
+    qt_decimal256_cast_0 """ select cast("999999.999999" as decimal(76,6));"""
+    qt_decimal256_cast_1 """select cast("9999999999999999999999999999999999999999999999999999999999999999999999.999999" as decimal(76,6));"""
+
+    // test const
+
+    // nereids
+    sql "set enable_nereids_planner = true;"
+
+    sql """ set enable_fallback_to_original_planner=false """
+    sql "set enable_decimal256 = true;"
+    qt_decimal256_const_0 "select 1.4E-45;"
+    qt_decimal256_const_1 "select 1.4E-80;"
+    sql "set enable_decimal256 = false;"
+    qt_decimal256_const_2 "select 1.4E-45;"
+    qt_decimal256_const_3 "select 1.4E-80;"
+
+    sql """ set enable_fallback_to_original_planner=true """
+    sql "set enable_decimal256 = true;"
+    qt_decimal256_const_4 "select 1.4E-45;"
+    qt_decimal256_const_5 "select 1.4E-80;"
+    sql "set enable_decimal256 = false;"
+    qt_decimal256_const_6 "select 1.4E-45;"
+    qt_decimal256_const_7 "select 1.4E-80;"
+
+    // not nereids
+    sql "set enable_nereids_planner = false;"
+    sql "set enable_decimal256 = true;"
+    qt_decimal256_const_8 "select 1.4E-45;"
+    qt_decimal256_const_9 "select 1.4E-80;"
+    sql "set enable_decimal256 = false;"
+    qt_decimal256_const_10 "select 1.4E-45;"
+    qt_decimal256_const_11 "select 1.4E-80;"
+
+    sql "set enable_nereids_planner = true;"
+    sql "set enable_decimal256 = true;"
+    sql "drop table if exists test_decimal256_cast_str;"
+    sql """ create table test_decimal256_cast_str(k1 int, v1 char(128))
+                DUPLICATE KEY(`k1`, `v1`)
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+                properties("replication_num" = "1"); """
+    sql """ insert into test_decimal256_cast_str values
+        (1, "9999999999999999999999999999999999999999999999999999999999999999999999.999999"),
+        (2, "-9999999999999999999999999999999999999999999999999999999999999999999999.999999"),
+        (3, "0.999999"),
+        (4, "-0.999999")
+    """
+    sql "sync"
+    qt_decimal256_cast_from_str_0 """ select k1, v1, cast(v1 as decimalv3(76, 6)) from test_decimal256_cast_str order by 1, 2, 3; """
+
+    sql "drop table if exists test_decimal256_cast_dec;"
+    sql """ create table test_decimal256_cast_dec(k1 decimal(38, 6), v1 decimal(38, 6))
+                DUPLICATE KEY(`k1`, `v1`)
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+                properties("replication_num" = "1"); """
+    sql """insert into test_decimal256_cast_dec values
+            (1, 99999999999999999999999999999999.999999),
+            (2, -99999999999999999999999999999999.999999),
+            (3, 1234567890.123456),
+            (4, -1234567890.123456);
+    """
+    qt_decimal256_cast_dec_0 """ select k1, v1, cast(v1 as decimalv3(76, 6)) from test_decimal256_cast_dec order by 1, 2, 3; """
+    qt_decimal256_cast_dec_1 """ select k1, v1, cast(v1 as decimalv3(76, 10)) from test_decimal256_cast_dec order by 1, 2, 3; """
+    qt_decimal256_cast_dec_2 """ select k1, v1, cast( cast(v1 as decimalv3(76, 6)) as decimalv3(38, 6) ) from test_decimal256_cast_dec order by 1, 2, 3; """
 
-	sql "set experimental_enable_nereids_planner =false;"
-	qt_aEb_test1 "select 0e0;"
-	qt_aEb_test2 "select 1e-1"
-	qt_aEb_test3 "select -1e-2"
-	qt_aEb_test4 "select 10.123456e10;"
-	qt_aEb_test5 "select 123456789e-10"
-	qt_aEb_test6 "select 0.123445e10;"
 }
diff --git a/regression-test/suites/datatype_p0/decimalv3/test_predicate.groovy b/regression-test/suites/datatype_p0/decimalv3/test_predicate.groovy
index 429f98b94a..a8e12dcb4e 100644
--- a/regression-test/suites/datatype_p0/decimalv3/test_predicate.groovy
+++ b/regression-test/suites/datatype_p0/decimalv3/test_predicate.groovy
@@ -43,5 +43,57 @@ suite("test_predicate") {
 
     qt_select2 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ 1 FROM ${table1} WHERE CAST((CASE WHEN (TRUE IS NOT NULL) THEN '1.2' ELSE '1.2' END) AS FLOAT) = CAST(1.2 AS decimal(2,1));"
     qt_select3 "SELECT * FROM ${table1} WHERE k1 != 1.1 ORDER BY k1"
+
+    // decimal256
+    sql "set enable_nereids_planner = true;"
+    sql "set enable_decimal256 = true;"
+    qt_select4 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ CAST((CASE WHEN (TRUE IS NOT NULL) THEN '1.2' ELSE '1.2' END) AS FLOAT) = CAST(1.2 AS decimal(76,1))"
+    qt_select5 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ 1 FROM ${table1} WHERE CAST((CASE WHEN (TRUE IS NOT NULL) THEN '1.2' ELSE '1.2' END) AS FLOAT) = CAST(1.2 AS decimal(76,1));"
+    qt_select6 "SELECT * FROM ${table1} WHERE k1 != cast(1.1 as decimalv3(76, 1)) ORDER BY k1"
     sql "drop table if exists ${table1}"
+
+    qt_select256_1 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999999 as decimalv3(76,10)) > cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10))"
+    qt_select256_2 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10)) > cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10))"
+    qt_select256_3 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999999 as decimalv3(76,10)) >= cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10))"
+    qt_select256_4 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999997 as decimalv3(76,10)) >= cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10))"
+
+    qt_select256_5 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10)) < cast(999999999999999999999999999999999999999999999999999999999999999999.9999999999 as decimalv3(76,10))"
+    qt_select256_6 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999999 as decimalv3(76,10)) < cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10))"
+    qt_select256_7 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10)) <= cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10))"
+    qt_select256_8 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999999 as decimalv3(76,10)) <= cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10))"
+
+    qt_select256_9 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999999 as decimalv3(76,10)) = cast(999999999999999999999999999999999999999999999999999999999999999999.9999999999 as decimalv3(76,10))"
+    qt_select256_10 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999999 as decimalv3(76,10)) = cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10))"
+
+    qt_select256_11 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999999 as decimalv3(76,10)) != cast(999999999999999999999999999999999999999999999999999999999999999999.9999999998 as decimalv3(76,10))"
+    qt_select256_12 "SELECT /*+ SET_VAR(enable_fold_constant_by_be = false) */ cast(999999999999999999999999999999999999999999999999999999999999999999.9999999999 as decimalv3(76,10)) != cast(999999999999999999999999999999999999999999999999999999999999999999.9999999999 as decimalv3(76,10))"
+
+
+    sql "DROP TABLE IF EXISTS `test_predicate_128_1`";
+    sql """
+    CREATE TABLE IF NOT EXISTS `test_predicate_128_1` (
+      `k1` decimalv3(38, 6) NULL COMMENT "",
+      `k2` decimalv3(38, 6) NULL COMMENT "",
+      `k3` decimalv3(38, 6) NULL COMMENT ""
+    ) ENGINE=OLAP
+    COMMENT "OLAP"
+    DISTRIBUTED BY HASH(`k1`, `k2`, `k3`) BUCKETS 8
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1"
+    );
+    """
+    sql """insert into test_predicate_128_1 values(1, 99999999999999999999999999999999.999999, 99999999999999999999999999999999.999999),
+            (2, 49999999999999999999999999999999.999999, 49999999999999999999999999999999.999999),
+            (3, 33333333333333333333333333333333.333333, 33333333333333333333333333333333.333333),
+            (4.444444, 2.222222, 3.333333);"""
+    qt_decimal256_select_all "select * from test_predicate_128_1 order by k1, k2;"
+    qt_decimal256_predicate_0 "select * from test_predicate_128_1 where cast(k2 as decimalv3(76, 6)) > (cast(33333333333333333333333333333333.333333 as decimalv3(76,7))) order by k1, k2;"
+    qt_decimal256_predicate_1 "select * from test_predicate_128_1 where cast(k2 as decimalv3(76, 6)) >= (cast(999999999999999999999999999999990.999999 as decimalv3(76,6)) / 10)order by k1, k2;"
+
+    qt_decimal256_predicate_2 "select * from test_predicate_128_1 where cast(k2 as decimalv3(76, 6)) < (cast(49999999999999999999999999999999.999999 as decimalv3(76,7))) order by k1, k2;"
+    qt_decimal256_predicate_3 "select * from test_predicate_128_1 where cast(k2 as decimalv3(76, 6)) <= (cast(33333333333333333333333333333333.333333 as decimalv3(76,7))) order by k1, k2;"
+
+    qt_decimal256_predicate_4 "select * from test_predicate_128_1 where cast(k2 as decimalv3(76, 6)) = (cast(99999999999999999999999999999999.999999 as decimalv3(76,7))) order by k1, k2;"
+    qt_decimal256_predicate_5 "select * from test_predicate_128_1 where cast(k2 as decimalv3(76, 6)) != (cast(99999999999999999999999999999999.999999 as decimalv3(76,7))) order by k1, k2;"
+
 }
diff --git a/regression-test/suites/query_p0/aggregate/aggregate_decimal256.groovy b/regression-test/suites/query_p0/aggregate/aggregate_decimal256.groovy
new file mode 100644
index 0000000000..8ea45c68bf
--- /dev/null
+++ b/regression-test/suites/query_p0/aggregate/aggregate_decimal256.groovy
@@ -0,0 +1,154 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("aggregate_decimal256") { // Aggregate regression cases: sum/avg/max/min/count over decimal(38, 6) columns cast to wider decimalv3 types.
+    sql "set enable_nereids_planner = true;" // session setting: turn on the Nereids planner for this suite
+    sql "set enable_decimal256 = true;" // session setting enabled before the casts to precision 39/76 used below
+    sql "drop table if exists test_aggregate_decimal256_sum;" // recreate the table from scratch on every run
+    sql """ create table test_aggregate_decimal256_sum(k1 int, v1 decimal(38, 6), v2 decimal(38, 6))
+                DUPLICATE KEY(`k1`, `v1`, `v2`)
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+                properties("replication_num" = "1"); """
+    
+    sql """insert into test_aggregate_decimal256_sum values 
+            (1, 1.000000, 99999999999999999999999999999999.999999),
+            (1, 1.000000, 99999999999999999999999999999999.999999),
+            (1, 1.000000, -999999.200002),
+            (1, 1.000000, 999999.200002),
+            (2, 11.000000, 99999999999999999999999999999999.999999),
+            (2, 11.000000, 99999999999999999999999999999999.999999),
+            (2, 11.000000, -999999.200002),
+            (2, 11.000000, 999999.200002);"""
+    sql "sync" // NOTE(review): presumably makes the inserted rows visible to the queries below; confirm
+
+    qt_sql_sum_1 """ select k1, sum(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_sum where v1 = 1 group by k1 order by 1, 2; """ // sum of the widened v2, restricted to the v1 = 1 rows
+    qt_sql_sum_2 """ select k1, sum(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_sum where v1 = 11 group by k1 order by 1, 2; """ // same aggregate, restricted to the v1 = 11 rows
+    qt_sql_sum_3 """ select k1, sum(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_sum group by k1 order by 1, 2; """ // no filter: one sum per k1 group
+    qt_sql_sum_4 """
+        select
+                k1,
+                sum(sum_val)
+        from
+                (
+                        (
+                                select
+                                        k1,
+                                        sum(cast(v2 as decimalv3(39, 6))) as sum_val
+                                from
+                                        test_aggregate_decimal256_sum
+                                where
+                                        v1 = 1
+                                group by k1
+                        )
+                        union
+                        all (
+                                select
+                                        k1,
+                                        sum(cast(v2 as decimalv3(39, 6))) as sum_val
+                                from
+                                        test_aggregate_decimal256_sum
+                                where
+                                        v1 = 11
+                                group by k1
+                        )
+                ) union1 group by k1
+        order by 1, 2;
+    """ // sum_4: re-aggregates the two per-filter sums after combining them with UNION ALL
+
+    qt_sql_sum_5 """ select cast(v2 as decimalv3(39, 6)) v2_cast, sum(k1) from test_aggregate_decimal256_sum group by v2_cast order by 1, 2; """ // the cast decimal value is the grouping key; the int key is summed
+
+    sql """insert into test_aggregate_decimal256_sum values 
+            (1, 1.000000, -999999.200002),
+            (1, 1.000000, 999999.200002),
+            (2, 11.000000, -999999.200002),
+            (2, 11.000000, 999999.200002);""" // second batch adds only the +/-999999.200002 rows
+    sql "sync"
+    qt_sql_sum_6 """ select cast(v1 as decimalv3(39, 6)) v1_cast, cast(v2 as decimalv3(39, 6)) v2_cast, sum(k1) from test_aggregate_decimal256_sum group by v1_cast, v2_cast order by 1, 2, 3; """ // two cast columns used as grouping keys
+    qt_sql_sum_7 """ select sum(cast(v1 as decimalv3(39, 6))) from test_aggregate_decimal256_sum order by 1; """ // sum_7..sum_9: global sums without GROUP BY
+    qt_sql_sum_8 """ select sum(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_sum order by 1; """
+    qt_sql_sum_9 """ select sum(cast(v1 as decimalv3(39, 6))), sum(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_sum order by 1, 2; """
+
+    sql "drop table if exists test_aggregate_decimal256_avg;" // separate table queried by the avg/max/min/count cases
+    sql """ create table test_aggregate_decimal256_avg(k1 int, v1 decimal(38, 6), v2 decimal(38, 6))
+                DUPLICATE KEY(`k1`, `v1`, `v2`)
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+                properties("replication_num" = "1"); """
+    
+    sql """insert into test_aggregate_decimal256_avg values 
+            (1, 1.000000, 99999999999999999999999999999999.999999),
+            (1, 1.000000, 99999999999999999999999999999999.999999),
+            (1, 1.000000, -999999.200002),
+            (1, 1.000000, 999999.200002),
+            (2, 11.000000, 99999999999999999999999999999999.999999),
+            (2, 11.000000, 99999999999999999999999999999999.999999),
+            (2, 11.000000, -999999.200002),
+            (2, 11.000000, 999999.200002);"""
+    sql "sync"
+    qt_sql_avg_1 """ select k1, avg(cast(v2 as decimalv3(76, 6))) from test_aggregate_decimal256_avg where v1 = 1 group by k1 order by 1, 2; """ // avg_1 and avg_2 cast to decimalv3(76, 6); the remaining avg cases cast to decimalv3(39, 6)
+    qt_sql_avg_2 """ select k1, avg(cast(v2 as decimalv3(76, 6))) from test_aggregate_decimal256_avg where v1 = 11 group by k1 order by 1, 2; """
+    qt_sql_avg_3 """ select k1, avg(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg group by k1 order by 1, 2; """
+    qt_sql_avg_4 """
+        select
+                k1,
+                avg(avg_val)
+        from
+                (
+                        (
+                                select
+                                        k1,
+                                        avg(cast(v2 as decimalv3(39, 6))) as avg_val
+                                from
+                                        test_aggregate_decimal256_avg
+                                where
+                                        v1 = 1
+                                group by k1
+                        )
+                        union
+                        all (
+                                select
+                                        k1,
+                                        avg(cast(v2 as decimalv3(39, 6))) as avg_val
+                                from
+                                        test_aggregate_decimal256_avg
+                                where
+                                        v1 = 11
+                                group by k1
+                        )
+                ) union1 group by k1
+        order by 1, 2;
+    """ // avg_4: average of the per-filter averages after combining them with UNION ALL
+    qt_sql_avg_5 """ select avg(cast(v1 as decimalv3(39, 6))) from test_aggregate_decimal256_avg order by 1; """
+    qt_sql_avg_6 """ select avg(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg order by 1; """
+    qt_sql_avg_7 """ select avg(cast(v1 as decimalv3(39, 6))), avg(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg order by 1, 2; """
+
+    qt_sql_max_1 """ select k1, max(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg group by k1 order by 1, 2; """ // the max/min/count cases below all query test_aggregate_decimal256_avg
+    qt_sql_max_2 """ select max(cast(v1 as decimalv3(39, 6))) from test_aggregate_decimal256_avg order by 1; """
+    qt_sql_max_3 """ select max(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg order by 1; """
+    qt_sql_max_4 """ select max(cast(v1 as decimalv3(39, 6))), max(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg order by 1; """
+
+    qt_sql_min_1 """ select k1, min(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg group by k1 order by 1, 2; """
+    qt_sql_min_2 """ select min(cast(v1 as decimalv3(39, 6))) from test_aggregate_decimal256_avg order by 1; """
+    qt_sql_min_3 """ select min(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg order by 1; """
+    qt_sql_min_4 """ select min(cast(v1 as decimalv3(39, 6))), min(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg order by 1; """
+
+    qt_sql_count_1 """ select k1, count(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg group by k1 order by 1, 2; """
+    qt_sql_count_2 """ select k1, count(cast(v1 as decimalv3(39, 6))), count(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg group by k1 order by 1, 2, 3; """
+    qt_sql_count_3 """ select count(cast(v1 as decimalv3(39, 6))), count(cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg order by 1, 2; """
+
+    // qt_sql_distinct_count_1 """ select k1, count(distinct cast(v1 as decimalv3(39, 6))) from test_aggregate_decimal256_avg group by k1 order by 1, 2;"""
+    // qt_sql_distinct_count_2 """ select k1, count(distinct cast(v1 as decimalv3(39, 6))), count(distinct cast(v2 as decimalv3(39, 6))) from test_aggregate_decimal256_avg group by k1 order by 1, 2, 3;"""
+}
diff --git a/regression-test/suites/query_p0/join/test_join_decimal256.groovy b/regression-test/suites/query_p0/join/test_join_decimal256.groovy
new file mode 100644
index 0000000000..7f6768ee3b
--- /dev/null
+++ b/regression-test/suites/query_p0/join/test_join_decimal256.groovy
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// The cases are copied from https://github.com/trinodb/trino/tree/master
+// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/aggregate
+// and modified by Doris.
+
+suite("join_decimal256") { // inner-join cases whose join keys are decimal(38, 6) columns cast to decimal(76, 6)
+    sql "set enable_nereids_planner = true;"
+    sql "set enable_decimal256 = true;" // presumably required for the decimal(76, 6) casts below -- TODO confirm
+    sql "drop table if exists test_join_decimal256_0;"
+    sql """ create table test_join_decimal256_0(k1 int, v1 decimal(38, 6), v2 decimal(38, 6))
+                DUPLICATE KEY(`k1`, `v1`, `v2`)
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+                properties("replication_num" = "1"); """
+    // Left side of the joins: 6 rows, k1 in {10, 110}.
+    sql """insert into test_join_decimal256_0 values 
+            (10, 10.000000, 99999999999999999999999999999999.999999), (10, 10.000000, 0.000001), (10, -10.000000, -0.000001),
+            (110, 110.000000, 99999999999999999999999999999999.999999), (110, 110.000000, 0.000001), (110, -110.000000, -0.000001);"""
+    // Right side of the joins: test_join_decimal256_1 has the same column definitions as test_join_decimal256_0.
+    sql "drop table if exists test_join_decimal256_1;"
+    sql """ create table test_join_decimal256_1(k1 int, v1 decimal(38, 6), v2 decimal(38, 6))
+                DUPLICATE KEY(`k1`, `v1`, `v2`)
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+                properties("replication_num" = "1"); """
+    // 12 rows, k1 in {11, 111}; v1/v2 reuse the values loaded into test_join_decimal256_0.
+    sql """insert into test_join_decimal256_1 values 
+            (11, 10.000000, 99999999999999999999999999999999.999999), (111, 10.000000, 99999999999999999999999999999999.999999),
+            (11, 10.000000, 0.000001), (111, 10.000000, 0.000001),
+            (11, -10.000000, -0.000001), (111, -10.000000, -0.000001),
+            (11, 110.000000, 99999999999999999999999999999999.999999),(111, 110.000000, 99999999999999999999999999999999.999999),
+            (11, 110.000000, 0.000001),(111, 110.000000, 0.000001),
+            (11, -110.000000, -0.000001), (111, -110.000000, -0.000001);"""
+    sql "sync"
+    // join_1: equi-join on v2 cast to decimal(76, 6); "order by 1,2,3,4,5,6" sorts on every output column.
+    qt_join_1 """
+        select
+                t0.v2_cast, t1.v2_cast, t0.k1, t0.v1, t1.k1, t1.v1
+        from
+                (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_join_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_join_decimal256_1
+                ) t1 on t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+    // join_2: equi-join on both v1 and v2, each cast to decimal(76, 6).
+    qt_join_2 """
+        select
+                t0.v1_cast, t0.v2_cast, t1.v1_cast, t1.v2_cast, t0.k1, t1.k1
+        from
+                (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_join_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_join_decimal256_1
+                ) t1 on t0.v1_cast = t1.v1_cast and t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+}
\ No newline at end of file
diff --git a/regression-test/suites/query_p0/join/test_runtime_filter_decimal256.groovy b/regression-test/suites/query_p0/join/test_runtime_filter_decimal256.groovy
new file mode 100644
index 0000000000..46d8a23982
--- /dev/null
+++ b/regression-test/suites/query_p0/join/test_runtime_filter_decimal256.groovy
@@ -0,0 +1,302 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_runtime_filter_decimal256", "query_p0") { // repeats two decimal(76, 6) inner joins under each runtime_filter_type setting
+    sql "set enable_nereids_planner = true;"
+    sql "set enable_decimal256 = true;" // presumably required for the decimal(76, 6) casts below -- TODO confirm
+    sql "set parallel_fragment_exec_instance_num = 4;"
+
+    sql "drop table if exists test_runtime_filter_decimal256_0;"
+    sql """ create table test_runtime_filter_decimal256_0(k1 int, v1 decimal(38, 6), v2 decimal(38, 6))
+                DUPLICATE KEY(`k1`, `v1`, `v2`)
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+                properties("replication_num" = "1"); """
+    // Left side of the joins: 6 rows, k1 in {10, 110}.
+    sql """insert into test_runtime_filter_decimal256_0 values 
+            (10, 10.000000, 99999999999999999999999999999999.999999),
+            (10, 10.000000, 0.000001),
+            (10, -10.000000, -0.000001),
+            (110, 110.000000, 99999999999999999999999999999999.999999),
+            (110, 110.000000, 0.000001),
+            (110, -110.000000, -0.000001);"""
+    // Right side of the joins: same column definitions as test_runtime_filter_decimal256_0.
+    sql "drop table if exists test_runtime_filter_decimal256_1;"
+    sql """ create table test_runtime_filter_decimal256_1(k1 int, v1 decimal(38, 6), v2 decimal(38, 6))
+                DUPLICATE KEY(`k1`, `v1`, `v2`)
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 10
+                properties("replication_num" = "1"); """
+    // 12 rows, k1 in {11, 111}; v1/v2 reuse the values loaded into test_runtime_filter_decimal256_0.
+    sql """insert into test_runtime_filter_decimal256_1 values 
+            (11, 10.000000, 99999999999999999999999999999999.999999),
+            (111, 10.000000, 99999999999999999999999999999999.999999),
+            (11, 10.000000, 0.000001),
+            (111, 10.000000, 0.000001),
+            (11, -10.000000, -0.000001),
+            (111, -10.000000, -0.000001),
+            (11, 110.000000, 99999999999999999999999999999999.999999),
+            (111, 110.000000, 99999999999999999999999999999999.999999),
+            (11, 110.000000, 0.000001),
+            (111, 110.000000, 0.000001),
+            (11, -110.000000, -0.000001),
+            (111, -110.000000, -0.000001);"""
+    sql "sync"
+    // Each section below sets runtime_filter_type, then runs the same two joins: *_1 joins on v2_cast, *_2 on v1_cast and v2_cast.
+    sql """ set runtime_filter_type="IN"; """
+    qt_rf_in_1 """
+        select
+                t0.v2_cast, t1.v2_cast, t0.k1, t0.v1, t1.k1, t1.v1
+        from
+                (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_1
+                ) t1 on t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+
+    qt_rf_in_2 """
+        select
+                t0.v1_cast, t0.v2_cast, t1.v1_cast, t1.v2_cast, t0.k1, t1.k1
+        from
+                (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_1
+                ) t1 on t0.v1_cast = t1.v1_cast and t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+
+    sql """ set runtime_filter_type="BLOOM_FILTER"; """
+    qt_rf_bf_1 """
+        select
+                t0.v2_cast, t1.v2_cast, t0.k1, t0.v1, t1.k1, t1.v1
+        from
+                (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_1
+                ) t1 on t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+
+    qt_rf_bf_2 """
+        select
+                t0.v1_cast, t0.v2_cast, t1.v1_cast, t1.v2_cast, t0.k1, t1.k1
+        from
+                (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_1
+                ) t1 on t0.v1_cast = t1.v1_cast and t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+
+    sql """ set runtime_filter_type="MIN_MAX"; """
+    qt_rf_minmax_1 """
+        select
+                t0.v2_cast, t1.v2_cast, t0.k1, t0.v1, t1.k1, t1.v1
+        from
+                (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_1
+                ) t1 on t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+
+    qt_rf_minmax_2 """
+        select
+                t0.v1_cast, t0.v2_cast, t1.v1_cast, t1.v2_cast, t0.k1, t1.k1
+        from
+                (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_1
+                ) t1 on t0.v1_cast = t1.v1_cast and t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+
+    sql """ set runtime_filter_type="IN_OR_BLOOM_FILTER"; """
+    qt_rf_in_or_bf_1 """
+        select
+                t0.v2_cast, t1.v2_cast, t0.k1, t0.v1, t1.k1, t1.v1
+        from
+                (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_1
+                ) t1 on t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+
+    qt_rf_in_or_bf_2 """
+        select
+                t0.v1_cast, t0.v2_cast, t1.v1_cast, t1.v2_cast, t0.k1, t1.k1
+        from
+                (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_1
+                ) t1 on t0.v1_cast = t1.v1_cast and t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+    // NOTE(review): both joins below are plain equality joins on decimal columns; confirm BITMAP_FILTER actually exercises a runtime filter here.
+    sql """ set runtime_filter_type="BITMAP_FILTER"; """
+    qt_rf_bitmap_1 """
+        select
+                t0.v2_cast, t1.v2_cast, t0.k1, t0.v1, t1.k1, t1.v1
+        from
+                (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                v1,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_1
+                ) t1 on t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+
+    qt_rf_bitmap_2 """
+        select
+                t0.v1_cast, t0.v2_cast, t1.v1_cast, t1.v2_cast, t0.k1, t1.k1
+        from
+                (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_0
+                ) t0
+                inner join (
+                        select
+                                k1,
+                                cast(v1 as decimal(76, 6)) v1_cast,
+                                cast(v2 as decimal(76, 6)) v2_cast
+                        from
+                                test_runtime_filter_decimal256_1
+                ) t1 on t0.v1_cast = t1.v1_cast and t0.v2_cast = t1.v2_cast
+        order by
+                1,2,3,4,5,6;
+    """
+}
\ No newline at end of file