From 384fb211f79cc0458f9cf96d4ff7742eed50414d Mon Sep 17 00:00:00 2001 From: Liu Gexian <1019630230@qq.com> Date: Wed, 12 Nov 2025 15:08:53 +0800 Subject: [PATCH] expression: support more locales for FORMAT() (#64316) ref pingcap/tidb#56167 --- pkg/expression/builtin_string.go | 10 +- pkg/expression/builtin_string_test.go | 124 +++++++++- pkg/expression/builtin_string_vec.go | 42 ++-- pkg/parser/mysql/const.go | 18 -- pkg/parser/mysql/locale_format.go | 232 ++++++++++++++++-- .../r/expression/builtin.result | 4 +- 6 files changed, 360 insertions(+), 70 deletions(-) diff --git a/pkg/expression/builtin_string.go b/pkg/expression/builtin_string.go index 154b8c9ec2..a7f947a077 100644 --- a/pkg/expression/builtin_string.go +++ b/pkg/expression/builtin_string.go @@ -3608,11 +3608,13 @@ func (b *builtinFormatWithLocaleSig) evalString(ctx EvalContext, row chunk.Row) tc := typeCtx(ctx) if isNull { tc.AppendWarning(errUnknownLocale.FastGenByArgs("NULL")) - } else if !strings.EqualFold(locale, "en_US") { // TODO: support other locales. + locale = "en_US" + } + formatString, found, err := mysql.FormatByLocale(x, d, locale) + // If locale was not NULL and not found, warn unknown locale. 
+ if !isNull && !found { tc.AppendWarning(errUnknownLocale.FastGenByArgs(locale)) } - locale = "en_US" - formatString, err := mysql.GetLocaleFormatFunction(locale)(x, d) return formatString, false, err } @@ -3637,7 +3639,7 @@ func (b *builtinFormatSig) evalString(ctx EvalContext, row chunk.Row) (string, b if isNull || err != nil { return "", isNull, err } - formatString, err := mysql.GetLocaleFormatFunction("en_US")(x, d) + formatString, _, err := mysql.FormatByLocale(x, d, "en_US") return formatString, false, err } diff --git a/pkg/expression/builtin_string_test.go b/pkg/expression/builtin_string_test.go index e6ac5267d7..a88ff3ee8b 100644 --- a/pkg/expression/builtin_string_test.go +++ b/pkg/expression/builtin_string_test.go @@ -2039,8 +2039,8 @@ func TestFormat(t *testing.T) { require.NoError(t, err) testutil.DatumEqual(t, types.NewDatum(formatTests4.ret), r4) warnings := ctx.GetSessionVars().StmtCtx.GetWarnings() - require.Equal(t, 3, len(warnings)) - for i := range 3 { + require.Equal(t, 2, len(warnings)) + for i := range 2 { require.True(t, terror.ErrorEqual(errUnknownLocale, warnings[i].Err)) } ctx.GetSessionVars().StmtCtx.SetWarnings([]contextutil.SQLWarn{}) @@ -2763,3 +2763,123 @@ func TestCIWeightString(t *testing.T) { checkResult("utf8mb4_unicode_ci", unicodeTests) checkResult("utf8mb4_0900_ai_ci", unicode0900Tests) } + +// TestFormatWithLocale tests the 3-argument version of FORMAT(X, D, locale) +// with various locales and number formats. +func TestFormatWithLocale(t *testing.T) { + ctx := createContext(t) + fc := funcs[ast.Format] + + tests := []struct { + number any + precision any + locale any // Use 'any' to test NULL locale + ret any // Expected result + warning bool // True if we expect an 'Unknown locale' warning + desc string + }{ + // --- Style: CommaDot (123,456.78) --- + // This is the default fallback for most unhandled locales. 
+ {1234567.89, 2, "en_US", "1,234,567.89", false, "CommaDot (en_US) - standard"}, + {-98765.432, 2, "zh_CN", "-98,765.43", false, "CommaDot (zh_CN) - negative, rounding"}, + {0.01, 4, "ja_JP", "0.0100", false, "CommaDot (ja_JP) - decimal padding"}, + {12345, 0, "en_GB", "12,345", false, "CommaDot (en_GB) - no decimal part"}, + {1.2, 2, "ko_KR", "1.20", false, "CommaDot (ko_KR) - extra locale"}, + {500.5, 1, "th_TH", "500.5", false, "CommaDot (th_TH) - extra locale"}, + {7777, 0, "en_AU", "7,777", false, "CommaDot (en_AU) - extra locale"}, + {-88.88, 2, "zh_TW", "-88.88", false, "CommaDot (zh_TW) - extra locale"}, + // Fallback locales that MySQL treats as en_US + {9876543.21, 1, "es_MX", "9,876,543.2", false, "CommaDot (es_MX) - MySQL fallback"}, + {3000.14, 2, "ce_RU", "3,000.14", false, "CommaDot (ce_RU) - MySQL fallback"}, + {4000.1, 1, "ky_KG", "4,000.1", false, "CommaDot (ky_KG) - MySQL fallback"}, + {200, 2, "aa_DJ", "200.00", false, "CommaDot (aa_DJ) - MySQL fallback"}, + {7890123.456, 2, "ps_AF", "7,890,123.46", false, "CommaDot (ps_AF) - MySQL fallback"}, + {12345.67, 2, "an_ES", "12,345.67", false, "CommaDot (an_ES) - MySQL fallback"}, + {12345.67, 2, "az_AZ", "12,345.67", false, "CommaDot (az_AZ) - MySQL fallback"}, + {12345.67, 2, "br_FR", "12,345.67", false, "CommaDot (br_FR) - MySQL fallback"}, + {3000.14, 2, "kv_RU", "3,000.14", false, "CommaDot (kv_RU) - MySQL fallback"}, + {12345.67, 3, "su_ID", "12,345.670", false, "CommaDot (su_ID) - MySQL fallback"}, + + // --- Style: DotComma (123.456,78) --- + {7654321.98, 2, "de_DE", "7.654.321,98", false, "DotComma (de_DE) - large number"}, + {-9.999, 2, "es_ES", "-10,00", false, "DotComma (es_ES) - negative, rounding up to 10"}, + {"-123.45", 1, "id_ID", "-123,5", false, "DotComma (id_ID) - string input"}, + {99, 1, "vi_VN", "99,0", false, "DotComma (vi_VN) - extra locale"}, + {8888.8, 0, "ro_RO", "8.889", false, "DotComma (ro_RO) - extra locale, rounding"}, + {1234.567, 2, "da_DK", "1.234,57", false, 
"DotComma (da_DK) - extra locale, rounding"}, + {555.55, 1, "tr_TR", "555,6", false, "DotComma (tr_TR) - extra locale, rounding"}, + {1234.56, 2, "nb_NO", "1.234,56", false, "DotComma (nb_NO) - MySQL behavior"}, + {1234.56, 2, "uk_UA", "1.234,56", false, "DotComma (uk_UA) - MySQL behavior"}, + {12345.67, 3, "no_NO", "12.345,670", false, "DotComma (no_NO) - MySQL behavior"}, + + // --- Style: SpaceComma (123 456,78) --- + {-0.88, 1, "ru_RU", "-0,9", false, "SpaceComma (ru_RU) - negative, rounding"}, + {98765, 0, "sv_SE", "98 765", false, "SpaceComma (sv_SE) - no decimal part"}, + {2000, 2, "cs_CZ", "2 000,00", false, "SpaceComma (cs_CZ) - extra locale, padding"}, + + // --- Style: NoneComma (123456,78) --- + {-2.23, 1, "el_GR", "-2,2", false, "NoneComma (el_GR) - negative, rounding"}, + {44.44, 1, "pt_PT", "44,4", false, "NoneComma (pt_PT) - extra locale"}, + {12345, 0, "it_IT", "12345", false, "NoneComma (it_IT) - MySQL behavior"}, + {100.5, 3, "pt_BR", "100,500", false, "NoneComma (pt_BR) - MySQL behavior"}, + {500000.1, 2, "fr_FR", "500000,10", false, "NoneComma (fr_FR) - MySQL behavior"}, + {1999.9, 0, "pl_PL", "2000", false, "NoneComma (pl_PL) - MySQL behavior"}, + {123, 2, "fr_CH", "123,00", false, "NoneComma (fr_CH) - MySQL behavior"}, + {12345, 0, "de_AT", "12345", false, "NoneComma (de_AT) - MySQL behavior"}, + {1000000, 2, "bg_BG", "1000000,00", false, "NoneComma (bg_BG) - MySQL behavior"}, + + // --- Style: AposDot (123'456.78) --- + {4567890.123, 2, "de_CH", "4'567'890.12", false, "AposDot (de_CH) - large number"}, + + // --- Style: AposComma (123'456,78) --- + {4567890.123, 2, "it_CH", "4'567'890,12", false, "AposComma (it_CH) - MySQL behavior"}, + + // --- Style: NoneDot (123456.78) --- + {1000000.5, 0, "ar_SA", "1000001", false, "NoneDot (ar_SA) - no grouping, rounding"}, + {12345.6, 1, "sr_RS", "12345.6", false, "NoneDot (sr_RS) - MySQL behavior"}, + + // --- Style: Indian (1,23,45,67,890.123) --- + {1234567890.123, 3, "en_IN", "1,23,45,67,890.123", 
false, "Indian (en_IN) - lakh/crore grouping"}, + {987654321, 0, "ta_IN", "98,76,54,321", false, "Indian (ta_IN) - no decimal"}, + {-5000.5, 1, "te_IN", "-5,000.5", false, "Indian (te_IN) - only one separator"}, + + // --- Special Cases (Case, NULL, Invalid) --- + {12345.67, 2, "dE_dE", "12.345,67", false, "DotComma (de_DE) - case insensitive"}, + {12345.67, 2, "en_us", "12,345.67", false, "CommaDot (en_US) - case insensitive"}, + + // Test NULL locale: should fallback to en_US and produce a warning + {12345.67, 2, nil, "12,345.67", true, "NULL locale fallback"}, + + // Test an invalid/unmapped locale + // Should fallback to en_US (styleCommaDot) and issue a warning. + {12345.67, 2, "de_GE", "12,345.67", true, "Invalid locale 'de_GE' fallback"}, + {12345.67, 2, "non_existent", "12,345.67", true, "Invalid locale 'non_existent' fallback"}, + } + + for _, tt := range tests { + // Clear warnings for each test run + ctx.GetSessionVars().StmtCtx.SetWarnings(nil) + + // Get function signature + f, err := fc.getFunction(ctx, datumsToConstants(types.MakeDatums(tt.number, tt.precision, tt.locale))) + require.NoError(t, err, "test: %s", tt.desc) + require.NotNil(t, f, "test: %s", tt.desc) + + // Evaluate + r, err := evalBuiltinFunc(f, ctx, chunk.Row{}) + require.NoError(t, err, "test: %s", tt.desc) + + // Check result + testutil.DatumEqual(t, types.NewDatum(tt.ret), r, "test: %s", tt.desc) + + // Check warnings + warnings := ctx.GetSessionVars().StmtCtx.GetWarnings() + if tt.warning { + require.Len(t, warnings, 1, "test: %s", tt.desc) + // Check if it's the 'Unknown locale' warning + require.True(t, terror.ErrorEqual(errUnknownLocale, warnings[0].Err), "test: %s", tt.desc) + } else { + require.Len(t, warnings, 0, "test: %s", tt.desc) + } + } +} diff --git a/pkg/expression/builtin_string_vec.go b/pkg/expression/builtin_string_vec.go index 7691c29bbc..c90e77090d 100644 --- a/pkg/expression/builtin_string_vec.go +++ b/pkg/expression/builtin_string_vec.go @@ -2989,30 +2989,34 @@ 
func formatDecimal(ctx EvalContext, xBuf *chunk.Column, dInt64s []int64, result } locale := "en_US" + isNull := false if localeBuf == nil { // FORMAT(x, d) } else if localeBuf.IsNull(i) { // FORMAT(x, d, NULL) + isNull = true tc := typeCtx(ctx) tc.AppendWarning(errUnknownLocale.FastGenByArgs("NULL")) - } else if !strings.EqualFold(localeBuf.GetString(i), "en_US") { - // TODO: support other locales. - tc := typeCtx(ctx) - + } else { // force copy of the string // https://github.com/pingcap/tidb/issues/56193 - locale := strings.Clone(localeBuf.GetString(i)) - tc.AppendWarning(errUnknownLocale.FastGenByArgs(locale)) + locale = strings.Clone(localeBuf.GetString(i)) } xStr := roundFormatArgs(x.String(), int(d)) dStr := strconv.FormatInt(d, 10) - localeFormatFunction := mysql.GetLocaleFormatFunction(locale) - - formatString, err := localeFormatFunction(xStr, dStr) + formatString, found, err := mysql.FormatByLocale(xStr, dStr, locale) if err != nil { return err } + // Check 'found' flag, only warn for unknown locales + if !isNull && !found { + tc := typeCtx(ctx) + if localeBuf != nil { + locale = strings.Clone(localeBuf.GetString(i)) + } + tc.AppendWarning(errUnknownLocale.FastGenByArgs(locale)) + } result.AppendString(formatString) } return nil @@ -3035,30 +3039,34 @@ func formatReal(ctx EvalContext, xBuf *chunk.Column, dInt64s []int64, result *ch } locale := "en_US" + isNull := false if localeBuf == nil { // FORMAT(x, d) } else if localeBuf.IsNull(i) { // FORMAT(x, d, NULL) + isNull = true tc := typeCtx(ctx) tc.AppendWarning(errUnknownLocale.FastGenByArgs("NULL")) - } else if !strings.EqualFold(localeBuf.GetString(i), "en_US") { - // TODO: support other locales. 
- tc := typeCtx(ctx) - + } else { // force copy of the string // https://github.com/pingcap/tidb/issues/56193 - locale := strings.Clone(localeBuf.GetString(i)) - tc.AppendWarning(errUnknownLocale.FastGenByArgs(locale)) + locale = strings.Clone(localeBuf.GetString(i)) } xStr := roundFormatArgs(strconv.FormatFloat(x, 'f', -1, 64), int(d)) dStr := strconv.FormatInt(d, 10) - localeFormatFunction := mysql.GetLocaleFormatFunction(locale) - formatString, err := localeFormatFunction(xStr, dStr) + formatString, found, err := mysql.FormatByLocale(xStr, dStr, locale) if err != nil { return err } + if !isNull && !found { + tc := typeCtx(ctx) + if localeBuf != nil { + locale = strings.Clone(localeBuf.GetString(i)) + } + tc.AppendWarning(errUnknownLocale.FastGenByArgs(locale)) + } result.AppendString(formatString) } return nil diff --git a/pkg/parser/mysql/const.go b/pkg/parser/mysql/const.go index 38daff29be..54746ac923 100644 --- a/pkg/parser/mysql/const.go +++ b/pkg/parser/mysql/const.go @@ -580,24 +580,6 @@ var CombinationSQLMode = map[string][]string{ "TRADITIONAL": {"STRICT_TRANS_TABLES", "STRICT_ALL_TABLES", "NO_ZERO_IN_DATE", "NO_ZERO_DATE", "ERROR_FOR_DIVISION_BY_ZERO", "NO_AUTO_CREATE_USER", "NO_ENGINE_SUBSTITUTION"}, } -// FormatFunc is the locale format function signature. -type FormatFunc func(string, string) (string, error) - -// GetLocaleFormatFunction get the format function for sepcific locale. -func GetLocaleFormatFunction(loc string) FormatFunc { - locale, exist := locale2FormatFunction[loc] - if !exist { - return formatNotSupport - } - return locale -} - -// locale2FormatFunction is the string represent of locale format function. -var locale2FormatFunction = map[string]FormatFunc{ - "en_US": formatENUS, - "zh_CN": formatZHCN, -} - // PriorityEnum is defined for Priority const values. 
type PriorityEnum int diff --git a/pkg/parser/mysql/locale_format.go b/pkg/parser/mysql/locale_format.go index 9d92c2a465..674b29739e 100644 --- a/pkg/parser/mysql/locale_format.go +++ b/pkg/parser/mysql/locale_format.go @@ -5,11 +5,195 @@ import ( "strconv" "strings" "unicode" - - "github.com/pingcap/errors" ) -func formatENUS(number string, precision string) (string, error) { +// LocaleFormatStyle defines the rules for number formatting. +type LocaleFormatStyle struct { + ThousandsSep string // Thousands separator + DecimalPoint string // Decimal point + IsIndianGrouping bool // Special grouping for en_IN, etc. (3,2,2,...) +} + +// Formatting style IDs (descriptive names) +const ( + styleCommaDot = "CommaDot" // 123,456.78 (en_US) + styleDotComma = "DotComma" // 123.456,78 (de_DE) + styleSpaceComma = "SpaceComma" // 123 456,78 (ru_RU) + styleNoneComma = "NoneComma" // 123456,78 (bg_BG) + styleAposDot = "AposDot" // 123'456.78 (de_CH) + styleAposComma = "AposComma" // 123'456,78 (it_CH) + styleNoneDot = "NoneDot" // 123456.78 (ar_SA) + styleIndian = "Indian" // 1,23,45,67,890.123 (en_IN) +) + +// formatStyleMap maps a style ID to its separator definitions. +var formatStyleMap = map[string]LocaleFormatStyle{ + // IsIndianGrouping is false by default + styleCommaDot: {ThousandsSep: ",", DecimalPoint: "."}, + styleDotComma: {ThousandsSep: ".", DecimalPoint: ","}, + styleSpaceComma: {ThousandsSep: " ", DecimalPoint: ","}, + styleNoneComma: {ThousandsSep: "", DecimalPoint: ","}, + styleAposDot: {ThousandsSep: "'", DecimalPoint: "."}, + styleAposComma: {ThousandsSep: "'", DecimalPoint: ","}, + styleNoneDot: {ThousandsSep: "", DecimalPoint: "."}, + styleIndian: {ThousandsSep: ",", DecimalPoint: ".", IsIndianGrouping: true}, +} + +// localeToStyleMap maps locale names (lowercase) to their corresponding format style ID. +var localeToStyleMap = map[string]string{ + // styleCommaDot (123,456.78): Default/fallback format (e.g., en_US) or where MySQL behavior matches. 
+ "aa_et": styleCommaDot, "af_za": styleCommaDot, "ak_gh": styleCommaDot, "am_et": styleCommaDot, "ar_ae": styleCommaDot, "ar_bh": styleCommaDot, + "ar_dz": styleCommaDot, "ar_eg": styleCommaDot, "ar_in": styleCommaDot, "ar_iq": styleCommaDot, "ar_jo": styleCommaDot, "ar_kw": styleCommaDot, + "ar_lb": styleCommaDot, "ar_ly": styleCommaDot, "ar_ma": styleCommaDot, "ar_om": styleCommaDot, "ar_qa": styleCommaDot, "ar_sd": styleCommaDot, + "ar_ss": styleCommaDot, "ar_sy": styleCommaDot, "ar_tn": styleCommaDot, "ar_ye": styleCommaDot, "az_ir": styleCommaDot, "bi_vu": styleCommaDot, + "bo_cn": styleCommaDot, "bo_in": styleCommaDot, "cy_gb": styleCommaDot, "dv_mv": styleCommaDot, "en_ag": styleCommaDot, "en_au": styleCommaDot, + "en_bw": styleCommaDot, "en_ca": styleCommaDot, "en_gb": styleCommaDot, "en_hk": styleCommaDot, "en_ie": styleCommaDot, "en_il": styleCommaDot, + "en_ng": styleCommaDot, "en_nz": styleCommaDot, "en_ph": styleCommaDot, "en_sg": styleCommaDot, "en_us": styleCommaDot, "en_za": styleCommaDot, + "en_zm": styleCommaDot, "en_zw": styleCommaDot, "es_do": styleCommaDot, "es_gt": styleCommaDot, "es_hn": styleCommaDot, "es_ni": styleCommaDot, + "es_pa": styleCommaDot, "es_pr": styleCommaDot, "es_sv": styleCommaDot, "es_us": styleCommaDot, "fa_ir": styleCommaDot, "ga_ie": styleCommaDot, + "gd_gb": styleCommaDot, "gu_in": styleCommaDot, "gv_gb": styleCommaDot, "ha_ng": styleCommaDot, "he_il": styleCommaDot, "hi_in": styleCommaDot, + "hy_am": styleCommaDot, "ig_ng": styleCommaDot, "ik_ca": styleCommaDot, "iu_ca": styleCommaDot, "ja_jp": styleCommaDot, "km_kh": styleCommaDot, + "kn_in": styleCommaDot, "ko_kr": styleCommaDot, "ks_in": styleCommaDot, "kw_gb": styleCommaDot, "lg_ug": styleCommaDot, "lo_la": styleCommaDot, + "mi_nz": styleCommaDot, "mr_in": styleCommaDot, "ms_my": styleCommaDot, "mt_mt": styleCommaDot, "my_mm": styleCommaDot, "ne_np": styleCommaDot, + "nr_za": styleCommaDot, "om_et": styleCommaDot, "om_ke": styleCommaDot, "pa_in": styleCommaDot, 
"pa_pk": styleCommaDot, "sa_in": styleCommaDot, + "sd_in": styleCommaDot, "si_lk": styleCommaDot, "sm_ws": styleCommaDot, "so_et": styleCommaDot, "so_ke": styleCommaDot, "so_so": styleCommaDot, + "ss_za": styleCommaDot, "st_za": styleCommaDot, "sw_ke": styleCommaDot, "sw_tz": styleCommaDot, "th_th": styleCommaDot, "ti_et": styleCommaDot, + "tk_tm": styleCommaDot, "tl_ph": styleCommaDot, "tn_za": styleCommaDot, "to_to": styleCommaDot, "ts_za": styleCommaDot, "ug_cn": styleCommaDot, + "ur_in": styleCommaDot, "ur_pk": styleCommaDot, "ve_za": styleCommaDot, "xh_za": styleCommaDot, "yi_us": styleCommaDot, "yo_ng": styleCommaDot, + "zh_cn": styleCommaDot, "zh_hk": styleCommaDot, "zh_sg": styleCommaDot, "zh_tw": styleCommaDot, "zu_za": styleCommaDot, + "an_es": styleCommaDot, "az_az": styleCommaDot, "ca_ad": styleCommaDot, "ca_fr": styleCommaDot, "ca_it": styleCommaDot, "de_it": styleCommaDot, + "en_dk": styleCommaDot, "es_pe": styleCommaDot, "ff_sn": styleCommaDot, "fy_de": styleCommaDot, "fy_nl": styleCommaDot, "ka_ge": styleCommaDot, + "kl_gl": styleCommaDot, "ku_tr": styleCommaDot, "lb_lu": styleCommaDot, "li_be": styleCommaDot, "li_nl": styleCommaDot, "nl_aw": styleCommaDot, + "sc_it": styleCommaDot, "se_no": styleCommaDot, "sq_mk": styleCommaDot, "tg_tj": styleCommaDot, "tr_cy": styleCommaDot, "wa_be": styleCommaDot, + "br_fr": styleCommaDot, "kk_kz": styleCommaDot, "nn_no": styleCommaDot, "oc_fr": styleCommaDot, "uz_uz": styleCommaDot, + "bs_ba": styleCommaDot, "el_cy": styleCommaDot, "es_cu": styleCommaDot, "ln_cd": styleCommaDot, "mg_mg": styleCommaDot, "rw_rw": styleCommaDot, "sr_me": styleCommaDot, "wo_sn": styleCommaDot, + "es_mx": styleCommaDot, + "ce_ru": styleCommaDot, "cv_ru": styleCommaDot, "ht_ht": styleCommaDot, "ia_fr": styleCommaDot, "ky_kg": styleCommaDot, "os_ru": styleCommaDot, "tt_ru": styleCommaDot, + "aa_dj": styleCommaDot, "aa_er": styleCommaDot, "so_dj": styleCommaDot, "ti_er": styleCommaDot, + "ps_af": styleCommaDot, + "kv_ru": styleCommaDot, 
+ "su_id": styleCommaDot, + + // styleDotComma (123.456,78): Common in Europe and South America. + "be_by": styleDotComma, "da_dk": styleDotComma, "de_be": styleDotComma, "de_de": styleDotComma, "de_lu": styleDotComma, + "es_ar": styleDotComma, "es_bo": styleDotComma, "es_cl": styleDotComma, "es_co": styleDotComma, + "es_ec": styleDotComma, "es_es": styleDotComma, "es_py": styleDotComma, "es_uy": styleDotComma, "es_ve": styleDotComma, + "fo_fo": styleDotComma, "hu_hu": styleDotComma, "id_id": styleDotComma, "is_is": styleDotComma, + "lt_lt": styleDotComma, "mn_mn": styleDotComma, "ro_ro": styleDotComma, "ru_ua": styleDotComma, "sq_al": styleDotComma, + "tr_tr": styleDotComma, "vi_vn": styleDotComma, + "nb_no": styleDotComma, "uk_ua": styleDotComma, + "no_no": styleDotComma, + + // styleSpaceComma (123 456,78): Uses space as thousands separator. + "cs_cz": styleSpaceComma, "es_cr": styleSpaceComma, "et_ee": styleSpaceComma, "fi_fi": styleSpaceComma, + "lv_lv": styleSpaceComma, "mk_mk": styleSpaceComma, + "ru_ru": styleSpaceComma, "sk_sk": styleSpaceComma, "sv_fi": styleSpaceComma, "sv_se": styleSpaceComma, + + // styleNoneComma (123456,78): No thousands separator. + "el_gr": styleNoneComma, "gl_es": styleNoneComma, "pt_pt": styleNoneComma, "sl_si": styleNoneComma, + "ca_es": styleNoneComma, "de_at": styleNoneComma, "eu_es": styleNoneComma, "fr_be": styleNoneComma, "hr_hr": styleNoneComma, "it_it": styleNoneComma, "nl_be": styleNoneComma, "nl_nl": styleNoneComma, "pt_br": styleNoneComma, + "fr_ca": styleNoneComma, "fr_fr": styleNoneComma, "fr_lu": styleNoneComma, "pl_pl": styleNoneComma, + "fr_ch": styleNoneComma, + "bg_bg": styleNoneComma, + + // styleAposDot (123'456.78): Uses apostrophe as thousands separator. + "de_ch": styleAposDot, + + // styleAposComma (123'456,78): Uses apostrophe separator, comma decimal. + "it_ch": styleAposComma, + + // styleNoneDot (123456.78): No thousands separator, dot decimal. 
+ "ar_sa": styleNoneDot, + "sr_rs": styleNoneDot, + + // styleIndian (1,23,45,67,890.123): Special Indian grouping (3,2,2,...). + "en_in": styleIndian, + "ta_in": styleIndian, + "te_in": styleIndian, +} + +// GetLocaleFormatStyle returns the formatting rules and a bool indicating if the locale was found. +func GetLocaleFormatStyle(locale string) (LocaleFormatStyle, bool) { + styleID, ok := localeToStyleMap[strings.ToLower(locale)] + if !ok { + // Not found. Return default style, but also return 'false'. + return formatStyleMap[styleCommaDot], false + } + // Found. Return style and 'true'. + return formatStyleMap[styleID], true +} + +// FormatByLocale returns (string, bool, error) +// The bool (found) is true if the locale was found in the map, false otherwise. +func FormatByLocale(number, precision, locale string) (string, bool, error) { + // An empty or unmapped locale is not an error: the lookup falls back to the default style and reports found=false. + style, found := GetLocaleFormatStyle(locale) + // if not found, style is set to default (en_US) + formattedString, err := formatWithStyle(number, precision, style) + return formattedString, found, err +} + +// formatWithStandardGrouping applies standard 3-digit grouping (e.g., 1,234,567) +func formatWithStandardGrouping(integerPart string, thousandsSep string) string { + var buffer bytes.Buffer + partLen := len(integerPart) + // Find position of first separator + pos := partLen % 3 + if pos == 0 && partLen > 0 { + pos = 3 + } + + // Write the first group (1-3 digits) + buffer.WriteString(integerPart[:pos]) + + // Write subsequent 3-digit groups + for ; pos < partLen; pos += 3 { + buffer.WriteString(thousandsSep) + buffer.WriteString(integerPart[pos : pos+3]) + } + return buffer.String() +} + +// formatWithIndianGrouping applies Indian grouping (e.g., 1,23,45,67,890) +func formatWithIndianGrouping(integerPart string, thousandsSep string) string { + var buffer bytes.Buffer + s := integerPart + l := len(s) + if l <= 3 { + return s // No grouping needed + 
} + + // Get the rightmost 3 digits (e.g., 890) + rightmost3 := s[l-3:] + // Get the remaining digits on the left (e.g., 1234567) + remaining := s[:l-3] + remLen := len(remaining) + + // Get the first part (1 or 2 digits) (e.g., 1) + firstPartLen := remLen % 2 + if firstPartLen == 0 && remLen > 0 { + firstPartLen = 2 + } + + if firstPartLen > 0 { + buffer.WriteString(remaining[:firstPartLen]) + } + + // Loop through the remaining 2-digit groups (e.g., 23, 45, 67) + for pos := firstPartLen; pos < remLen; pos += 2 { + buffer.WriteString(thousandsSep) + buffer.WriteString(remaining[pos : pos+2]) + } + + // Add the last separator and the rightmost 3 digits + buffer.WriteString(thousandsSep) + buffer.WriteString(rightmost3) + + return buffer.String() +} + +// formatWithStyle is the generic formatting function. +func formatWithStyle(number string, precision string, style LocaleFormatStyle) (string, error) { var buffer bytes.Buffer if unicode.IsDigit(rune(precision[0])) { for i, v := range precision { @@ -30,15 +214,16 @@ func formatENUS(number string, precision string) (string, error) { if (number[:1] == "-" && !unicode.IsDigit(rune(number[1]))) || (!unicode.IsDigit(rune(number[0])) && number[:1] != "-") { - buffer.Write([]byte{'0'}) + buffer.WriteString("0") position, err := strconv.ParseUint(precision, 10, 64) if err == nil && position > 0 { - buffer.Write([]byte{'.'}) + // Use style-defined decimal point + buffer.WriteString(style.DecimalPoint) buffer.WriteString(strings.Repeat("0", int(position))) } return buffer.String(), nil } else if number[:1] == "-" { - buffer.Write([]byte{'-'}) + buffer.WriteString("-") number = number[1:] } @@ -54,24 +239,27 @@ func formatENUS(number string, precision string) (string, error) { break } - comma := []byte{','} parts := strings.Split(number, ".") - pos := 0 - if len(parts[0])%3 != 0 { - pos += len(parts[0]) % 3 - buffer.WriteString(parts[0][:pos]) - buffer.Write(comma) + integerPart := parts[0] + var formattedIntegerPart string + 
+ // Apply grouping logic based on the locale style. + if len(style.ThousandsSep) == 0 { + // No separator (e.g., styleNoneComma), just use the integer part + formattedIntegerPart = integerPart + } else if style.IsIndianGrouping { + // Use 3,2,2... grouping for Indian locales. + formattedIntegerPart = formatWithIndianGrouping(integerPart, style.ThousandsSep) + } else { + // Use standard 3-digit grouping. + formattedIntegerPart = formatWithStandardGrouping(integerPart, style.ThousandsSep) } - for ; pos < len(parts[0]); pos += 3 { - buffer.WriteString(parts[0][pos : pos+3]) - buffer.Write(comma) - } - buffer.Truncate(buffer.Len() - 1) + buffer.WriteString(formattedIntegerPart) position, err := strconv.ParseUint(precision, 10, 64) if err == nil { if position > 0 { - buffer.Write([]byte{'.'}) + buffer.WriteString(style.DecimalPoint) // Use style-defined decimal point if len(parts) == 2 { if uint64(len(parts[1])) >= position { buffer.WriteString(parts[1][:position]) @@ -87,11 +275,3 @@ func formatENUS(number string, precision string) (string, error) { return buffer.String(), nil } - -func formatZHCN(_ string, _ string) (string, error) { - return "", errors.New("not implemented") -} - -func formatNotSupport(_ string, _ string) (string, error) { - return "", errors.New("not support for the specific locale") -} diff --git a/tests/integrationtest/r/expression/builtin.result b/tests/integrationtest/r/expression/builtin.result index 38500f943c..59c612eda1 100644 --- a/tests/integrationtest/r/expression/builtin.result +++ b/tests/integrationtest/r/expression/builtin.result @@ -2103,9 +2103,7 @@ format(NULL, 4) format(12332.2, NULL) NULL NULL select format(12332.2, 2,'es_EC'); format(12332.2, 2,'es_EC') -12,332.20 -Level Code Message -Warning 1649 Unknown locale: 'es_EC' +12.332,20 select field(1, 2, 1), field(1, 0, NULL), field(1, NULL, 2, 1), field(NULL, 1, 2, NULL); field(1, 2, 1) field(1, 0, NULL) field(1, NULL, 2, 1) field(NULL, 1, 2, NULL) 2 0 3 0