Files
loongoffice/sc/source/core/tool/stringutil.cxx
Caolán McNamara b38974391e Related: tdf#160056 do calc NumberFormatting via ScInterpreterContext
and for the duration of Threaded calculation where there will be
no new formats required we can drive number formatting with the
unlocked RO policy.

Change-Id: Ic0e449acdcf834bc569d13b4a984f13c55316801
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/165160
Tested-by: Jenkins
Reviewed-by: Caolán McNamara <caolan.mcnamara@collabora.com>
2024-03-23 01:14:13 +01:00

471 lines
14 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <interpretercontext.hxx>
#include <stringutil.hxx>
#include <svl/numformat.hxx>
#include <svl/zforlist.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/strbuf.hxx>
#include <rtl/math.hxx>
ScSetStringParam::ScSetStringParam() :
mpNumFormatter(nullptr),
mbDetectNumberFormat(true),
mbDetectScientificNumberFormat(true),
meSetTextNumFormat(Never),
mbHandleApostrophe(true),
meStartListening(sc::SingleCellListening),
mbCheckLinkFormula(false)
{
}
/**
 * Configure the parameter set for text input: the text number format policy
 * becomes Always, and apostrophe handling as well as both number-format
 * detection flags are switched off.
 *
 * Members not listed here (formatter, listening mode, link-formula check)
 * are left untouched.
 */
void ScSetStringParam::setTextInput()
{
    meSetTextNumFormat = Always;
    mbHandleApostrophe = false;
    mbDetectScientificNumberFormat = false;
    mbDetectNumberFormat = false;
}
/**
 * Configure the parameter set for numeric input: the text number format
 * policy becomes Never, and apostrophe handling as well as both
 * number-format detection flags are switched on.
 *
 * This is the exact counterpart of setTextInput(); members not listed here
 * are left untouched.
 */
void ScSetStringParam::setNumericInput()
{
    meSetTextNumFormat = Never;
    mbHandleApostrophe = true;
    mbDetectScientificNumberFormat = true;
    mbDetectNumberFormat = true;
}
bool ScStringUtil::parseSimpleNumber(
const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal, bool bDetectScientificNumber)
{
// Actually almost the entire pre-check is unnecessary and we could call
// rtl::math::stringToDouble() just after having exchanged ascii space with
// non-breaking space, if it wasn't for check of grouped digits. The NaN
// and Inf cases that are accepted by stringToDouble() could be detected
// using std::isfinite() on the result.
/* TODO: The grouped digits check isn't even valid for locales that do not
* group in thousands ... e.g. Indian locales. But that's something also
* the number scanner doesn't implement yet, only the formatter. */
OUStringBuffer aBuf;
sal_Int32 i = 0;
sal_Int32 n = rStr.getLength();
const sal_Unicode* p = rStr.getStr();
const sal_Unicode* pLast = p + (n-1);
sal_Int32 nPosDSep = -1, nPosGSep = -1;
sal_uInt32 nDigitCount = 0;
bool haveSeenDigit = false;
sal_Int32 nPosExponent = -1;
// Skip preceding spaces.
for (i = 0; i < n; ++i, ++p)
{
sal_Unicode c = *p;
if (c != 0x0020 && c != 0x00A0)
// first non-space character. Exit.
break;
}
if (i == n)
// the whole string is space. Fail.
return false;
n -= i; // Subtract the length of the preceding spaces.
// Determine the last non-space character.
for (; p != pLast; --pLast, --n)
{
sal_Unicode c = *pLast;
if (c != 0x0020 && c != 0x00A0)
// Non space character. Exit.
break;
}
for (i = 0; i < n; ++i, ++p)
{
sal_Unicode c = *p;
if (c == 0x0020 && gsep == 0x00A0)
// ascii space to unicode space if that is group separator
c = 0x00A0;
if ('0' <= c && c <= '9')
{
// this is a digit.
aBuf.append(c);
haveSeenDigit = true;
++nDigitCount;
}
else if (c == dsep || (dsepa && c == dsepa))
{
// this is a decimal separator.
if (nPosDSep >= 0)
// a second decimal separator -> not a valid number.
return false;
if (nPosGSep >= 0 && i - nPosGSep != 4)
// the number has a group separator and the decimal sep is not
// positioned correctly.
return false;
nPosDSep = i;
nPosGSep = -1;
aBuf.append(dsep); // append the separator that is parsed in stringToDouble() below
nDigitCount = 0;
}
else if (c == gsep)
{
// this is a group (thousand) separator.
if (!haveSeenDigit)
// not allowed before digits.
return false;
if (nPosDSep >= 0)
// not allowed after the decimal separator.
return false;
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
if (nPosExponent >= 0)
// not allowed in exponent.
return false;
nPosGSep = i;
nDigitCount = 0;
}
else if (c == '-' || c == '+')
{
// A sign must be the first character if it's given, or immediately
// follow the exponent character if present.
if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
aBuf.append(c);
else
return false;
}
else if (c == 'E' || c == 'e')
{
// this is an exponent designator.
if (nPosExponent >= 0 || !bDetectScientificNumber)
// Only one exponent allowed.
return false;
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
aBuf.append(c);
nPosExponent = i;
nPosDSep = -1;
nPosGSep = -1;
nDigitCount = 0;
}
else
return false;
}
// finished parsing the number.
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
sal_Int32 nParseEnd = 0;
rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
// Not a valid number or not entire string consumed.
return false;
return true;
}
bool ScStringUtil::parseSimpleNumber(
const char* p, size_t n, char dsep, char gsep, double& rVal)
{
// Actually almost the entire pre-check is unnecessary and we could call
// rtl::math::stringToDouble() just after having exchanged ascii space with
// non-breaking space, if it wasn't for check of grouped digits. The NaN
// and Inf cases that are accepted by stringToDouble() could be detected
// using std::isfinite() on the result.
/* TODO: The grouped digits check isn't even valid for locales that do not
* group in thousands ... e.g. Indian locales. But that's something also
* the number scanner doesn't implement yet, only the formatter. */
OStringBuffer aBuf;
size_t i = 0;
const char* pLast = p + (n-1);
sal_Int32 nPosDSep = -1, nPosGSep = -1;
sal_uInt32 nDigitCount = 0;
bool haveSeenDigit = false;
sal_Int32 nPosExponent = -1;
// Skip preceding spaces.
for (i = 0; i < n; ++i, ++p)
{
char c = *p;
if (c != ' ')
// first non-space character. Exit.
break;
}
if (i == n)
// the whole string is space. Fail.
return false;
n -= i; // Subtract the length of the preceding spaces.
// Determine the last non-space character.
for (; p != pLast; --pLast, --n)
{
char c = *pLast;
if (c != ' ')
// Non space character. Exit.
break;
}
for (i = 0; i < n; ++i, ++p)
{
char c = *p;
if ('0' <= c && c <= '9')
{
// this is a digit.
aBuf.append(c);
haveSeenDigit = true;
++nDigitCount;
}
else if (c == dsep)
{
// this is a decimal separator.
if (nPosDSep >= 0)
// a second decimal separator -> not a valid number.
return false;
if (nPosGSep >= 0 && i - nPosGSep != 4)
// the number has a group separator and the decimal sep is not
// positioned correctly.
return false;
nPosDSep = i;
nPosGSep = -1;
aBuf.append(c);
nDigitCount = 0;
}
else if (c == gsep)
{
// this is a group (thousand) separator.
if (!haveSeenDigit)
// not allowed before digits.
return false;
if (nPosDSep >= 0)
// not allowed after the decimal separator.
return false;
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
if (nPosExponent >= 0)
// not allowed in exponent.
return false;
nPosGSep = i;
nDigitCount = 0;
}
else if (c == '-' || c == '+')
{
// A sign must be the first character if it's given, or immediately
// follow the exponent character if present.
if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
aBuf.append(c);
else
return false;
}
else if (c == 'E' || c == 'e')
{
// this is an exponent designator.
if (nPosExponent >= 0)
// Only one exponent allowed.
return false;
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
aBuf.append(c);
nPosExponent = i;
nPosDSep = -1;
nPosGSep = -1;
nDigitCount = 0;
}
else
return false;
}
// finished parsing the number.
if (nPosGSep >= 0 && nDigitCount != 3)
// must be exactly 3 digits since the last group separator.
return false;
rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
sal_Int32 nParseEnd = 0;
rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
// Not a valid number or not entire string consumed.
return false;
return true;
}
/**
 * Extract a token from a separator-delimited string, ignoring separators
 * that occur inside quoted sections.
 *
 * @param rIn           the string to tokenize
 * @param nToken        number of the wanted token, counted from the position
 *                      given in rIndex (0 = the token starting at rIndex)
 * @param rQuotedPairs  pairs of characters: each even position holds a
 *                      quote-begin character, the following odd position the
 *                      matching quote-end character. Must have even length
 *                      and must not contain cTok.
 * @param cTok          the token separator
 * @param rIndex        in: position in rIn where scanning starts;
 *                      out: position just behind the separator that ended
 *                      the returned token, or -1 if the end of the string
 *                      was reached or the token does not exist
 * @return the token, or an empty string if there is no such token or rIndex
 *         was not a valid position on entry
 */
OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
                               sal_Unicode cTok, sal_Int32& rIndex )
{
    assert( !(rQuotedPairs.getLength()%2) );
    assert( rQuotedPairs.indexOf(cTok) == -1 );

    const sal_Int32 nLen = rIn.getLength();

    // This function itself reports "no more tokens" through rIndex == -1.
    // Being called again with that value (or any other position outside the
    // string) must not read outside the buffer; behave like
    // OUString::getToken() and return an empty token.
    if ( rIndex < 0 || rIndex > nLen )
    {
        rIndex = -1;
        return OUString();
    }

    const sal_Unicode*  pStr            = rIn.getStr();
    const sal_Unicode*  pQuotedStr      = rQuotedPairs.getStr();
    sal_Unicode         cQuotedEndChar  = 0;
    sal_Int32           nQuotedLen      = rQuotedPairs.getLength();
    sal_Int32           nTok            = 0;
    sal_Int32           nFirstChar      = rIndex;
    sal_Int32           i               = nFirstChar;

    // detect token position and length
    pStr += i;
    while ( i < nLen )
    {
        sal_Unicode c = *pStr;
        if ( cQuotedEndChar )
        {
            // end of the quote reached ?
            if ( c == cQuotedEndChar )
                cQuotedEndChar = 0;
        }
        else
        {
            // Is the char a quote-begin char ?
            sal_Int32 nQuoteIndex = 0;
            while ( nQuoteIndex < nQuotedLen )
            {
                if ( pQuotedStr[nQuoteIndex] == c )
                {
                    // remember which character closes this quoted section
                    cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
                    break;
                }
                else
                    nQuoteIndex += 2;
            }

            // If the token-char matches then increase TokCount
            if ( c == cTok )
            {
                ++nTok;

                if ( nTok == nToken )
                    // the wanted token starts right behind this separator
                    nFirstChar = i+1;
                else
                {
                    if ( nTok > nToken )
                        // this separator terminates the wanted token
                        break;
                }
            }
        }

        ++pStr;
        ++i;
    }

    if ( nTok >= nToken )
    {
        if ( i < nLen )
            rIndex = i+1;
        else
            rIndex = -1;
        return rIn.copy( nFirstChar, i-nFirstChar );
    }
    else
    {
        rIndex = -1;
        return OUString();
    }
}
/**
 * Tell whether a string contains a line break.
 *
 * @param rStr the text to inspect
 * @return true if rStr contains at least one line feed ('\n') or carriage
 *         return ('\r') character, false otherwise (including for an empty
 *         string)
 */
bool ScStringUtil::isMultiline( std::u16string_view rStr )
{
    for (const char16_t cChar : rStr)
    {
        if (cChar == u'\n' || cChar == u'\r')
            return true;
    }
    return false;
}
/**
 * Classify an input string as formula, text or number.
 *
 * - Longer than one character and starting with '=': Formula, text unchanged.
 * - Longer than one character and starting with an apostrophe: Text, with
 *   the leading apostrophe removed.
 * - Otherwise the string is checked against the standard number format of
 *   eLang via rContext: on success the result is a Number carrying the
 *   parsed value and the type of the detected format; a non-empty string
 *   that is not a number is Text; an empty string stays Unknown.
 *
 * @param rContext interpreter context providing the number format queries
 * @param rStr     the input to classify
 * @param eLang    language whose standard number format is used for number
 *                 detection
 * @return the classification together with text, value and format type
 */
ScInputStringType ScStringUtil::parseInputString(
    ScInterpreterContext& rContext, const OUString& rStr, LanguageType eLang )
{
    ScInputStringType aResult;
    aResult.mnFormatType = SvNumFormatType::ALL;
    aResult.meType = ScInputStringType::Unknown;
    aResult.maText = rStr;
    aResult.mfValue = 0.0;

    // A lone "=" or "'" is not a marker; it falls through to number detection.
    const bool bLongerThanMarker = rStr.getLength() > 1;

    if (bLongerThanMarker && rStr[0] == '=')
    {
        aResult.meType = ScInputStringType::Formula;
        return aResult;
    }

    if (bLongerThanMarker && rStr[0] == '\'')
    {
        // A leading apostrophe is always taken as text marker and dropped
        // from the stored text.
        aResult.maText = rStr.copy(1);
        aResult.meType = ScInputStringType::Text;
        return aResult;
    }

    // Only the standard number format of eLang is tried. The index is passed
    // by non-const variable because the context may update it to the format
    // that actually matched.
    sal_uInt32 nFormatIndex = rContext.NFGetStandardIndex(eLang);
    const bool bIsNumber = rContext.NFIsNumberFormat(rStr, nFormatIndex, aResult.mfValue);

    if (bIsNumber)
    {
        aResult.meType = ScInputStringType::Number;
        aResult.mnFormatType = rContext.NFGetType(nFormatIndex);
    }
    else if (!rStr.isEmpty())
    {
        aResult.meType = ScInputStringType::Text;
    }

    // The detected number format itself is not handed back to the caller.
    //TODO: find and replace with matching local format???

    return aResult;
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */