forked from amazingfate/loongoffice
69e9925ded584113e52f84ef0ed7c224079fa061 "sdext.pdfimport: resolves tdf#104597: RTL script text runs are reversed" and f6004e1c457ddab5e0c91e6159875d25130b108a "tdf#151546: RTL text is reversed (Writer pdfimport)" introduced two calls to comphelper::string::reverseString into sdext. That function reverses on the basis of individual UTF-16 code units, not on the basis of Unicode code points. And while at least some pre-existing callers of that function want the former semantics (see below), these two new callers in sdext apparently want the latter semantics. Therefore, introduce an additional function comphelper::string::reverseCodePoints with the latter semantics. I identified three other places that call comphelper::string::reverseString: * SbRtl_StrReverse in basic/source/runtime/methods1.cxx apparently implements some StrReverse Basic function, where a (presumably non-existing) Basic spec would need to decide which of the two semantics is called for. So leave it alone for now. * SvtFileDialog::IsolateFilterFromPath_Impl in fpicker/source/office/iodlg.cxx reverses a string, operates on it, then reverses (parts of) it back. Whether or not that is the most elegant code, using the latter semantics here would apparently be wrong, as invoking comphelper::string::reverseCodePoints twice does not restore the original string (i.e., it is not an involution) when the input is a malformed sequence of UTF-16 code units containing a low surrogate followed by a high surrogate. * AccessibleCell::getCellName in svx/source/table/accessiblecell.cxx apparently always operates on a string consisting only of Latin uppercase letters A--Z, for which both semantics are equivalent. (So we can just as well stick with the simpler comphelper::string::reverseString here.) 
(Extending the tests in comphelper/qa/string/test_string.cxx ran into an issue where loplugin:stringliteralvar warns about deliberate uses of sal_Unicode arrays rather than UTF-16 string literals wrapped in OUStringLiteral, as those arrays deliberately contain malformed UTF-16 code unit sequences and thus converting them into UTF-16 string literals might be considered inappropriate, see the newly added comment at StringLiteralVar::isPotentiallyInitializedWithMalformedUtf16 in compilerplugins/clang/stringliteralvar.cxx for details. So that loplugin had to be improved here, too.) Change-Id: I641cc32c76b0c5f6339ae44d8aa85df0022ffb05 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/142949 Tested-by: Jenkins Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
247 lines
7.8 KiB
C++
247 lines
7.8 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*
|
|
* This file is part of the LibreOffice project.
|
|
*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
*
|
|
* This file incorporates work covered by the following license notice:
|
|
*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed
|
|
* with this work for additional information regarding copyright
|
|
* ownership. The ASF licenses this file to you under the Apache
|
|
* License, Version 2.0 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
|
*/
|
|
|
|
#include <sal/config.h>
|
|
|
|
#include <iterator>
|
|
|
|
#include <comphelper/string.hxx>
|
|
#include <cppuhelper/implbase.hxx>
|
|
#include <com/sun/star/i18n/CharType.hpp>
|
|
#include <com/sun/star/i18n/XBreakIterator.hpp>
|
|
#include <com/sun/star/i18n/XCollator.hpp>
|
|
|
|
#include <cppunit/TestAssert.h>
|
|
#include <cppunit/TestFixture.h>
|
|
#include <cppunit/extensions/HelperMacros.h>
|
|
#include <cppunit/plugin/TestPlugIn.h>
|
|
#include <rtl/string.hxx>
|
|
#include <rtl/ustring.hxx>
|
|
|
|
namespace {
|
|
|
|
class TestString: public CppUnit::TestFixture
|
|
{
|
|
public:
|
|
void testStripStart();
|
|
void testStripEnd();
|
|
void testStrip();
|
|
void testToken();
|
|
void testTokenCount();
|
|
void testDecimalStringToNumber();
|
|
void testIsdigitAsciiString();
|
|
void testReverseString();
|
|
void testReverseCodePoints();
|
|
void testSplit();
|
|
void testRemoveAny();
|
|
|
|
CPPUNIT_TEST_SUITE(TestString);
|
|
CPPUNIT_TEST(testStripStart);
|
|
CPPUNIT_TEST(testStripEnd);
|
|
CPPUNIT_TEST(testStrip);
|
|
CPPUNIT_TEST(testToken);
|
|
CPPUNIT_TEST(testTokenCount);
|
|
CPPUNIT_TEST(testDecimalStringToNumber);
|
|
CPPUNIT_TEST(testIsdigitAsciiString);
|
|
CPPUNIT_TEST(testReverseString);
|
|
CPPUNIT_TEST(testReverseCodePoints);
|
|
CPPUNIT_TEST(testSplit);
|
|
CPPUNIT_TEST(testRemoveAny);
|
|
CPPUNIT_TEST_SUITE_END();
|
|
};
|
|
|
|
void TestString::testDecimalStringToNumber()
|
|
{
|
|
OUString s1("1234");
|
|
CPPUNIT_ASSERT_EQUAL(sal_uInt32(1234), comphelper::string::decimalStringToNumber(s1));
|
|
s1 += u"\u07C6";
|
|
CPPUNIT_ASSERT_EQUAL(sal_uInt32(12346), comphelper::string::decimalStringToNumber(s1));
|
|
// Codepoints on 2 16bits words
|
|
s1 = u"\U0001D7FE\U0001D7F7"; // MATHEMATICAL MONOSPACE DIGIT EIGHT and ONE
|
|
CPPUNIT_ASSERT_EQUAL(sal_uInt32(81), comphelper::string::decimalStringToNumber(s1));
|
|
}
|
|
|
|
void TestString::testIsdigitAsciiString()
|
|
{
|
|
CPPUNIT_ASSERT_EQUAL(true, comphelper::string::isdigitAsciiString("1234"));
|
|
|
|
CPPUNIT_ASSERT_EQUAL(false, comphelper::string::isdigitAsciiString("1A34"));
|
|
|
|
CPPUNIT_ASSERT_EQUAL(true, comphelper::string::isdigitAsciiString(""));
|
|
}
|
|
|
|
void TestString::testStripStart()
|
|
{
|
|
OString aIn("abc");
|
|
OString aOut;
|
|
|
|
aOut = ::comphelper::string::stripStart(aIn, 'b');
|
|
CPPUNIT_ASSERT_EQUAL(OString("abc"), aOut);
|
|
|
|
aOut = ::comphelper::string::stripStart(aIn, 'a');
|
|
CPPUNIT_ASSERT_EQUAL(OString("bc"), aOut);
|
|
|
|
aIn = "aaa";
|
|
aOut = ::comphelper::string::stripStart(aIn, 'a');
|
|
CPPUNIT_ASSERT(aOut.isEmpty());
|
|
|
|
aIn = "aba";
|
|
aOut = ::comphelper::string::stripStart(aIn, 'a');
|
|
CPPUNIT_ASSERT_EQUAL(OString("ba"), aOut);
|
|
}
|
|
|
|
void TestString::testStripEnd()
|
|
{
|
|
OString aIn("abc");
|
|
OString aOut;
|
|
|
|
aOut = ::comphelper::string::stripEnd(aIn, 'b');
|
|
CPPUNIT_ASSERT_EQUAL(OString("abc"), aOut);
|
|
|
|
aOut = ::comphelper::string::stripEnd(aIn, 'c');
|
|
CPPUNIT_ASSERT_EQUAL(OString("ab"), aOut);
|
|
|
|
aIn = "aaa";
|
|
aOut = ::comphelper::string::stripEnd(aIn, 'a');
|
|
CPPUNIT_ASSERT(aOut.isEmpty());
|
|
|
|
aIn = "aba";
|
|
aOut = ::comphelper::string::stripEnd(aIn, 'a');
|
|
CPPUNIT_ASSERT_EQUAL(OString("ab"), aOut);
|
|
}
|
|
|
|
void TestString::testStrip()
|
|
{
|
|
OString aIn("abc");
|
|
OString aOut;
|
|
|
|
aOut = ::comphelper::string::strip(aIn, 'b');
|
|
CPPUNIT_ASSERT_EQUAL(OString("abc"), aOut);
|
|
|
|
aOut = ::comphelper::string::strip(aIn, 'c');
|
|
CPPUNIT_ASSERT_EQUAL(OString("ab"), aOut);
|
|
|
|
aIn = "aaa";
|
|
aOut = ::comphelper::string::strip(aIn, 'a');
|
|
CPPUNIT_ASSERT(aOut.isEmpty());
|
|
|
|
aIn = "aba";
|
|
aOut = ::comphelper::string::strip(aIn, 'a');
|
|
CPPUNIT_ASSERT_EQUAL(OString("b"), aOut);
|
|
}
|
|
|
|
void TestString::testToken()
|
|
{
|
|
OString aIn("10.11.12");
|
|
OString aOut;
|
|
|
|
aOut = aIn.getToken(-1, '.');
|
|
CPPUNIT_ASSERT(aOut.isEmpty());
|
|
|
|
aOut = aIn.getToken(0, '.');
|
|
CPPUNIT_ASSERT_EQUAL(OString("10"), aOut);
|
|
|
|
aOut = aIn.getToken(1, '.');
|
|
CPPUNIT_ASSERT_EQUAL(OString("11"), aOut);
|
|
|
|
aOut = aIn.getToken(2, '.');
|
|
CPPUNIT_ASSERT_EQUAL(OString("12"), aOut);
|
|
|
|
aOut = aIn.getToken(3, '.');
|
|
CPPUNIT_ASSERT(aOut.isEmpty());
|
|
}
|
|
|
|
void TestString::testTokenCount()
|
|
{
|
|
OString aIn("10.11.12");
|
|
sal_Int32 nOut;
|
|
|
|
nOut = ::comphelper::string::getTokenCount(aIn, '.');
|
|
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), nOut);
|
|
|
|
nOut = ::comphelper::string::getTokenCount(aIn, 'X');
|
|
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), nOut);
|
|
|
|
nOut = ::comphelper::string::getTokenCount("", 'X');
|
|
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), nOut);
|
|
}
|
|
|
|
void TestString::testReverseString()
|
|
{
|
|
CPPUNIT_ASSERT_EQUAL(OUString(), comphelper::string::reverseString(u""));
|
|
CPPUNIT_ASSERT_EQUAL(OUString("cba"), comphelper::string::reverseString(u"abc"));
|
|
static sal_Unicode const rev[] = {'w', 0xDFFF, 0xDBFF, 'v', 0xDC00, 0xD800, 'u'};
|
|
CPPUNIT_ASSERT_EQUAL(
|
|
OUString(rev, std::size(rev)),
|
|
comphelper::string::reverseString(u"u\U00010000v\U0010FFFFw"));
|
|
static sal_Unicode const malformed[] = {0xDC00, 0xD800};
|
|
CPPUNIT_ASSERT_EQUAL(
|
|
OUString(u"\U00010000"),
|
|
comphelper::string::reverseString(std::u16string_view(malformed, std::size(malformed))));
|
|
}
|
|
|
|
void TestString::testReverseCodePoints() {
|
|
CPPUNIT_ASSERT_EQUAL(OUString(), comphelper::string::reverseCodePoints(""));
|
|
CPPUNIT_ASSERT_EQUAL(OUString("cba"), comphelper::string::reverseCodePoints("abc"));
|
|
CPPUNIT_ASSERT_EQUAL(
|
|
OUString(u"w\U0010FFFFv\U00010000u"),
|
|
comphelper::string::reverseCodePoints(u"u\U00010000v\U0010FFFFw"));
|
|
static sal_Unicode const malformed[] = {0xDC00, 0xD800};
|
|
CPPUNIT_ASSERT_EQUAL(
|
|
OUString(u"\U00010000"),
|
|
comphelper::string::reverseCodePoints(OUString(malformed, std::size(malformed))));
|
|
}
|
|
|
|
void TestString::testSplit()
|
|
{
|
|
std::vector<OUString> aRet = ::comphelper::string::split(u"CTRL+ALT+F1", '+');
|
|
CPPUNIT_ASSERT_EQUAL(size_t(3), aRet.size());
|
|
CPPUNIT_ASSERT_EQUAL(OUString("CTRL"), aRet[0]);
|
|
CPPUNIT_ASSERT_EQUAL(OUString("ALT"), aRet[1]);
|
|
CPPUNIT_ASSERT_EQUAL(OUString("F1"), aRet[2]);
|
|
}
|
|
|
|
void TestString::testRemoveAny()
|
|
{
|
|
using namespace ::comphelper::string;
|
|
OUString in("abcAAAbbC");
|
|
sal_Unicode const test1 [] = { 'a', 0 };
|
|
CPPUNIT_ASSERT_EQUAL(OUString("bcAAAbbC"), removeAny(in, test1));
|
|
sal_Unicode const test2 [] = { 0 };
|
|
CPPUNIT_ASSERT_EQUAL(in, removeAny(in, test2));
|
|
sal_Unicode const test3 [] = { 'A', 0 };
|
|
CPPUNIT_ASSERT_EQUAL(OUString("abcbbC"), removeAny(in, test3));
|
|
sal_Unicode const test4 [] = { 'A', 'a', 0 };
|
|
CPPUNIT_ASSERT_EQUAL(OUString("bcbbC"), removeAny(in, test4));
|
|
sal_Unicode const test5 [] = { 'C', 0 };
|
|
CPPUNIT_ASSERT_EQUAL(OUString("abcAAAbb"), removeAny(in, test5));
|
|
sal_Unicode const test6 [] = { 'X', 0 };
|
|
CPPUNIT_ASSERT_EQUAL(in, removeAny(in, test6));
|
|
sal_Unicode const test7 [] = { 'A', 'B', 'C', 'a', 'b', 'c', 0 };
|
|
CPPUNIT_ASSERT_EQUAL(OUString(), removeAny(in, test7));
|
|
}
|
|
|
|
// Add the TestString suite to CppUnit's registry of suites.
CPPUNIT_TEST_SUITE_REGISTRATION(TestString);
|
|
|
|
}
|
|
|
|
// Provide the CppUnit test plug-in implementation for this test library.
CPPUNIT_PLUGIN_IMPLEMENT();
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|