Quelle svxacorr.cxx

Sprache: C

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
*   Licensed to the Apache Software Foundation (ASF) under one or more
*   contributor license agreements. See the NOTICE file distributed
*   with this work for additional information regarding copyright
*   ownership. The ASF licenses this file to you under the Apache
*   License, Version 2.0 (the "License"); you may not use this file
*   except in compliance with the License. You may obtain a copy of
*   the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/

#include <memory>
#include <utility>
#include <algorithm>
#include <string_view>
#include <sal/config.h>

#include <com/sun/star/linguistic2/XSpellChecker1.hpp>
#include <com/sun/star/embed/XStorage.hpp>
#include <com/sun/star/io/IOException.hpp>
#include <com/sun/star/io/XStream.hpp>
#include <tools/urlobj.hxx>
#include <i18nlangtag/mslangid.hxx>
#include <i18nutil/transliteration.hxx>
#include <sal/log.hxx>
#include <osl/diagnose.h>
#include <vcl/svapp.hxx>
#include <vcl/settings.hxx>
#include <svl/fstathelper.hxx>
#include <svl/urihelper.hxx>
#include <unotools/charclass.hxx>
#include <com/sun/star/i18n/UnicodeType.hpp>
#include <unotools/collatorwrapper.hxx>
#include <com/sun/star/i18n/UnicodeScript.hpp>
#include <com/sun/star/i18n/OrdinalSuffix.hpp>
#include <unotools/localedatawrapper.hxx>
#include <unotools/transliterationwrapper.hxx>
#include <comphelper/processfactory.hxx>
#include <comphelper/sequence.hxx>
#include <comphelper/storagehelper.hxx>
#include <o3tl/string_view.hxx>
#include <editeng/editids.hrc>
#include <sot/storage.hxx>
#include <editeng/udlnitem.hxx>
#include <editeng/wghtitem.hxx>
#include <editeng/postitem.hxx>
#include <editeng/crossedoutitem.hxx>
#include <editeng/escapementitem.hxx>
#include <editeng/svxacorr.hxx>
#include <editeng/unolingu.hxx>
#include <vcl/window.hxx>
#include <com/sun/star/xml/sax/InputSource.hpp>
#include <com/sun/star/xml/sax/FastParser.hpp>
#include <com/sun/star/xml/sax/Writer.hpp>
#include <com/sun/star/xml/sax/SAXParseException.hpp>
#include <unotools/streamwrap.hxx>
#include "SvXMLAutoCorrectImport.hxx"
#include "SvXMLAutoCorrectExport.hxx"
#include "SvXMLAutoCorrectTokenHandler.hxx"
#include <ucbhelper/content.hxx>
#include <com/sun/star/ucb/ContentCreationException.hpp>
#include <com/sun/star/ucb/XCommandEnvironment.hpp>
#include <com/sun/star/ucb/TransferInfo.hpp>
#include <com/sun/star/ucb/NameClash.hpp>
#include <comphelper/diagnose_ex.hxx>
#include <xmloff/xmltoken.hxx>
#include <unordered_map>
#include <rtl/character.hxx>

using namespace ::com::sun::star::ucb;
using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::xml::sax;
using namespace ::com::sun::star;
using namespace ::xmloff::token;
using namespace ::utl;

namespace {

enum class Flags {
    NONE            = 0x00,
    FullStop        = 0x01,
    ExclamationMark = 0x02,
    QuestionMark    = 0x04,
};

}

namespace o3tl {
    template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
}
const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space

constexpr OUString pXMLImplWordStart_ExcptLstStr = u"WordExceptList.xml"_ustr;
constexpr OUString pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"_ustr;
constexpr OUString pXMLImplAutocorr_ListStr = u"DocumentList.xml"_ustr;

// tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
// Curious, why these \u0083\u0084\u0089\u0091\u0092\u0093\u0094 are handled as "begin characters"?
constexpr std::u16string_view
    /* also at these beginnings - Brackets and all kinds of begin characters */
    sImplSttSkipChars = u"\"'([{\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094",
    /* also at these ends - Brackets and all kinds of begin characters */
    sImplEndSkipChars = u"\"')]}\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094";

static OUString EncryptBlockName_Imp(std::u16string_view rName);

static bool NonFieldWordDelim( const sal_Unicode c )
{
    return ' ' == c || '\t' == c || 0x0a == c ||
            cNonBreakingSpace == c || 0x2011 == c;
}

static bool IsWordDelim( const sal_Unicode c )
{
    return c == 0x1 || NonFieldWordDelim(c);
}

static bool IsLowerLetter( sal_Int32 nCharType )
{
    return CharClass::isLetterType( nCharType ) &&
           ( css::i18n::KCharacterType::LOWER & nCharType);
}

static bool IsUpperLetter( sal_Int32 nCharType )
{
    return CharClass::isLetterType( nCharType ) &&
            ( css::i18n::KCharacterType::UPPER & nCharType);
}

static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
                                   sal_Int32 nStt, sal_Int32 nEnd )
{
    for( ; nStt < nEnd; ++nStt )
    {
        css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
        switch( nScript )
        {
            case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
            case css::i18n::UnicodeScript_kHangulJamo:
            case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
            case css::i18n::UnicodeScript_kHiragana:
            case css::i18n::UnicodeScript_kKatakana:
            case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
            case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
            case css::i18n::UnicodeScript_kCJKCompatibility:
            case css::i18n::UnicodeScript_kCJKUnifiedIdeographsExtensionA:
            case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
            case css::i18n::UnicodeScript_kHangulSyllable:
            case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
            case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
                return true;
            default: ; //do nothing
        }
    }
    return false;
}

static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
                                  sal_Int32 nStt, sal_Int32 nEnd )
{
    for( ; nStt < nEnd; ++nStt )
    {
        if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
            return true;
    }
    return false;
}

static bool lcl_IsInArr(std::u16string_view arr, const sal_uInt32 c)
{
    return std::any_of(arr.begin(), arr.end(), [c](const auto c1) { return c1 == c; });
}

SvxAutoCorrDoc::~SvxAutoCorrDoc()
{
}

// Called by the functions:
//  - FnCapitalStartWord
//  - FnCapitalStartSentence
// after the exchange of characters. Then the words, if necessary, can be inserted
// into the exception list.
void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
                                        sal_Unicode )
{
}

LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
{
    return LANGUAGE_SYSTEM;
}

static const LanguageTag& GetAppLang()
{
    return Application::GetSettings().GetLanguageTag();
}

/// Never use an unresolved LANGUAGE_SYSTEM.
static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
{
    LanguageType eLang = rDoc.GetLanguage( nPos );
    if (eLang == LANGUAGE_SYSTEM)
        eLang = GetAppLang().getLanguageType();     // the current work locale
    return eLang;
}

static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
{
    static std::unique_ptr<LocaleDataWrapper> xLclDtWrp;
    LanguageTag aLcl( nLang );
    if (!xLclDtWrp || xLclDtWrp->getLoadedLanguageTag() != aLcl)
        xLclDtWrp.reset(new LocaleDataWrapper(std::move(aLcl)));
    return *xLclDtWrp;
}
static TransliterationWrapper& GetIgnoreTranslWrapper()
{
    static int bIsInit = 0;
    static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
                TransliterationFlags::IGNORE_KANA |
                TransliterationFlags::IGNORE_WIDTH );
    if( !bIsInit )
    {
        aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
        bIsInit = 1;
    }
    return aWrp;
}
static CollatorWrapper& GetCollatorWrapper()
{
    static CollatorWrapper aCollWrp = []()
    {
        CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
        tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
        return tmp;
    }();
    return aCollWrp;
}

bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
{
    return  cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
            cChar == ' '  || cChar == '\'' || cChar == '\"' ||
            cChar == '*'  || cChar == '_'  || cChar == '%' ||
            cChar == '.'  || cChar == ','  || cChar == ';' ||
            cChar == ':'  || cChar == '?' || cChar == '!' ||
            cChar == '<'  || cChar == '>' ||
            cChar == '/'  || cChar == '-';
}

namespace
{
    bool IsCompoundWordDelimChar(sal_Unicode cChar)
    {
        return  cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
    }
}

bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
{
    return cChar == '%' || cChar == ';' || cChar == ':'  || cChar == '?' || cChar == '!' ||
        cChar == '/' /*case for the urls exception*/;
}

ACFlags SvxAutoCorrect::GetDefaultFlags()
{
    ACFlags nRet = ACFlags::Autocorrect
                    | ACFlags::CapitalStartSentence
                    | ACFlags::CapitalStartWord
                    | ACFlags::ChgOrdinalNumber
                    | ACFlags::ChgToEnEmDash
                    | ACFlags::AddNonBrkSpace
                    | ACFlags::TransliterateRTL
                    | ACFlags::ChgAngleQuotes
                    | ACFlags::ChgWeightUnderl
                    | ACFlags::SetINetAttr
                    | ACFlags::SetDOIAttr
                    | ACFlags::ChgQuotes
                    | ACFlags::SaveWordCplSttLst
                    | ACFlags::SaveWordWordStartLst
                    | ACFlags::CorrectCapsLock;
    LanguageType eLang = GetAppLang().getLanguageType();
    if( eLang.anyOf(
        LANGUAGE_ENGLISH,
        LANGUAGE_ENGLISH_US,
        LANGUAGE_ENGLISH_UK,
        LANGUAGE_ENGLISH_AUS,
        LANGUAGE_ENGLISH_CAN,
        LANGUAGE_ENGLISH_NZ,
        LANGUAGE_ENGLISH_EIRE,
        LANGUAGE_ENGLISH_SAFRICA,
        LANGUAGE_ENGLISH_JAMAICA,
        LANGUAGE_ENGLISH_CARIBBEAN))
        nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
    return nRet;
}

constexpr sal_Unicode cEmDash = 0x2014;
constexpr sal_Unicode cEnDash = 0x2013;
constexpr OUString sEmDash(u"\u2014"_ustr);
constexpr OUString sEnDash(u"\u2013"_ustr);
constexpr sal_Unicode cApostrophe = 0x2019;
constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039;
constexpr sal_Unicode cRightSingleAngleQuote = 0x203A;
// stop characters for searching preceding quotes
// (the first character is also the opening quote we are looking for)
const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
// preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 };

SvxAutoCorrect::SvxAutoCorrect( OUString aShareAutocorrFile,
                                OUString aUserAutocorrFile )
    : sShareAutoCorrFile(std::move( aShareAutocorrFile ))
    , sUserAutoCorrFile(std::move( aUserAutocorrFile ))
    , eCharClassLang( LANGUAGE_DONTKNOW )
    , nFlags(SvxAutoCorrect::GetDefaultFlags())
    , cStartDQuote( 0 )
    , cEndDQuote( 0 )
    , cStartSQuote( 0 )
    , cEndSQuote( 0 )
{
}

SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
    : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
    , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
    , aSwFlags( rCpy.aSwFlags )
    , eCharClassLang(rCpy.eCharClassLang)
    , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WordStartLstLoad))
    , cStartDQuote( rCpy.cStartDQuote )
    , cEndDQuote( rCpy.cEndDQuote )
    , cStartSQuote( rCpy.cStartSQuote )
    , cEndSQuote( rCpy.cEndSQuote )
{
}

SvxAutoCorrect::~SvxAutoCorrect()
{
}

void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
{
    moCharClass.emplace( LanguageTag( eLang) );
    eCharClassLang = eLang;
}

void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
{
    ACFlags nOld = nFlags;
    nFlags = bOn ? nFlags | nFlag
                 : nFlags & ~nFlag;

    if( !bOn )
    {
        if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
            nFlags &= ~ACFlags::CplSttLstLoad;
        if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
            nFlags &= ~ACFlags::WordStartLstLoad;
        if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
            nFlags &= ~ACFlags::ChgWordLstLoad;
    }
}

// Correct TWo INitial CApitals
void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& ;rTxt,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{
    CharClass& rCC = GetCharClass( eLang );

    // Delete all non alphanumeric. Test the characters at the beginning/end of
    // the word ( recognizes: "(min.", "/min.", and so on.)
    for( ; nSttPos < nEndPos; ++nSttPos )
        if( rCC.isLetterNumeric( rTxt, nSttPos ))
            break;
    for( ; nSttPos < nEndPos; --nEndPos )
        if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
            break;

    // Is the word a compounded word separated by delimiters?
    // If so, keep track of all delimiters so each constituent
    // word can be checked for two initial capital letters.
    std::deque<sal_Int32> aDelimiters;

    // Always check for two capitals at the beginning
    // of the entire word, so start at nSttPos.
    aDelimiters.push_back(nSttPos);

    // Find all compound word delimiters
    for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
    {
        if (IsCompoundWordDelimChar(rTxt[ n ]))
        {
            aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
        }
    }

    // Decide where to put the terminating delimiter.
    // If the last AutoCorrect char was a newline, then the AutoCorrect
    // char will not be included in rTxt.
    // If the last AutoCorrect char was not a newline, then the AutoCorrect
    // character will be the last character in rTxt.
    if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
        aDelimiters.push_back(nEndPos);

    // Iterate through the word and all words that compose it.
    // Two capital letters at the beginning of word?
    for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
    {
        nSttPos = aDelimiters[nI];
        nEndPos = aDelimiters[nI + 1];

        if( nSttPos+2 < nEndPos &&
            IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
            IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
            // Is the third character a lower case
            IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
            // Do not replace special attributes
            0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
        {
            // test if the word is in an exception list
            OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
            if( !FindInWordStartExceptList(eLang, sWord) )
            {
                // Check that word isn't correctly spelt before correcting:
                css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
                    LinguMgr::GetSpellChecker();
                if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
                {
                    Sequence< css::beans::PropertyValue > aEmptySeq;
                    if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
                    {
                        return;
                    }
                }
                sal_Unicode cSave = rTxt[ nSttPos ];
                OUString sChar = rCC.lowercase( OUString(cSave) );
                if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
                {
                    if( ACFlags::SaveWordWordStartLst & nFlags )
                        rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
                }
            }
        }
    }
}

// Format ordinal numbers suffixes (1st -> 1^st)
bool SvxAutoCorrect::FnChgOrdinalNumber(
    SvxAutoCorrDoc& rDoc, const OUString& rTxt,
    sal_Int32 nSttPos, sal_Int32 nEndPos,
    LanguageType eLang)
{
    // 1st, 2nd, 3rd, 4 - 0th
    // 201th or 201st
    // 12th or 12nd
    bool bChg = false;

    // In some languages ordinal suffixes should never be
    // changed to superscript. Let's break for those languages.
    if (!eLang.anyOf(
         LANGUAGE_CATALAN,              // tdf#156792
         LANGUAGE_CATALAN_VALENCIAN,
         LANGUAGE_SWEDISH,
         LANGUAGE_SWEDISH_FINLAND))
    {
        CharClass& rCC = GetCharClass(eLang);

        for (; nSttPos < nEndPos; ++nSttPos)
            if (!lcl_IsInArr(sImplSttSkipChars, rTxt[nSttPos]))
                break;
        for (; nSttPos < nEndPos; --nEndPos)
            if (!lcl_IsInArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
                break;

        // Get the last number in the string to check
        sal_Int32 nNumEnd = nEndPos;
        bool bFoundEnd = false;
        bool isValidNumber = true;
        sal_Int32 i = nEndPos;
        while (i > nSttPos)
        {
            i--;
            bool isDigit = rCC.isDigit(rTxt, i);
            if (bFoundEnd)
                isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));

            if (isDigit && !bFoundEnd)
            {
                bFoundEnd = true;
                nNumEnd = i;
            }
        }

        if (bFoundEnd && isValidNumber) {
            sal_Int32 nNum = o3tl::toInt32(rTxt.subView(nSttPos, nNumEnd - nSttPos + 1));
            std::u16string_view sEnd = rTxt.subView(nNumEnd + 1, nEndPos - nNumEnd - 1);

            // Check if the characters after that number correspond to the ordinal suffix
            uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
                = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());

            uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());

            // add extra suffixes for languages not handled by i18npool/ICU
            if ( primary(eLang) == primary(LANGUAGE_PORTUGUESE) &&
                            ( nEndPos == nNumEnd + 3 || nEndPos == nNumEnd + 4 ) &&
                            ( sEnd[0] == 'a' || sEnd[0] == 'o' || sEnd[0] == 'r' ) )
            {
               auto aExtendedSuffixes = comphelper::sequenceToContainer< std::vector<OUString> >(aSuffixes);
               aExtendedSuffixes.push_back(u"as"_ustr); // plural form of 'a'
               aExtendedSuffixes.push_back(u"os"_ustr); // plural form of 'o'
               aExtendedSuffixes.push_back(u"ra"_ustr); // alternative form of 'a'
               aExtendedSuffixes.push_back(u"ro"_ustr); // alternative form of 'o'
               aExtendedSuffixes.push_back(u"ras"_ustr); // alternative form of "as"
               aExtendedSuffixes.push_back(u"ros"_ustr); // alternative form of "os"
               aSuffixes = comphelper::containerToSequence(aExtendedSuffixes);
            }

            for (OUString const & sSuffix : aSuffixes)
            {
                if (sSuffix == sEnd)
                {
                    // Check if the ordinal suffix has to be set as super script
                    if (rCC.isLetter(sSuffix))
                    {
                        sal_Int32 nNumberChanged = 0;
                        sal_Int32 nSuffixChanged = 0;
                        // exceptions for Portuguese
                        // add missing dot: 1a -> 1.ª
                        // and remove optional 'r': 1ro -> 1.º
                        if ( primary(eLang) == primary(LANGUAGE_PORTUGUESE) )
                        {
                            if ( sSuffix.startsWith("r") )
                            {
                                rDoc.Delete( nNumEnd + 1, nNumEnd + 2 );
                                nSuffixChanged = -1;
                            }
                            rDoc.Insert( nNumEnd + 1, u"."_ustr );
                            nNumberChanged = 1;
                        }

                        // Do the change
                        SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
                            DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
                        rDoc.SetAttr(nNumEnd + 1 + nNumberChanged,
                            nEndPos + nNumberChanged + nSuffixChanged,
                            SID_ATTR_CHAR_ESCAPEMENT,
                            aSvxEscapementItem);
                        bChg = true;
                        break;
                    }
                }
            }
        }
    }
    return bChg;
}

// Replace dashes
bool SvxAutoCorrect::FnChgToEnEmDash(
                                SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                sal_Int32 nSttPos, sal_Int32 nEndPos,
                                LanguageType eLang )
{
    bool bRet = false;
    CharClass& rCC = GetCharClass( eLang );
    if (eLang == LANGUAGE_SYSTEM)
        eLang = GetAppLang().getLanguageType();
    bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);

    // rTxt may refer to the frame text that will change in the calls to rDoc.Delete / rDoc.Insert;
    // keep a local copy for later use
    OUString aOrigTxt = rTxt;
    sal_Int32 nFirstReplacementTextLengthChange = 0;

    // replace " - " or " --" with "enDash"
    if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
    {
        sal_Unicode cCh = rTxt[ nSttPos ];
        if( '-' == cCh )
        {
            if( 1 < nEndPos - nSttPos &&
                ' ' == rTxt[ nSttPos-1 ] &&
                '-' == rTxt[ nSttPos+1 ])
            {
                sal_Int32 n;
                for( n = nSttPos+2; n < nEndPos && lcl_IsInArr(
                            sImplSttSkipChars,(cCh = rTxt[ n ]));
                        ++n )
                    ;

                // found: " --[<AnySttChars>][A-z0-9]
                if( rCC.isLetterNumeric( OUString(cCh) ) )
                {
                    for( n = nSttPos-1; n && lcl_IsInArr(
                            sImplEndSkipChars,(cCh = rTxt[ --n ])); )
                        ;

                    // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
                    if( rCC.isLetterNumeric( OUString(cCh) ))
                    {
                        rDoc.Delete( nSttPos, nSttPos + 2 );
                        rDoc.Insert( nSttPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
                        nFirstReplacementTextLengthChange = -1; // 2 ch -> 1 ch
                        bRet = true;
                    }
                }
            }
        }
        else if( 3 < nSttPos &&
                 ' ' == rTxt[ nSttPos-1 ] &&
                 '-' == rTxt[ nSttPos-2 ])
        {
            sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
            if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
            {
                --nTmpPos;
                ++nLen;
                cCh = rTxt[ nTmpPos-1 ];
            }
            if( ' ' == cCh )
            {
                for( n = nSttPos; n < nEndPos && lcl_IsInArr(
                            sImplSttSkipChars,(cCh = rTxt[ n ]));
                        ++n )
                    ;

                // found: " - [<AnySttChars>][A-z0-9]
                if( rCC.isLetterNumeric( OUString(cCh) ) )
                {
                    cCh = ' ';
                    for( n = nTmpPos-1; n && lcl_IsInArr(
                            sImplEndSkipChars,(cCh = rTxt[ --n ])); )
                            ;
                    // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
                    if (rCC.isLetterNumeric(OUString(cCh)) || lcl_IsInArr(u".!?", cCh))
                    {
                        rDoc.Delete( nTmpPos, nTmpPos + nLen );
                        rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
                        nFirstReplacementTextLengthChange = 1 - nLen; // nLen ch -> 1 ch
                        bRet = true;
                    }
                }
            }
        }
    }

    // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
    // [0-9]--[0-9] double dash always replaced with "enDash"
    // Finnish and Hungarian use enDash instead of emDash.
    bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
    if( 4 <= nEndPos - nSttPos )
    {
        std::u16string_view sTmpView( aOrigTxt.subView( nSttPos, nEndPos - nSttPos ) );
        size_t nFndPos = sTmpView.find(u"--");
        if (nFndPos > 0 && nFndPos < sTmpView.size() - 2)
        {
            // Use proper codepoints. Currently, CharClass::isLetterNumeric is broken, it
            // uses the index *both* as code unit index (when checking it as ASCII), *and*
            // as code point index (when passes to css::i18n::XCharacterClassification).
            // Oh well... Anyway, single-codepoint strings will workaround it.
            sal_Int32 nStart = nSttPos + nFndPos;
            sal_uInt32 chStart = aOrigTxt.iterateCodePoints(&nStart, -1);
            OUString sStart(&chStart, 1);
            // No idea why sImplEndSkipChars is checked at start
            if (rCC.isLetterNumeric(sStart, 0) || lcl_IsInArr(sImplEndSkipChars, chStart))
            {
                sal_Int32 nEnd = nSttPos + nFndPos + 2;
                sal_uInt32 chEnd = aOrigTxt.iterateCodePoints(&nEnd, 1);
                OUString sEnd(&chEnd, 1);
                // No idea why sImplSttSkipChars is checked at end
                if (rCC.isLetterNumeric(sEnd, 0) || lcl_IsInArr(sImplSttSkipChars, chEnd))
                {
                    nSttPos = nSttPos + nFndPos + nFirstReplacementTextLengthChange;
                    rDoc.Delete(nSttPos, nSttPos + 2);
                    rDoc.Insert(nSttPos,
                                (bEnDash || (rCC.isDigit(sStart, 0) && rCC.isDigit(sEnd, 0))
                                     ? sEnDash
                                     : sEmDash));
                    bRet = true;
                }
            }
        }
    }
    return bRet;
}

// Add non-breaking space before specific punctuation marks in French text
sal_Int32 SvxAutoCorrect::FnAddNonBrkSpace(
                                SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
                                sal_Int32 nEndPos,
                                LanguageType eLang, bool& io_bNbspRunNext )
{
    sal_Int32 nRet = -1;

    CharClass& rCC = GetCharClass( eLang );

    if ( rCC.getLanguageTag().getLanguage() == "fr" )
    {
        bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
        OUString allChars = u":;?!%"_ustr;
        OUString chars( allChars );
        if ( bFrCA )
            chars = ":";

        sal_Unicode cChar = rTxt[ nEndPos ];
        bool bHasSpace = chars.indexOf( cChar ) != -1;
        bool bIsSpecial = allChars.indexOf( cChar ) != -1;
        if ( bIsSpecial )
        {
            // Get the last word delimiter position
            sal_Int32 nSttWdPos = nEndPos;
            bool bWasWordDelim = false;
            while( nSttWdPos )
            {
                bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
                if (bWasWordDelim)
                    break;
            }

            //See if the text is the start of a protocol string, e.g. have text of
            //"http" see if it is the start of "http:" and if so leave it alone
            size_t nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
            size_t nProtocolLen = nEndPos - nSttWdPos + 1;
            if (nIndex + nProtocolLen <= rTxt.size())
            {
                if (INetURLObject::CompareProtocolScheme(rTxt.substr(nIndex, nProtocolLen)) != INetProtocol::NotValid)
                    return -1;
            }

            // Check the presence of "://" in the word
            size_t nStrPos = rTxt.find( u"://", nSttWdPos + 1 );
            if ( nStrPos == std::u16string_view::npos && nEndPos > 0 )
            {
                // Check the previous char
                sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
                if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
                {
                    // Remove any previous normal space
                    sal_Int32 nPos = nEndPos - 1;
                    while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
                    {
                        if ( nPos == 0 ) break;
                        nPos--;
                        cPrevChar = rTxt[ nPos ];
                    }

                    nPos++;
                    if ( nEndPos - nPos > 0 )
                        rDoc.Delete( nPos, nEndPos );

                    // Add the non-breaking space at the end pos
                    if ( bHasSpace )
                        rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
                    io_bNbspRunNext = true;
                    nRet = nPos;
                }
                else if ( chars.indexOf( cPrevChar ) != -1 )
                    io_bNbspRunNext = true;
            }
        }
        else if ( cChar == '/' && nEndPos > 1 && static_cast<sal_Int32>(rTxt.size()) > (nEndPos - 1) )
        {
            // Remove the hardspace right before to avoid formatting URLs
            sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
            sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
            if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
            {
                rDoc.Delete( nEndPos - 2, nEndPos - 1 );
                nRet = nEndPos - 1;
            }
        }
    }

    return nRet;
}

// URL recognition
bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{
    OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
                                                GetCharClass( eLang ) ));
    bool bRet = !sURL.isEmpty();
    if( bRet )          // so, set attribute:
        rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
    return bRet;
}

// DOI citation recognition
bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{
    OUString sURL( URIHelper::FindFirstDOIInText( rTxt, nSttPos, nEndPos, GetCharClass( eLang ) ));
    bool bRet = !sURL.isEmpty();
    if( bRet )          // so, set attribute:
        rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
    return bRet;
}

// Automatic *bold*, /italic/, -strikeout- and _underline_
bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                        sal_Int32 nEndPos )
{
    // Condition:
    //  at the beginning:   _, *, / or ~ after Space with the following !Space
    //  at the end:         _, *, / or ~ before Space (word delimiter?)

    sal_Unicode cInsChar = rTxt[ nEndPos ];  // underline, bold, italic or strikeout
    if( ++nEndPos != rTxt.getLength() &&
        !IsWordDelim( rTxt[ nEndPos ] ) )
        return false;

    --nEndPos;

    bool bAlphaNum = false;
    sal_Int32 nPos = nEndPos;
    sal_Int32  nFndPos = -1;
    CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );

    while( nPos )
    {
        switch( sal_Unicode c = rTxt[ --nPos ] )
        {
        case '_':
        case '-':
        case '/':
        case '*':
            if( c == cInsChar )
            {
                if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
                    IsWordDelim( rTxt[ nPos-1 ])) &&
                    !IsWordDelim( rTxt[ nPos+1 ]))
                        nFndPos = nPos;
                else
                    // Condition is not satisfied, so cancel
                    nFndPos = -1;
                nPos = 0;
            }
            break;
        default:
            if( !bAlphaNum )
                bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
        }
    }

    if( -1 != nFndPos )
    {
        // first delete the Character at the end - this allows insertion
        // of an empty hint in SetAttr which would be removed by Delete
        // (fdo#62536, AUTOFMT in Writer)
        rDoc.Delete( nEndPos, nEndPos + 1 );

        // Span the Attribute over the area
        // the end.
        if( '*' == cInsChar )           // Bold
        {
            SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
            rDoc.SetAttr( nFndPos + 1, nEndPos,
                          SID_ATTR_CHAR_WEIGHT,
                          aSvxWeightItem);
        }
        else if( '/' == cInsChar )           // Italic
        {
            SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
            rDoc.SetAttr( nFndPos + 1, nEndPos,
                          SID_ATTR_CHAR_POSTURE,
                          aSvxPostureItem);
        }
        else if( '-' == cInsChar )           // Strikeout
        {
            SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
            rDoc.SetAttr( nFndPos + 1, nEndPos,
                          SID_ATTR_CHAR_STRIKEOUT,
                          aSvxCrossedOutItem);
        }
        else                            // Underline
        {
            SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
            rDoc.SetAttr( nFndPos + 1, nEndPos,
                          SID_ATTR_CHAR_UNDERLINE,
                          aSvxUnderlineItem);
        }
        rDoc.Delete( nFndPos, nFndPos + 1 );
    }

    return -1 != nFndPos;
}

// Capitalize first letter of every sentence
void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
                                    const OUString& rTxt, bool bNormalPos,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{

    if( rTxt.isEmpty() || nEndPos <= nSttPos )
        return;

    CharClass& rCC = GetCharClass( eLang );
    OUString aText( rTxt );
    const sal_Unicode *pStart = aText.getStr(),
                      *pStr = pStart + nEndPos,
                      *pWordStt = nullptr,
                      *pDelim = nullptr;

    bool bAtStart = false;
    do {
        --pStr;
        if (rCC.isLetter(aText, pStr - pStart))
        {
            if( !pWordStt )
                pDelim = pStr+1;
            pWordStt = pStr;
        }
        else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
        {
            if( (lcl_IsInArr( u"-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
                pWordStt - 1 == pStr &&
                // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
                (pStart + 1) <= pStr &&
                rCC.isLetter(aText, pStr-1 - pStart))
                pWordStt = --pStr;
            else
                break;
        }
        bAtStart = (pStart == pStr);
    } while( !bAtStart );

    if (!pWordStt)
        return;    // no character to be replaced

    if (rCC.isDigit(aText, pStr - pStart))
        return; // already ok

    if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
        return; // already ok

    //See if the text is the start of a protocol string, e.g. have text of
    //"http" see if it is the start of "http:" and if so leave it alone
    sal_Int32 nIndex = pWordStt - pStart;
    sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
    if (nIndex + nProtocolLen <= rTxt.getLength())
    {
        if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid)
            return; // already ok
    }

    if (0x1 == *pWordStt || 0x2 == *pWordStt)
        return; // already ok

    // Only capitalize, if string before specified characters is long enough
    if( *pDelim && 2 >= pDelim - pWordStt &&
        lcl_IsInArr( u".-)>", *pDelim ) )
        return;

    // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
    if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
        return;

    if( !bAtStart ) // Still no beginning of a paragraph?
    {
        if (NonFieldWordDelim(*pStr))
        {
            for (;;)
            {
                bAtStart = (pStart == pStr--);
                if (bAtStart || !NonFieldWordDelim(*pStr))
                    break;
            }
        }
        // Asian full stop, full width full stop, full width exclamation mark
        // and full width question marks are treated as word delimiters
        else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
                  0xFF1F != *pStr )
            return; // no valid separator -> no replacement
    }

    // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
    if (FindInWordStartExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
        return;

    if( bAtStart )  // at the beginning of a paragraph?
    {
        // Check out the previous paragraph, if it exists.
        // If so, then check to paragraph separator at the end.
        OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
        if (!pPrevPara)
        {
            // valid separator -> replace
            OUString sChar( *pWordStt );
            sChar = rCC.titlecase(sChar); //see fdo#56740
            if (sChar != OUStringChar(*pWordStt))
               rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
            return;
        }

        aText = *pPrevPara;
        bAtStart = false;
        pStart = aText.getStr();
        pStr = pStart + aText.getLength();

        do {            // overwrite all blanks
            --pStr;
            if (!NonFieldWordDelim(*pStr))
                break;
            bAtStart = (pStart == pStr);
        } while( !bAtStart );

        if( bAtStart )
            return;  // no valid separator -> no replacement
    }

    // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
    // all three can happen, but not more than once!
    const sal_Unicode* pExceptStt = nullptr;
    bool bContinue = true;
    Flags nFlag = Flags::NONE;
    do
    {
        switch (*pStr)
        {
            // Western and Asian full stop
            case '.':
            case 0x3002:
            case 0xFF0E:
            {
                if (pStr >= pStart + 2 && *(pStr - 2) == '.')
                {
                    //e.g. text "f.o.o. word": Now currently considering
                    //capitalizing word but second last character of
                    //previous word is a .  So probably last word is an
                    //anagram that ends in . and not truly the end of a
                    //previous sentence, so don't autocapitalize this word
                    return;
                }
                if (nFlag & Flags::FullStop)
                    return; // no valid separator -> no replacement
                nFlag |= Flags::FullStop;
                pExceptStt = pStr;
            }
            break;
            case '!':
            case 0xFF01:
            {
                if (nFlag & Flags::ExclamationMark)
                    return; // no valid separator -> no replacement
                nFlag |= Flags::ExclamationMark;
            }
            break;
            case '?':
            case 0xFF1F:
            {
                if (nFlag & Flags::QuestionMark)
                    return; // no valid separator -> no replacement
                nFlag |= Flags::QuestionMark;
            }
            break;
            default:
                if (nFlag == Flags::NONE)
                    return; // no valid separator -> no replacement
                else
                    bContinue = false;
                break;
        }

        if (bContinue && pStr-- == pStart)
        {
            return; // no valid separator -> no replacement
        }
    } while (bContinue);
    if (Flags::FullStop != nFlag)
        pExceptStt = nullptr;

    // Only capitalize, if string is long enough
    if( 2 > ( pStr - pStart ) )
        return;

    if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
    {
        bool bValid = false, bAlphaFnd = false;
        const sal_Unicode* pTmpStr = pStr;
        while( !bValid )
        {
            if( rCC.isDigit( aText, pTmpStr - pStart ) )
            {
                bValid = true;
                pStr = pTmpStr - 1;
            }
            else if( rCC.isLetter( aText, pTmpStr - pStart ) )
            {
                if( bAlphaFnd )
                {
                    bValid = true;
                    pStr = pTmpStr;
                }
                else
                    bAlphaFnd = true;
            }
            else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
                break;

            if( pTmpStr == pStart )
                break;

            --pTmpStr;
        }

        if( !bValid )
            return;       // no valid separator -> no replacement
    }

    bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';

    // Search for the beginning of the word
    while (!NonFieldWordDelim(*pStr))
    {
        if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
            bNumericOnly = false;

        if( pStart == pStr )
            break;

        --pStr;
    }

    if( bNumericOnly )      // consists of only numbers, then not
        return;

    if (NonFieldWordDelim(*pStr))
        ++pStr;

    OUString sWord;

    // check on the basis of the exception list
    if( pExceptStt )
    {
        sWord = OUString(pStr, pExceptStt - pStr + 1);
        if( FindInCplSttExceptList(eLang, sWord) )
            return;

        // Delete all non alphanumeric. Test the characters at the
        // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
        OUString sTmp( sWord );
        while( !sTmp.isEmpty() &&
                !rCC.isLetterNumeric( sTmp, 0 ) )
            sTmp = sTmp.copy(1);

        // Remove all non alphanumeric characters towards the end up until
        // the last one.
        sal_Int32 nLen = sTmp.getLength();
        while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
            --nLen;
        if( nLen + 1 < sTmp.getLength() )
            sTmp = sTmp.copy( 0, nLen + 1 );

        if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
            FindInCplSttExceptList(eLang, sTmp))
            return;

        if(FindInCplSttExceptList(eLang, sWord, true))
            return;
    }

    // Ok, then replace
    sal_Unicode cSave = *pWordStt;
    nSttPos = pWordStt - rTxt.getStr();
    OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
    bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );

    // Perhaps someone wants to have the word
    if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
        rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
}

// Correct accidental use of cAPS LOCK key
bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                        sal_Int32 nSttPos, sal_Int32 nEndPos,
                                        LanguageType eLang )
{
    if (nEndPos - nSttPos < 2)
        // string must be at least 2-character long.
        return false;

    CharClass& rCC = GetCharClass( eLang );

    // Check the first 2 letters.
    if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
        return false;

    if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
        return false;

    OUStringBuffer aConverted;
    aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
    aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );

    // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
    if (FindInWordStartExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
        return false;

    for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
    {
        if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
            // A lowercase letter disqualifies the whole text.
            return false;

        if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
            // Another uppercase letter.  Convert it.
            aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
        else
            // This is not an alphabetic letter.  Leave it as-is.
            aConverted.append( rTxt[i] );
    }

    // Replace the word.
    rDoc.Delete(nSttPos, nEndPos);
    rDoc.Insert(nSttPos, aConverted.makeStringAndClear());

    return true;
}

sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
                                        LanguageType eLang ) const
{
    sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
                                    ? GetStartDoubleQuote()
                                    : GetStartSingleQuote() )
                                   : ( '\"' == cInsChar
                                    ? GetEndDoubleQuote()
                                    : GetEndSingleQuote() );
    if( !cRet )
    {
        // then through the Language find the right character
        if( LANGUAGE_NONE == eLang )
            cRet = cInsChar;
        else
        {
            LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
            OUString sRet( bSttQuote
                            ? ( '\"' == cInsChar
                                ? rLcl.getDoubleQuotationMarkStart()
                                : rLcl.getQuotationMarkStart() )
                            : ( '\"' == cInsChar
                                ? rLcl.getDoubleQuotationMarkEnd()
                                : rLcl.getQuotationMarkEnd() ));
            cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
        }
    }
    return cRet;
}

void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
                                    sal_Unicode cInsChar, bool bSttQuote,
                                    bool bIns, LanguageType eLang, ACQuotes eType ) const
{
    sal_Unicode cRet;

    if ( eType == ACQuotes::DoubleAngleQuote )
    {
        bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
        // pressing " inside a quotation -> use second level angle quotes
        bool bLeftQuote = '\"' == cInsChar &&
                // start position and Romanian OR
                // not start position and Hungarian
                bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
        cRet = ( '<' == cInsChar || bLeftQuote )
                ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote )
                : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote );
    }
    else if ( eType == ACQuotes::UseApostrophe )
        cRet = cApostrophe;
    else
        cRet = GetQuote( cInsChar, bSttQuote, eLang );

    OUString sChg( cInsChar );
    if( bIns )
        rDoc.Insert( nInsPos, sChg );
    else
        rDoc.Replace( nInsPos, sChg );

    sChg = OUString(cRet);

    if( eType == ACQuotes::NonBreakingSpace )
    {
        if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) ))
        {
            if( !bSttQuote )
                ++nInsPos;
        }
    }
    else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
    {
        rDoc.Delete( nInsPos-1, nInsPos);
        --nInsPos;
    }

    rDoc.Replace( nInsPos, sChg );

    // i' -> I' in English (last step for the Undo)
    if( eType == ACQuotes::CapitalizeIAm )
        rDoc.Replace( nInsPos-1, u"I"_ustr );
}

OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
                                sal_Unicode cInsChar, bool bSttQuote )
{
    const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
    sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );

    OUString sRet(cRet);

    if( '\"' == cInsChar )
    {
        if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
        {
            if( bSttQuote )
                sRet += " ";
            else
                sRet = " " + sRet;
        }
    }
    return sRet;
}

// search preceding opening quote in the paragraph before the insert position
static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
                const sal_Unicode sPrecedingChar, const sal_Unicode sStopChar, const sal_Unicode* aStopChars )
{
    sal_Unicode cTmpChar;

    do {
        cTmpChar = rTxt[ --nPos ];
        if ( cTmpChar == sPrecedingChar )
            return true;

        if ( cTmpChar == sStopChar )
            return false;

        for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
            if ( cTmpChar == *pCh )
                return false;

    } while ( nPos > 0 );

    return false;
}

// WARNING: rText may become invalid, see comment below
void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                    sal_Int32 nInsPos, sal_Unicode cChar,
                                    bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
{
    bool bIsNextRun = io_bNbspRunNext;
    io_bNbspRunNext = false;  // if it was set, then it has to be turned off

    do{                                 // only for middle check loop !!
        if( cChar )
        {
            // Prevent double space
            if( nInsPos && ' ' == cChar &&
                IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
                ' ' == rTxt[ nInsPos - 1 ])
            {
                break;
            }

            bool bSingle = '\'' == cChar;
            bool bIsReplaceQuote =
                        (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
                        (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
            if( bIsReplaceQuote )
            {
                bool bSttQuote = !nInsPos;
                ACQuotes eType = ACQuotes::NONE;
                const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
                if (!bSttQuote)
                {
                    sal_Unicode cPrev = rTxt[ nInsPos-1 ];
                    bSttQuote = NonFieldWordDelim(cPrev) ||
                        lcl_IsInArr( u"([{", cPrev ) ||
                        ( cEmDash == cPrev ) ||
                        ( cEnDash == cPrev );
                    // tdf#38394 use opening quotation mark << in French l'<<word>>
                    if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
                        primary(eLang) == primary(LANGUAGE_FRENCH) &&
                        ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
                               // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
                               u"cdjlnmtsCDJLNMTS"_ustr.indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
                          ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
                               // abbreviated form of que
                               ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
                               ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
                    {
                        bSttQuote = true;
                    }
                    // tdf#108423 for capitalization of English i'm
                    else if ( bSingle && ( cPrev == 'i' ) &&
                        primary(eLang) == primary(LANGUAGE_ENGLISH) &&
                        ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
                    {
                        eType = ACQuotes::CapitalizeIAm;
                    }
                    // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
                    else if ( !bSingle && nInsPos &&
                        ( ( eLang == LANGUAGE_HUNGARIAN &&
                            lcl_HasPrecedingChar( rTxt, nInsPos,
                                bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
                                bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEnd[1],
                                bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 2 ) ) ||
                          ( eLang.anyOf(
                                LANGUAGE_ROMANIAN,
                                LANGUAGE_ROMANIAN_MOLDOVA ) &&
                            lcl_HasPrecedingChar( rTxt, nInsPos,
                                bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
                                bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEndRo[1],
                                bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 2 ) ) ) )
                    {
                        LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
                        // only if the opening double quotation mark is the default one
                        if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
                            eType = ACQuotes::DoubleAngleQuote;
                    }
                    else if ( bSingle && nInsPos && !bSttQuote &&
                        // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
                        // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
                        // tdf#123786 the same for Russian and Ukrainian
                        ( eLang.anyOf (
                                 LANGUAGE_CZECH,
                                 LANGUAGE_GERMAN,
                                 LANGUAGE_GERMAN_SWISS,
                                 LANGUAGE_GERMAN_AUSTRIAN,
                                 LANGUAGE_GERMAN_LUXEMBOURG,
                                 LANGUAGE_GERMAN_LIECHTENSTEIN,
                                 LANGUAGE_ICELANDIC,
                                 LANGUAGE_SLOVAK,
                                 LANGUAGE_SLOVENIAN ) ) )
                    {
                        sal_Unicode sStartChar = GetStartSingleQuote();
                        sal_Unicode sEndChar = GetEndSingleQuote();
                        if ( !sStartChar || !sEndChar ) {
                            LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
                            if ( !sStartChar ) sStartChar = rLcl.getQuotationMarkStart()[0];
                            if ( !sEndChar ) sEndChar = rLcl.getQuotationMarkStart()[0];
                        }
                        if ( !lcl_HasPrecedingChar( rTxt, nInsPos, sStartChar, sEndChar, aStopSingleQuoteEnd + 1 ) )
                        {
                            CharClass& rCC = GetCharClass( eLang );
                            if ( rCC.isLetter(rTxt, nInsPos-1) )
                            {
                                eType = ACQuotes::UseApostrophe;
                            }
                        }
                    }
                    else if ( bSingle && nInsPos && !bSttQuote &&
                          ( eLang.anyOf (
                                 LANGUAGE_RUSSIAN,
                                 LANGUAGE_UKRAINIAN ) &&
                            !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa[1],  aStopSingleQuoteEndRuUa + 2 ) ) )
                    {
                        LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
                        CharClass& rCC = GetCharClass( eLang );
                        if ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) &&
                             // use apostrophe only after letters, not after digits or punctuation
                             rCC.isLetter(rTxt, nInsPos-1) )
                        {
                            eType = ACQuotes::UseApostrophe;
                        }
                    }
                }

                if ( eType == ACQuotes::NONE && !bSingle &&
                    ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
                    eType = ACQuotes::NonBreakingSpace;

                InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
                break;
            }
            // tdf#133524 change "<<" and ">>" to double angle quotation marks
            else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
                IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
                ('<' == cChar || '>' == cChar) &&
                nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
            {
                const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
                if ( eLang.anyOf(
                        LANGUAGE_CATALAN,              // primary level
                        LANGUAGE_CATALAN_VALENCIAN,    // primary level
                        LANGUAGE_FINNISH,              // alternative primary level
                        LANGUAGE_FRENCH_SWISS,         // second level
                        LANGUAGE_GALICIAN,             // primary level
                        LANGUAGE_HUNGARIAN,            // second level
                        LANGUAGE_POLISH,               // second level
                        LANGUAGE_PORTUGUESE,           // primary level
                        LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
                        LANGUAGE_ROMANIAN,             // second level
                        LANGUAGE_ROMANIAN_MOLDOVA,     // second level
                        LANGUAGE_SWEDISH,              // alternative primary level
                        LANGUAGE_SWEDISH_FINLAND,      // alternative primary level
                        LANGUAGE_UKRAINIAN,            // primary level
                        LANGUAGE_USER_ARAGONESE,       // primary level
                        LANGUAGE_USER_ASTURIAN ) ||    // primary level
                    primary(eLang) == primary(LANGUAGE_GERMAN) ||  // alternative primary level
                    primary(eLang) == primary(LANGUAGE_SPANISH) )  // primary level
                {
                    InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
                    break;
                }
            }

            if( bInsert )
                rDoc.Insert( nInsPos, OUString(cChar) );
            else
                rDoc.Replace( nInsPos, OUString(cChar) );

            // Hardspaces autocorrection
            if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
            {
                // WARNING ATTENTION: rTxt is an alias of the text node's OUString
                // and its length may change (even become shorter) if FnAddNonBrkSpace succeeds!
                sal_Int32 nUpdatedPos = -1;
                if (NeedsHardspaceAutocorr(cChar))
                    nUpdatedPos = FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext );
                if (nUpdatedPos >= 0)
                {
                    nInsPos = nUpdatedPos;
                }
                else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
                {
                    // Remove the NBSP if it wasn't an autocorrection
                    if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
                            cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
                    {
                        // Look for the last HARD_SPACE
                        sal_Int32 nPos = nInsPos - 1;
                        bool bContinue = true;
                        while ( bContinue )
                        {
                            const sal_Unicode cTmpChar = rTxt[ nPos ];
                            if ( cTmpChar == cNonBreakingSpace )
                            {
                                rDoc.Delete( nPos, nPos + 1 );
                                bContinue = false;
                            }
                            else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
                                bContinue = false;
                            nPos--;
                        }
                    }
                }
            }
        }

        if( !nInsPos )
            break;

        sal_Int32 nPos = nInsPos - 1;

        if( IsWordDelim( rTxt[ nPos ]))
            break;

        // Set bold or underline automatically?
        if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
        {
            if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
            {
                FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
            }
            break;
        }

        while( nPos && !IsWordDelim( rTxt[ --nPos ]))
            ;

        // Found a Paragraph-start or a Blank, search for the word shortcut in
        // auto.
        sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
        if( !nPos && !IsWordDelim( rTxt[ 0 ]))
            --nCapLttrPos;          // begin of paragraph and no blank

        const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
        CharClass& rCC = GetCharClass( eLang );

        // no symbol characters
        if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
            break;

        if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
            // tdf#134940 fix regression of arrow "-->" resulted by premature
            // replacement of "--" since '>' was added to IsAutoCorrectChar()
            '>' != cChar )
        {
            // WARNING ATTENTION: rTxt is an alias of the text node's OUString
            // and becomes INVALID if ChgAutoCorrWord returns true!
            // => use aPara/pPara to create a valid copy of the string!
            OUString aPara;
            OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;

            bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
                                                    *this, pPara );
            if( !bChgWord )
            {
                sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
                while( nCapLttrPos1 < nInsPos &&
                        lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
                        )
                        ++nCapLttrPos1;
                while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
                        lcl_IsInArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
                        )
                        --nInsPos1;

                if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
                    nCapLttrPos1 < nInsPos1 &&
                    rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
                {
                    bChgWord = true;
                    nCapLttrPos = nCapLttrPos1;
                }
            }

            if( bChgWord )
            {
                if( !aPara.isEmpty() )
                {
                    sal_Int32 nEnd = nCapLttrPos;
                    while( nEnd < aPara.getLength() &&
                            !IsWordDelim( aPara[ nEnd ]))
                        ++nEnd;

                    // Capital letter at beginning of paragraph?
                    if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
                    {
                        FnCapitalStartSentence( rDoc, aPara, false,
                                                nCapLttrPos, nEnd, eLang );
                    }

                    if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
                    {
                        FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
                    }
                }
                break;
            }
        }

        if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
        {
            // WARNING ATTENTION: rTxt is an alias of the text node's OUString
            // and becomes INVALID if TransliterateRTLWord returns true!
            if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
                break;
        }

        if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
                (nInsPos >= 2 ) &&       // fdo#69762 avoid autocorrect for 2e-3
                ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
                FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
            ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
                ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
                FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
            ( IsAutoCorrFlag( ACFlags::SetDOIAttr ) &&
                ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
                FnSetDOIAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
            ;
        else
        {
            bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
            bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );

            if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
                 FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
            {
                // Correct accidental use of cAPS LOCK key (do this only when
                // the caps or shift lock key is pressed). Turn off the caps
                // lock afterwards.
                pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
            }

            // Capital letter at beginning of paragraph ?
            if( !bUnsupported &&
                IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
            {
                FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
            }

            // Two capital letters at beginning of word ??
            if( !bUnsupported &&
                IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
            {
                FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
            }

            if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
            {
                FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
            }
        }

    } while( false );
}

SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
                                                        LanguageType eLang )
{
    LanguageTag aLanguageTag( eLang);
    if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
        (void)CreateLanguageFile(aLanguageTag);
    const auto iter = m_aLangTable.find(aLanguageTag);
    assert(iter != m_aLangTable.end());
    return iter->second;
}

void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
{
    auto const iter = m_aLangTable.find(LanguageTag(eLang));
    if (iter != m_aLangTable.end())
        iter->second.SaveCplSttExceptList();
    else
    {
        SAL_WARN("editeng", "Save an empty list? ");
    }
}

void SvxAutoCorrect::SaveWordStartExceptList(LanguageType eLang)
{
    auto const iter = m_aLangTable.find(LanguageTag(eLang));
    if (iter != m_aLangTable.end())
        iter->second.SaveWordStartExceptList();
    else
    {
        SAL_WARN("editeng", "Save an empty list? ");
    }
}

// Adds a single word. The list will immediately be written to the file!
bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
                                        LanguageType eLang )
{
    SvxAutoCorrectLanguageLists* pLists = nullptr;
    // either the right language is present or it will be this in the general list
    auto iter = m_aLangTable.find(LanguageTag(eLang));
    if (iter != m_aLangTable.end())
        pLists = &iter->second;
    else
    {
        LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
        iter = m_aLangTable.find(aLangTagUndetermined);
        if (iter != m_aLangTable.end())
            pLists = &iter->second;
        else if(CreateLanguageFile(aLangTagUndetermined))
        {
            iter = m_aLangTable.find(aLangTagUndetermined);
            assert(iter != m_aLangTable.end());
            pLists = &iter->second;
        }
    }
    OSL_ENSURE(pLists, "No auto correction data");
    return pLists && pLists->AddToCplSttExceptList(rNew);
}

// Adds a single word. The list will immediately be written to the file!
bool SvxAutoCorrect::AddWordStartException( const OUString& rNew,
                                         LanguageType eLang )
{
    SvxAutoCorrectLanguageLists* pLists = nullptr;
    //either the right language is present or it is set in the general list
    auto iter = m_aLangTable.find(LanguageTag(eLang));
    if (iter != m_aLangTable.end())
        pLists = &iter->second;
    else
    {
        LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
        iter = m_aLangTable.find(aLangTagUndetermined);
        if (iter != m_aLangTable.end())
            pLists = &iter->second;
        else if(CreateLanguageFile(aLangTagUndetermined))
        {
            iter = m_aLangTable.find(aLangTagUndetermined);
            assert(iter != m_aLangTable.end());
            pLists = &iter->second;
        }
    }
    OSL_ENSURE(pLists, "No auto correction file!");
    return pLists && pLists->AddToWordStartExceptList(rNew);
}

OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
                                             sal_Int32 nPos)
{
    OUString sRet;
    if( !nPos )
        return sRet;

    sal_Int32 nEnd = nPos;

    // it must be followed by a blank or tab!
    if( ( nPos < rTxt.getLength() &&
        !IsWordDelim( rTxt[ nPos ])) ||
        IsWordDelim( rTxt[ --nPos ]))
        return sRet;

    while( nPos && !IsWordDelim( rTxt[ --nPos ]))
        ;

    // Found a Paragraph-start or a Blank, search for the word shortcut in
    // auto.
    sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
    if( !nPos && !IsWordDelim( rTxt[ 0 ]))
        --nCapLttrPos;          // Beginning of paragraph and no Blank!

    while( lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
        if( ++nCapLttrPos >= nEnd )
            return sRet;

    if( 3 > nEnd - nCapLttrPos )
        return sRet;

    const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );

    CharClass& rCC = GetCharClass(eLang);

    if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
        return sRet;

    sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
    return sRet;
}

// static
std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(std::u16string_view rTxt,
                                                          const sal_Int32 nPos)
{
    constexpr sal_Int32 nMinLen = 3;
    constexpr sal_Int32 nMaxLen = 9;
    std::vector<OUString> aRes;
    if (nPos >= nMinLen)
    {
        sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
        // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
        if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
        {
            while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
                ++nBegin;
        }
        if (nBegin + nMinLen <= nPos)
        {
            OUString sRes( rTxt.substr(nBegin, nPos - nBegin) );
            aRes.push_back(sRes);
            bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
            for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
            {
                bool bAdd = bLastStartedWithDelim;
                bLastStartedWithDelim = IsWordDelim(sRes[i]);
                bAdd = bAdd || bLastStartedWithDelim;
                if (bAdd)
                    aRes.push_back(sRes.copy(i));
            }
        }
    }
    return aRes;
}

bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
{
    OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");

    OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
    OUString sShareDirFile( sUserDirFile );

    SvxAutoCorrectLanguageLists* pLists = nullptr;

    tools::Time nAktTime(tools::Time::SYSTEM);

    auto nFndPos = aLastFileTable.find(rLanguageTag);
    bool lastCheckLessThan2MinutesAgo = nFndPos != aLastFileTable.end();
    if (lastCheckLessThan2MinutesAgo)
    {
        const tools::Time nLastCheckTime(tools::Time::fromEncodedTime(nFndPos->second));
        lastCheckLessThan2MinutesAgo
            = nLastCheckTime < nAktTime && nAktTime - nLastCheckTime < tools::Time(0, 2);
    }
    if (lastCheckLessThan2MinutesAgo)
    {
        // no need to test the file, because the last check is not older then
        // 2 minutes.
        if( bNewFile )
        {
            sShareDirFile = sUserDirFile;
            auto itBool = m_aLangTable.emplace(std::piecewise_construct,
                            std::forward_as_tuple(rLanguageTag),
                            std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
            pLists = &itBool.first->second;
            aLastFileTable.erase(nFndPos);
        }
    }
    else if(
             ( FStatHelper::IsDocument( sUserDirFile ) ||
               FStatHelper::IsDocument( sShareDirFile =
                   GetAutoCorrFileName( rLanguageTag ) ) ||
               FStatHelper::IsDocument( sShareDirFile =
                   GetAutoCorrFileName( rLanguageTag, false, false, true) )
             ) ||
        ( sShareDirFile = sUserDirFile, bNewFile )
          )
    {
        auto itBool = m_aLangTable.emplace(std::piecewise_construct,
                        std::forward_as_tuple(rLanguageTag),
                        std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
        pLists = &itBool.first->second;
        if (nFndPos != aLastFileTable.end())
            aLastFileTable.erase(nFndPos);
    }
    else if( !bNewFile )
    {
        aLastFileTable[rLanguageTag] = nAktTime.GetTime();
    }
    return pLists != nullptr;
}

bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
                                LanguageType eLang )
{
    LanguageTag aLanguageTag( eLang);
    if (auto const iter = m_aLangTable.find(aLanguageTag); iter != m_aLangTable.end())
        return iter->second.PutText(rShort, rLong);
    if (CreateLanguageFile(aLanguageTag))
    {
        auto const iter = m_aLangTable.find(aLanguageTag);
        assert (iter != m_aLangTable.end());
        return iter->second.PutText(rShort, rLong);
    }
    return false;
}

void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
                                              std::vector<SvxAutocorrWord>& aDeleteEntries,
                                              LanguageType eLang )
{
    LanguageTag aLanguageTag( eLang);
    auto iter = m_aLangTable.find(aLanguageTag);
    if (iter != m_aLangTable.end())
    {
        iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
    }
    else if(CreateLanguageFile( aLanguageTag ))
    {
        iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end());
        iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
    }
}

//  - return the replacement text (only for SWG-Format, all other
//    can be taken from the word list!)
bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
{
    return false;
}

void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
{
}

// Text with attribution (only the SWG - SWG format!)
bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
                              const OUString&, const OUString&, SfxObjectShell&, OUString& )
{
    return false;
}

OUString EncryptBlockName_Imp(std::u16string_view rName)
{
    OUStringBuffer aName;
    aName.append('#').append(rName);
    for (size_t nLen = rName.size(), nPos = 1; nPos < nLen; ++nPos)
    {
        if (lcl_IsInArr( u"!/:.\\", aName[nPos]))
            aName[nPos] &= 0x0f;
    }
    return aName.makeStringAndClear();
}

/* This code is copied from SwXMLTextBlocks::GeneratePackageName */
static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName )
{
    OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
    OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));

    for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
    {
        switch (aBuf[nPos])
        {
            case '!':
            case '/':
            case ':':
            case '.':
            case '\\':
            // tdf#156769 - escape the question mark in the storage name
            case '?':
                aBuf[nPos] = '_';
                break;
            default:
                break;
        }
    }

    rPackageName = aBuf.makeStringAndClear();
}

static std::optional<SvxAutocorrWordList::WordSearchStatus>
lcl_SearchWordsInList( SvxAutoCorrectLanguageLists* pList,
                       std::u16string_view rTxt,
                       sal_Int32& rStt, sal_Int32 nEndPos )
{
    const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
    return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
}

// the search for the words in the substitution table
std::optional<SvxAutocorrWordList::WordSearchStatus>
SvxAutoCorrect::SearchWordsInList(
    std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
    SvxAutoCorrDoc&, LanguageTag& rLang )
{
    LanguageTag aLanguageTag( rLang);
    if( aLanguageTag.isSystemLocale() )
        aLanguageTag.reset( MsLangId::getConfiguredSystemLanguage());

    /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
     * list instead? */

    // First search for eLang, then US-English -> English
    // and last in LANGUAGE_UNDETERMINED
    if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end());
        SvxAutoCorrectLanguageLists & rList = iter->second;
        auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
        if( pRet )
        {
            rLang = aLanguageTag;
            return pRet;
        }
    }
    else
    {
        // If the specific language's list wasn't found, keep on searching
        LanguageType eLang = aLanguageTag.getLanguageType();
        // the primary language for example EN
        aLanguageTag.reset(aLanguageTag.getLanguage());
        LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
        if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED
            && (m_aLangTable.find(aLanguageTag) != m_aLangTable.end()
                || CreateLanguageFile(aLanguageTag, false)))
        {
            //the language is available - so bring it on
            const auto iter = m_aLangTable.find(aLanguageTag);
            assert(iter != m_aLangTable.end());
            SvxAutoCorrectLanguageLists& rList = iter->second;
            auto pRet = lcl_SearchWordsInList(&rList, rTxt, rStt, nEndPos);
            if (pRet)
            {
                rLang = aLanguageTag;
                return pRet;
            }
        }
    }

    // This is the [All] entry in AutoCorrect Options' replacement language list, used with all
    // languages, so don't skip when language-specific list didn't contain the text:
    if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
            CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end());
        SvxAutoCorrectLanguageLists& rList = iter->second;
        auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
        if( pRet )
        {
            rLang = std::move(aLanguageTag);
            return pRet;
        }
    }
    return std::nullopt;
}

bool SvxAutoCorrect::SearchWordsNext(
    std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
    SvxAutocorrWordList::WordSearchStatus& rStatus )
{
    const SvxAutocorrWordList* pWordList = rStatus.GetAutocorrWordList();
    return pWordList->SearchWordsNext( rTxt, rStt, nEndPos, rStatus );
}

bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang,
                                             const OUString& sWord )
{
    LanguageTag aLanguageTag( eLang);

    /* TODO-BCP47: again horrible ugliness */

    // First search for eLang, then primary language of eLang
    // and last in LANGUAGE_UNDETERMINED

    if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
        auto& rList = iter->second;
        if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
            return true;
    }

    // If it still could not be found here, then keep on searching
    // the primary language for example EN
    aLanguageTag.reset(aLanguageTag.getLanguage());
    LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
                (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
                 CreateLanguageFile(aLanguageTag, false)))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
        auto& rList = iter->second;
        if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
            return true;
    }

    if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
            CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end());
        auto& rList = iter->second;
        if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
            return true;
    }
    return false;
}

static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
{
    SvStringsISortDtor::const_iterator it = pList->find(u"~"_ustr);
    SvStringsISortDtor::size_type nPos = it - pList->begin();
    if( nPos < pList->size() )
    {
        OUString sLowerWord(sWord.toAsciiLowerCase());
        OUString sAbr;
        for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
        {
            sAbr = (*pList)[ n ];
            if (sAbr[0] != '~')
                break;
            // ~ and ~. are not allowed!
            if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
            {
                OUString sLowerAbk(sAbr.toAsciiLowerCase());
                for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
                {
                    if( !--i )      // agrees
                        return true;

                    if( sLowerAbk[i] != sLowerWord[--ii])
                        break;
                }
            }
        }
    }
    OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
            "Wrongly sorted exception list?" );
    return false;
}

bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
                                const OUString& sWord, bool bAbbreviation)
{
    LanguageTag aLanguageTag( eLang);

    /* TODO-BCP47: did I mention terrible horrible ugliness? */

    // First search for eLang, then primary language of eLang
    // and last in LANGUAGE_UNDETERMINED

    if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
        const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
        if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
            return true;
    }

    // If it still could not be found here, then keep on searching
    // the primary language for example EN
    aLanguageTag.reset(aLanguageTag.getLanguage());
    LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
                (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
                 CreateLanguageFile(aLanguageTag, false)))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
        const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
        if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
            return true;
    }

    if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
            CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const auto iter = m_aLangTable.find(aLanguageTag);
        assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
        const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
        if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
            return true;
    }
    return false;
}

OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
                                            bool bNewFile, bool bTst, bool bUnlocalized ) const
{
    OUString sRet, sExt( rLanguageTag.getBcp47() );
    if (bUnlocalized)
    {
        // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
        std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
        if (!vecFallBackStrings.empty())
           sExt = vecFallBackStrings[0];
    }

    sExt = "_" + sExt + ".dat";
    if( bNewFile )
        sRet = sUserAutoCorrFile + sExt;
    else if( !bTst )
        sRet = sShareAutoCorrFile + sExt;
    else
    {
        // test first in the user directory - if not exist, then
        sRet = sUserAutoCorrFile + sExt;
        if( !FStatHelper::IsDocument( sRet ))
            sRet = sShareAutoCorrFile + sExt;
    }
    return sRet;
}

SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
                SvxAutoCorrect& rParent,
                OUString aShareAutoCorrectFile,
                OUString aUserAutoCorrectFile)
:   sShareAutoCorrFile(std::move( aShareAutoCorrectFile )),
    sUserAutoCorrFile(std::move( aUserAutoCorrectFile )),
    aModifiedDate( Date::EMPTY ),
    aModifiedTime( tools::Time::EMPTY ),
    aLastCheckTime( tools::Time::EMPTY ),
    rAutoCorrect(rParent),
    nFlags(ACFlags::NONE)
{
}

SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
{
}

bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
{
    // Access the file system only every 2 minutes to check the date stamp
    bool bRet = false;

    tools::Time nMinTime( 0, 2 );
    tools::Time nAktTime( tools::Time::SYSTEM );
    if( aLastCheckTime <= nAktTime) // overflow?
        return false;
    nAktTime -= aLastCheckTime;
    if( nAktTime > nMinTime )     // min time past
    {
        Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
        if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
                                            &aTstDate, &aTstTime ) &&
            ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
        {
            bRet = true;
            // then remove all the lists fast!
            if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
            {
                pCplStt_ExcptLst.reset();
            }
            if( (ACFlags::WordStartLstLoad & nFlags) && pWordStart_ExcptLst )
            {
                pWordStart_ExcptLst.reset();
            }
            if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
            {
                pAutocorr_List.reset();
            }
            nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WordStartLstLoad | ACFlags::ChgWordLstLoad );
        }
        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
    }
    return bRet;
}

void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
                                        std::unique_ptr<SvStringsISortDtor>& rpLst,
                                        const OUString& sStrmName,
                                        rtl::Reference<SotStorage>& rStg)
{
    if( rpLst )
        rpLst->clear();
    else
        rpLst.reset( new SvStringsISortDtor );

    {
        if( rStg.is() && rStg->IsStream( sStrmName ) )
        {
            rtl::Reference<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
                ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
            if( ERRCODE_NONE != xStrm->GetError())
            {
                xStrm.clear();
                rStg.clear();
                RemoveStream_Imp( sStrmName );
            }
            else
            {
                const uno::Reference< uno::XComponentContext >& xContext =
                    comphelper::getProcessComponentContext();

                xml::sax::InputSource aParserInput;
                aParserInput.sSystemId = sStrmName;

                xStrm->Seek( 0 );
                xStrm->SetBufferSize( 8 * 1024 );
                aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );

                // get filter
                uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );

                // connect parser and filter
                uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
                uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
                xParser->setFastDocumentHandler( xFilter );
                xParser->registerNamespace( u"http://openoffice.org/2001/block-list"_ustr, SvXMLAutoCorrectToken::NAMESPACE );
                xParser->setTokenHandler( xTokenHandler );

                // parse
                try
                {
                    xParser->parseStream( aParserInput );
                }
                catch( const xml::sax::SAXParseException& )
                {
                    // re throw ?
                }
                catch( const xml::sax::SAXException& )
                {
                    // re throw ?
                }
                catch( const io::IOException& )
                {
                    // re throw ?
                }
            }
        }

        // Set time stamp
        FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
                                        &aModifiedDate, &aModifiedTime );
        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
    }

}

void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
                            const SvStringsISortDtor& rLst,
                            const OUString& sStrmName,
                            rtl::Reference<SotStorage> const &rStg,
                            bool bConvert )
{
    if( !rStg.is() )
        return;

    if( rLst.empty() )
    {
        rStg->Remove( sStrmName );
        rStg->Commit();
    }
    else
    {
        rtl::Reference<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
                ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
        if( xStrm.is() )
        {
            xStrm->SetSize( 0 );
            xStrm->SetBufferSize( 8192 );
            xStrm->SetProperty( u"MediaType"_ustr, Any(u"text/xml"_ustr) );

            const uno::Reference< uno::XComponentContext >& xContext =
                comphelper::getProcessComponentContext();

            uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
            uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
            xWriter->setOutputStream(xOut);

            uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
            rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );

            xExp->exportDoc( XML_BLOCK_LIST );

            xStrm->Commit();
            if( xStrm->GetError() == ERRCODE_NONE )
            {
                xStrm.clear();
                if (!bConvert)
                {
                    rStg->Commit();
                    if( ERRCODE_NONE != rStg->GetError() )
                    {
                        rStg->Remove( sStrmName );
                        rStg->Commit();
                    }
                }
            }
        }
    }
}

SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
{
    if( pAutocorr_List )
        pAutocorr_List->DeleteAndDestroyAll();
    else
        pAutocorr_List.reset( new SvxAutocorrWordList() );

    try
    {
        uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
        uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
        const uno::Reference< uno::XComponentContext >& xContext = comphelper::getProcessComponentContext();

        xml::sax::InputSource aParserInput;
        aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
        aParserInput.aInputStream = xStrm->getInputStream();

        // get parser
        uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
        SAL_INFO("editeng", "AutoCorrect Import" );
        uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
        uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;

        // connect parser and filter
        xParser->setFastDocumentHandler( xFilter );
        xParser->registerNamespace( u"http://openoffice.org/2001/block-list"_ustr, SvXMLAutoCorrectToken::NAMESPACE );
        xParser->setTokenHandler(xTokenHandler);

        // parse
        xParser->parseStream( aParserInput );
    }
    catch ( const uno::Exception& )
    {
        TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
    }

    // Set time stamp
    FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
                                    &aModifiedDate, &aModifiedTime );
    aLastCheckTime = tools::Time( tools::Time::SYSTEM );

    return pAutocorr_List.get();
}

const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
{
    if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
    {
        LoadAutocorrWordList();
        if( !pAutocorr_List )
        {
            OSL_ENSURE( false, "No valid list" );
            pAutocorr_List.reset( new SvxAutocorrWordList() );
        }
        nFlags |= ACFlags::ChgWordLstLoad;
    }
    return pAutocorr_List.get();
}

SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
{
    if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
    {
        LoadCplSttExceptList();
        if( !pCplStt_ExcptLst )
        {
            OSL_ENSURE( false, "No valid list" );
            pCplStt_ExcptLst.reset( new SvStringsISortDtor );
        }
        nFlags |= ACFlags::CplSttLstLoad;
    }
    return pCplStt_ExcptLst.get();
}

bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
{
    bool bRet = false;
    if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
    {
        MakeUserStorage_Impl();
        rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

        SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );

        xStg = nullptr;
        // Set time stamp
        FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
                                            &aModifiedDate, &aModifiedTime );
        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
        bRet = true;
    }
    return bRet;
}

bool SvxAutoCorrectLanguageLists::AddToWordStartExceptList(const OUString& rNew)
{
    bool bRet = false;
    if( !rNew.isEmpty() && GetWordStartExceptList()->insert( rNew ).second )
    {
        MakeUserStorage_Impl();
        rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

        SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );

        xStg = nullptr;
        // Set time stamp
        FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
                                            &aModifiedDate, &aModifiedTime );
        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
        bRet = true;
    }
    return bRet;
}

SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
{
    try
    {
        rtl::Reference<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
        if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
            LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
    }
    catch (const css::ucb::ContentCreationException&)
    {
    }
    return pCplStt_ExcptLst.get();
}

void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
{
    MakeUserStorage_Impl();
    rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

    SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );

    xStg = nullptr;

    // Set time stamp
    FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
                                            &aModifiedDate, &aModifiedTime );
    aLastCheckTime = tools::Time( tools::Time::SYSTEM );
}

SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWordStartExceptList()
{
    try
    {
        rtl::Reference<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
        if( xStg.is() && xStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
            LoadXMLExceptList_Imp( pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
    }
    catch (const css::ucb::ContentCreationException &)
    {
        TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWordStartExceptList");
    }
    return pWordStart_ExcptLst.get();
}

void SvxAutoCorrectLanguageLists::SaveWordStartExceptList()
{
    MakeUserStorage_Impl();
    rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

    SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );

    xStg = nullptr;
    // Set time stamp
    FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
                                            &aModifiedDate, &aModifiedTime );
    aLastCheckTime = tools::Time( tools::Time::SYSTEM );
}

SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWordStartExceptList()
{
    if( !( ACFlags::WordStartLstLoad & nFlags ) || IsFileChanged_Imp() )
    {
        LoadWordStartExceptList();
        if( !pWordStart_ExcptLst )
        {
            OSL_ENSURE( false, "No valid list" );
            pWordStart_ExcptLst.reset( new SvStringsISortDtor );
        }
        nFlags |= ACFlags::WordStartLstLoad;
    }
    return pWordStart_ExcptLst.get();
}

void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
{
    if( sShareAutoCorrFile != sUserAutoCorrFile )
    {
        rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
        if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
            xStg->IsStream( rName ) )
        {
            xStg->Remove( rName );
            xStg->Commit();

            xStg = nullptr;
        }
    }
}

void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
{
    // The conversion needs to happen if the file is already in the user
    // directory and is in the old format. Additionally it needs to
    // happen when the file is being copied from share to user.

    bool bError = false, bConvert = false, bCopy = false;
    INetURLObject aDest;
    INetURLObject aSource;

    if (sUserAutoCorrFile != sShareAutoCorrFile )
    {
        aSource = INetURLObject ( sShareAutoCorrFile );
        aDest = INetURLObject ( sUserAutoCorrFile );
        if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
        {
            aDest.SetExtension ( u"bak" );
            bConvert = true;
        }
        bCopy = true;
    }
    else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
    {
        aSource = INetURLObject ( sUserAutoCorrFile );
        aDest = INetURLObject ( sUserAutoCorrFile );
        aDest.SetExtension ( u"bak" );
        bCopy = bConvert = true;
    }
    if (bCopy)
    {
        try
        {
            OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
            sal_Int32 nSlashPos = sMain.lastIndexOf('/');
            sMain = sMain.copy(0, nSlashPos);
            ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
            TransferInfo aInfo;
            aInfo.NameClash = NameClash::OVERWRITE;
            aInfo.NewTitle = aDest.GetLastName();
            aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
            aInfo.MoveData  = false;
            aNewContent.executeCommand( u"transfer"_ustr, Any(aInfo));
        }
        catch (...)
        {
            bError = true;
        }
    }
    if (bConvert && !bError)
    {
        rtl::Reference<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
        rtl::Reference<SotStorage> xDstStg = new SotStorage(sUserAutoCorrFile, StreamMode::WRITE);

        if( xSrcStg.is() && xDstStg.is() )
        {
            std::unique_ptr<SvStringsISortDtor> pTmpWordList;

            if (xSrcStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
                LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWordStart_ExcptLstStr, xSrcStg );

            if (pTmpWordList)
            {
                SaveExceptList_Imp( *pTmpWordList, pXMLImplWordStart_ExcptLstStr, xDstStg, true );
                pTmpWordList.reset();
            }

            if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
                LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );

            if (pTmpWordList)
            {
                SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
                pTmpWordList->clear();
            }

            GetAutocorrWordList();
            MakeBlocklist_Imp( *xDstStg );
            sShareAutoCorrFile = sUserAutoCorrFile;
            xDstStg = nullptr;
            try
            {
                ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
                aContent.executeCommand ( u"delete"_ustr, Any ( true ) );
            }
            catch (...)
            {
            }
        }
    }
    else if( bCopy && !bError )
        sShareAutoCorrFile = sUserAutoCorrFile;
}

bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
{
    bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
    if( !bRemove )
    {
        rtl::Reference<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
                    ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
        if( refList.is() )
        {
            refList->SetSize( 0 );
            refList->SetBufferSize( 8192 );
            refList->SetProperty( u"MediaType"_ustr, Any(u"text/xml"_ustr) );

            const uno::Reference< uno::XComponentContext >& xContext =
                comphelper::getProcessComponentContext();

            uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
            uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
            xWriter->setOutputStream(xOut);

            rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );

            xExp->exportDoc( XML_BLOCK_LIST );

            refList->Commit();
            bRet = ERRCODE_NONE == refList->GetError();
            if( bRet )
            {
                refList.clear();
                rStg.Commit();
                if( ERRCODE_NONE != rStg.GetError() )
                {
                    bRemove = true;
                    bRet = false;
                }
            }
        }
        else
            bRet = false;
    }

    if( bRemove )
    {
        rStg.Remove( pXMLImplAutocorr_ListStr );
        rStg.Commit();
    }

    return bRet;
}

bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
{
    // First get the current list!
    GetAutocorrWordList();

    MakeUserStorage_Impl();
    rtl::Reference<SotStorage> xStorage = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

    bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();

    if( bRet )
    {
        for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
        {
            std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
            if( xFoundEntry )
            {
                if( !xFoundEntry->IsTextOnly() )
                {
                    OUString aName( aWordToDelete.GetShort() );
                    if (xStorage->IsOLEStorage())
                        aName = EncryptBlockName_Imp(aName);
                    else
                        GeneratePackageName ( aWordToDelete.GetShort(), aName );

                    if( xStorage->IsContained( aName ) )
                    {
                        xStorage->Remove( aName );
                        bRet = xStorage->Commit();
                    }
                }
            }
        }

        for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
        {
            SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
            std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
            if( xRemoved )
            {
                if( !xRemoved->IsTextOnly() )
                {
                    // Still have to remove the Storage
                    OUString sStorageName( aWordToAdd.GetShort() );
                    if (xStorage->IsOLEStorage())
                        sStorageName = EncryptBlockName_Imp(sStorageName);
                    else
                        GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);

                    if( xStorage->IsContained( sStorageName ) )
                        xStorage->Remove( sStorageName );
                }
            }
            bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );

            if ( !bRet )
            {
                break;
            }
        }

        if ( bRet )
        {
            bRet = MakeBlocklist_Imp( *xStorage );
        }
    }
    return bRet;
}

bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
{
    // First get the current list!
    GetAutocorrWordList();

    MakeUserStorage_Impl();
    rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);

    bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();

    // Update the word list
    if( bRet )
    {
        SvxAutocorrWord aNew(rShort, rLong, true );
        std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
        if( xRemove )
        {
            if( !xRemove->IsTextOnly() )
            {
                // Still have to remove the Storage
                OUString sStgNm( rShort );
                if (xStg->IsOLEStorage())
                    sStgNm = EncryptBlockName_Imp(sStgNm);
                else
                    GeneratePackageName ( rShort, sStgNm);

                if( xStg->IsContained( sStgNm ) )
                    xStg->Remove( sStgNm );
            }
        }

        if( pAutocorr_List->Insert( std::move(aNew) ) )
        {
            bRet = MakeBlocklist_Imp( *xStg );
            xStg = nullptr;
        }
        else
        {
            bRet = false;
        }
    }
    return bRet;
}

void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
                                               SfxObjectShell& rShell )
{
    // First get the current list!
    GetAutocorrWordList();

    MakeUserStorage_Impl();

    try
    {
        uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
        OUString sLong;
        bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
        xStg = nullptr;

        // Update the word list
        if( bRet )
        {
            if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
            {
                rtl::Reference<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
                MakeBlocklist_Imp( *xStor );
            }
        }
    }
    catch ( const uno::Exception& )
    {
    }
}

// Keep the list sorted ...
struct SvxAutocorrWordList::CompareSvxAutocorrWordList
{
    bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
    {
        CollatorWrapper& rCmp = ::GetCollatorWrapper();
        return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
    }
};

namespace {

typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;

}

struct SvxAutocorrWordList::Impl
{

    // only one of these contains the data
    // maSortedVector is manually sorted so we can optimise data movement
    mutable AutocorrWordSetType maSortedVector;
    mutable AutocorrWordHashType maHash; // key is 'Short'

    void DeleteAndDestroyAll()
    {
        maHash.clear();
        maSortedVector.clear();
    }
};

SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}

SvxAutocorrWordList::~SvxAutocorrWordList()
{
}

void SvxAutocorrWordList::DeleteAndDestroyAll()
{
    mpImpl->DeleteAndDestroyAll();
}

struct SvxAutocorrWordList::Iterator::Impl {
    typedef SvxAutocorrWordList::AutocorrWordSetType::const_iterator VecIterType;
    typedef AutocorrWordHashType::const_iterator HashIterType;

    HashIterType  mHashIter, mHashEnd;
    VecIterType   mSortedVectorIter, mSortedVectorEnd;

    Impl(const HashIterType& hashIter, const HashIterType& hashEnd,
         const VecIterType& vecIter, const VecIterType& vecEnd)
        : mHashIter(hashIter), mHashEnd(hashEnd),
          mSortedVectorIter(vecIter), mSortedVectorEnd(vecEnd) {}

    bool Step() {
        // Iterate hash table, followed by sorted vector
        if (mHashIter != mHashEnd) {
            return ++mHashIter != mHashEnd
                   || mSortedVectorIter != mSortedVectorEnd;
        }
        return ++mSortedVectorIter != mSortedVectorEnd;
    }

    const SvxAutocorrWord& operator*() {
        return (mHashIter == mHashEnd) ? *mSortedVectorIter : mHashIter->second;
    }
    const SvxAutocorrWord* operator->() {
        return (mHashIter == mHashEnd) ? &*mSortedVectorIter : &mHashIter->second;
    }
};

SvxAutocorrWordList::Iterator::Iterator(
    std::unique_ptr<SvxAutocorrWordList::Iterator::Impl> pImpl
) : mpImpl(std::move(pImpl))
{
}

SvxAutocorrWordList::Iterator::Iterator(
    const SvxAutocorrWordList::Iterator& it
) : mpImpl(new Impl(*(it.mpImpl)))
{
}

SvxAutocorrWordList::Iterator::~Iterator()
{
}

bool SvxAutocorrWordList::Iterator::Step()
{
    return mpImpl->Step();
}

const SvxAutocorrWord& SvxAutocorrWordList::Iterator::operator*() const
{
    return **mpImpl;
}

const SvxAutocorrWord* SvxAutocorrWordList::Iterator::operator->() const
{
    return mpImpl->operator->();
}

bool SvxAutocorrWordList::ContainsPattern(const OUString& aShort) const
{
    // check hash table first
    if (mpImpl->maHash.contains(aShort)) {
        return true;
    }

    // then do binary search on sorted vector
    CollatorWrapper& rCmp = ::GetCollatorWrapper();
    auto it = std::lower_bound(mpImpl->maSortedVector.begin(),
                               mpImpl->maSortedVector.end(),
                               aShort,
                               [&](const SvxAutocorrWord& elm,
                                   const OUString& val) {
                                       return rCmp.compareString(elm.GetShort(),
                                                                 val) < 0;
                                   } );
    if (it != mpImpl->maSortedVector.end()
        && rCmp.compareString(aShort, it->GetShort()) == 0)
    {
        return true;
    }

    return false;
}

// returns true if inserted
const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
{
    if ( mpImpl->maSortedVector.empty() ) // use the hash
    {
        OUString aShort = aWord.GetShort();
        auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
        if (inserted)
            return &(it->second);
        return nullptr;
    }
    else
    {
        auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
        CollatorWrapper& rCmp = ::GetCollatorWrapper();
        if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
        {
            it = mpImpl->maSortedVector.insert(it, std::move(aWord));
            return &*it;
        }
        return nullptr;
    }
}

void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
{
    (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
}

bool SvxAutocorrWordList::empty() const
{
    return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
}

std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
{

    if ( mpImpl->maSortedVector.empty() ) // use the hash
    {
        AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
        if( it != mpImpl->maHash.end() )
        {
            SvxAutocorrWord pMatch = std::move(it->second);
            mpImpl->maHash.erase (it);
            return pMatch;
        }
    }
    else
    {
        auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
        if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
        {
            SvxAutocorrWord pMatch = std::move(*it);
            mpImpl->maSortedVector.erase (it);
            return pMatch;
        }
    }
    return std::optional<SvxAutocorrWord>();
}

// return the sorted contents - defer sorting until we have to.
const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
{
    // convert from hash to set permanently
    if ( mpImpl->maSortedVector.empty() )
    {
        std::vector<SvxAutocorrWord> tmp;
        tmp.reserve(mpImpl->maHash.size());
        for (auto & rPair : mpImpl->maHash)
            tmp.emplace_back(std::move(rPair.second));
        mpImpl->maHash.clear();
        // sort twice - this gets the list into mostly-sorted order, which
        // reduces the number of times we need to invoke the expensive ICU collate fn.
        std::sort(tmp.begin(), tmp.end(),
            [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
            {
                return lhs.GetShort() < rhs.GetShort();
            });
        // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
        // stable_sort is twice as fast as sort in this situation because it does
        // fewer comparison operations.
        std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
        mpImpl->maSortedVector = std::move(tmp);
    }
    return mpImpl->maSortedVector;
}

std::optional<SvxAutocorrWord>
SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
                                 std::u16string_view rTxt,
                                 sal_Int32 &rStt,
                                 sal_Int32 nEndPos) const
{
    const OUString& rChk = pFnd->GetShort();

    sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
    sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
    assert(nEndPos >= 0);
    size_t nSttWdPos = nEndPos;

    // direct replacement of keywords surrounded by colons (for example, ":name:")
    bool bColonNameColon = static_cast<sal_Int32>(rTxt.size()) > nEndPos &&
        rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
    if ( nEndPos + (bColonNameColon ? 1 : 0) < rChk.getLength() - left_wildcard - right_wildcard )
    {
        return std::nullopt;
    }

    bool bWasWordDelim = false;
    sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
    if (bColonNameColon)
        nCalcStt++;
    if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
          ( nCalcStt < rStt &&
            IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
    {
        TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
        OUString sWord( rTxt.substr(nCalcStt, rChk.getLength() - left_wildcard) );
        if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
        {
            rStt = nCalcStt;
            if (!left_wildcard)
            {
                // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
                if (static_cast<sal_Int32>(rTxt.size()) > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
                {
                    return std::nullopt;
                }
                return *pFnd;
            }
            // get the first word delimiter position before the matching ".*word" pattern
            while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
                ;
            if (bWasWordDelim) rStt++;

            // don't let wildcard pattern override non-wildcard one
            OUString aShort(rTxt.substr(rStt, nEndPos - rStt));
            if (ContainsPattern(aShort)) {
                return std::nullopt;
            }

            OUString left_pattern( rTxt.substr(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard) );
            // avoid double spaces before simple "word" replacement
            left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().subView(1) : pFnd->GetLong();
            return SvxAutocorrWord(aShort, left_pattern);
        }
    } else
    // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
    if ( right_wildcard )
    {

        OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
        // Get the last word delimiter position
        bool not_suffix;

        while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
            ;
        // search the first occurrence (with a left word delimitation, if needed)
        size_t nFndPos = rStt - 1;
        do {
            nFndPos = rTxt.find( sTmp, nFndPos + 1);
            if (nFndPos == std::u16string_view::npos)
                break;
            not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
        } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );

        if ( nFndPos != std::u16string_view::npos )
        {
            sal_Int32 extra_repl = static_cast<sal_Int32>(nFndPos) + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"

            if ( left_wildcard )
            {
                // get the first word delimiter position before the matching ".*word.*" pattern
                while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
                    ;
                if (bWasWordDelim) nFndPos++;
            }
            if (nEndPos + extra_repl <= static_cast<sal_Int32>(nFndPos))
            {
                return std::nullopt;
            }
            // return matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
            OUString aShort( rTxt.substr(nFndPos, nEndPos - nFndPos + extra_repl) );
            // don't let wildcard pattern override non-wildcard one
            if (ContainsPattern(aShort)) {
                return std::nullopt;
            }

            OUString aLong;
            rStt = nFndPos;
            if ( !left_wildcard )
            {
                sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
                aLong = pFnd->GetLong() + (siz > 0 ? rTxt.substr(nFndPos + sTmp.getLength(), siz) : u"");
            } else {
                OUStringBuffer buf;
                do {
                    nSttWdPos = rTxt.find( sTmp, nFndPos);
                    if (nSttWdPos != std::u16string_view::npos)
                    {
                        sal_Int32 nTmp(nFndPos);
                        while (nTmp < static_cast<sal_Int32>(nSttWdPos) && !IsWordDelim(rTxt[nTmp]))
                        {
                            nTmp++;
                        }
                        if (nTmp < static_cast<sal_Int32>(nSttWdPos)) {
                            break; // word delimiter found
                        }
                        buf.append(rTxt.substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
                        nFndPos = nSttWdPos + sTmp.getLength();
                    }
                } while (nSttWdPos != std::u16string_view::npos);
                if (static_cast<sal_Int32>(nEndPos - nFndPos) > extra_repl) {
                    buf.append(rTxt.substr(nFndPos, nEndPos - nFndPos));
                }
                aLong = buf.makeStringAndClear();
            }
            if ( (static_cast<sal_Int32>(rTxt.size()) > nEndPos && IsWordDelim(rTxt[nEndPos])) || static_cast<sal_Int32>(rTxt.size()) == nEndPos )
            {
                return SvxAutocorrWord(aShort, aLong);
            }
        }
    }
    return std::nullopt;
}

std::optional<SvxAutocorrWordList::WordSearchStatus>
SvxAutocorrWordList::SearchWordsInList(std::u16string_view rTxt,
                                       sal_Int32& rStt,
                                       sal_Int32 nEndPos) const
{
    for (auto it = mpImpl->maHash.begin(); it != mpImpl->maHash.end(); ++it)
    {
        if(auto pTmp = WordMatches(&it->second, rTxt, rStt, nEndPos))
        {
            return WordSearchStatus(
                *pTmp, this,
                Iterator(std::make_unique<Iterator::Impl>(
                    it, mpImpl->maHash.end(),
                    mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end()
                ))
            );
        }
    }

    for (auto it = mpImpl->maSortedVector.begin();
         it != mpImpl->maSortedVector.end(); ++it)
    {
        if(auto pTmp = WordMatches(&*it, rTxt, rStt, nEndPos))
        {
            return WordSearchStatus(
                *pTmp, this,
                Iterator(std::make_unique<Iterator::Impl>(
                    mpImpl->maHash.end(), mpImpl->maHash.end(),
                    it, mpImpl->maSortedVector.end()
                ))
            );
        }
    }

    return std::nullopt;
}

bool
SvxAutocorrWordList::SearchWordsNext(std::u16string_view rTxt,
                                     sal_Int32& rStt,
                                     sal_Int32 nEndPos,
                                     SvxAutocorrWordList::WordSearchStatus& rStatus) const
{
    while(rStatus.StepIter())
    {
        if(auto pTmp = WordMatches(rStatus.GetWordAtIter(),
                                   rTxt, rStt, nEndPos))
        {
            rStatus.mFnd = *pTmp;
            return true;
        }
    }

    return false;
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Messung V0.5 in Prozent

¤ Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.0.76Bemerkung: (vorverarbeitet am 2026-06-06) ¤

*Bot Zugriff

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.