/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
// Characters that are skipped at the edges of a word before it is examined.
// tdf#54409: typographical quotation marks are checked as well, for the case
// where the ASCII quotation marks were skipped.
// Curious, why these \u0083\u0084\u0089\u0091\u0092\u0093\u0094 are handled as "begin characters"?

/* Skipped at the beginning of a word: opening brackets and all kinds of begin characters */
constexpr std::u16string_view sImplSttSkipChars
    = u"\"'([{"                                            // ASCII quotes, opening brackets
      u"\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f"  // typographical quotation marks
      u"\u0083\u0084\u0089\u0091\u0092\u0093\u0094";       // code points in the U+0080..U+009F range

/* Skipped at the end of a word: closing brackets, plus the same quote set as above */
constexpr std::u16string_view sImplEndSkipChars
    = u"\"')]}"                                            // ASCII quotes, closing brackets
      u"\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f"  // typographical quotation marks
      u"\u0083\u0084\u0089\u0091\u0092\u0093\u0094";       // code points in the U+0080..U+009F range
// Called by the functions: // - FnCapitalStartWord // - FnCapitalStartSentence // after the exchange of characters. Then the words, if necessary, can be inserted // into the exception list. void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
sal_Unicode )
{
}
// Delete all non alphanumeric. Test the characters at the beginning/end of // the word ( recognizes: "(min.", "/min.", and so on.) for( ; nSttPos < nEndPos; ++nSttPos ) if( rCC.isLetterNumeric( rTxt, nSttPos )) break; for( ; nSttPos < nEndPos; --nEndPos ) if( rCC.isLetterNumeric( rTxt, nEndPos - 1 )) break;
// Is the word a compounded word separated by delimiters? // If so, keep track of all delimiters so each constituent // word can be checked for two initial capital letters.
std::deque<sal_Int32> aDelimiters;
// Always check for two capitals at the beginning // of the entire word, so start at nSttPos.
aDelimiters.push_back(nSttPos);
// Find all compound word delimiters for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
{ if (IsCompoundWordDelimChar(rTxt[ n ]))
{
aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
}
}
// Decide where to put the terminating delimiter. // If the last AutoCorrect char was a newline, then the AutoCorrect // char will not be included in rTxt. // If the last AutoCorrect char was not a newline, then the AutoCorrect // character will be the last character in rTxt. if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
aDelimiters.push_back(nEndPos);
// Iterate through the word and all words that compose it. // Two capital letters at the beginning of word? for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
{
nSttPos = aDelimiters[nI];
nEndPos = aDelimiters[nI + 1];
if( nSttPos+2 < nEndPos &&
IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) && // Is the third character a lower case
IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) && // Do not replace special attributes
0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
{ // test if the word is in an exception list
OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 )); if( !FindInWordStartExceptList(eLang, sWord) )
{ // Check that word isn't correctly spelt before correcting:
css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
LinguMgr::GetSpellChecker(); if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
{
Sequence< css::beans::PropertyValue > aEmptySeq; if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
{ return;
}
}
sal_Unicode cSave = rTxt[ nSttPos ];
OUString sChar = rCC.lowercase( OUString(cSave) ); if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
{ if( ACFlags::SaveWordWordStartLst & nFlags )
rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
}
}
}
}
}
// In some languages ordinal suffixes should never be // changed to superscript. Let's break for those languages. if (!eLang.anyOf(
LANGUAGE_CATALAN, // tdf#156792
LANGUAGE_CATALAN_VALENCIAN,
LANGUAGE_SWEDISH,
LANGUAGE_SWEDISH_FINLAND))
{
CharClass& rCC = GetCharClass(eLang);
for (; nSttPos < nEndPos; ++nSttPos) if (!lcl_IsInArr(sImplSttSkipChars, rTxt[nSttPos])) break; for (; nSttPos < nEndPos; --nEndPos) if (!lcl_IsInArr(sImplEndSkipChars, rTxt[nEndPos - 1])) break;
// Get the last number in the string to check
sal_Int32 nNumEnd = nEndPos; bool bFoundEnd = false; bool isValidNumber = true;
sal_Int32 i = nEndPos; while (i > nSttPos)
{
i--; bool isDigit = rCC.isDigit(rTxt, i); if (bFoundEnd)
isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));
// Check if the characters after that number correspond to the ordinal suffix
uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
= i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
// add extra suffixes for languages not handled by i18npool/ICU if ( primary(eLang) == primary(LANGUAGE_PORTUGUESE) &&
( nEndPos == nNumEnd + 3 || nEndPos == nNumEnd + 4 ) &&
( sEnd[0] == 'a' || sEnd[0] == 'o' || sEnd[0] == 'r' ) )
{ auto aExtendedSuffixes = comphelper::sequenceToContainer< std::vector<OUString> >(aSuffixes);
aExtendedSuffixes.push_back(u"as"_ustr); // plural form of 'a'
aExtendedSuffixes.push_back(u"os"_ustr); // plural form of 'o'
aExtendedSuffixes.push_back(u"ra"_ustr); // alternative form of 'a'
aExtendedSuffixes.push_back(u"ro"_ustr); // alternative form of 'o'
aExtendedSuffixes.push_back(u"ras"_ustr); // alternative form of "as"
aExtendedSuffixes.push_back(u"ros"_ustr); // alternative form of "os"
aSuffixes = comphelper::containerToSequence(aExtendedSuffixes);
}
for (OUString const & sSuffix : aSuffixes)
{ if (sSuffix == sEnd)
{ // Check if the ordinal suffix has to be set as super script if (rCC.isLetter(sSuffix))
{
sal_Int32 nNumberChanged = 0;
sal_Int32 nSuffixChanged = 0; // exceptions for Portuguese // add missing dot: 1a -> 1.ª // and remove optional 'r': 1ro -> 1.º if ( primary(eLang) == primary(LANGUAGE_PORTUGUESE) )
{ if ( sSuffix.startsWith("r") )
{
rDoc.Delete( nNumEnd + 1, nNumEnd + 2 );
nSuffixChanged = -1;
}
rDoc.Insert( nNumEnd + 1, u"."_ustr );
nNumberChanged = 1;
}
// rTxt may refer to the frame text that will change in the calls to rDoc.Delete / rDoc.Insert; // keep a local copy for later use
OUString aOrigTxt = rTxt;
sal_Int32 nFirstReplacementTextLengthChange = 0;
// Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash" // [0-9]--[0-9] double dash always replaced with "enDash" // Finnish and Hungarian use enDash instead of emDash. bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH); if( 4 <= nEndPos - nSttPos )
{
std::u16string_view sTmpView( aOrigTxt.subView( nSttPos, nEndPos - nSttPos ) );
size_t nFndPos = sTmpView.find(u"--"); if (nFndPos > 0 && nFndPos < sTmpView.size() - 2)
{ // Use proper codepoints. Currently, CharClass::isLetterNumeric is broken, it // uses the index *both* as code unit index (when checking it as ASCII), *and* // as code point index (when passes to css::i18n::XCharacterClassification). // Oh well... Anyway, single-codepoint strings will workaround it.
sal_Int32 nStart = nSttPos + nFndPos;
sal_uInt32 chStart = aOrigTxt.iterateCodePoints(&nStart, -1);
OUString sStart(&chStart, 1); // No idea why sImplEndSkipChars is checked at start if (rCC.isLetterNumeric(sStart, 0) || lcl_IsInArr(sImplEndSkipChars, chStart))
{
sal_Int32 nEnd = nSttPos + nFndPos + 2;
sal_uInt32 chEnd = aOrigTxt.iterateCodePoints(&nEnd, 1);
OUString sEnd(&chEnd, 1); // No idea why sImplSttSkipChars is checked at end if (rCC.isLetterNumeric(sEnd, 0) || lcl_IsInArr(sImplSttSkipChars, chEnd))
{
nSttPos = nSttPos + nFndPos + nFirstReplacementTextLengthChange;
rDoc.Delete(nSttPos, nSttPos + 2);
rDoc.Insert(nSttPos,
(bEnDash || (rCC.isDigit(sStart, 0) && rCC.isDigit(sEnd, 0))
? sEnDash
: sEmDash));
bRet = true;
}
}
}
} return bRet;
}
// Add non-breaking space before specific punctuation marks in French text
sal_Int32 SvxAutoCorrect::FnAddNonBrkSpace(
SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
sal_Int32 nEndPos,
LanguageType eLang, bool& io_bNbspRunNext )
{
sal_Int32 nRet = -1;
sal_Unicode cChar = rTxt[ nEndPos ]; bool bHasSpace = chars.indexOf( cChar ) != -1; bool bIsSpecial = allChars.indexOf( cChar ) != -1; if ( bIsSpecial )
{ // Get the last word delimiter position
sal_Int32 nSttWdPos = nEndPos; bool bWasWordDelim = false; while( nSttWdPos )
{
bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]); if (bWasWordDelim) break;
}
//See if the text is the start of a protocol string, e.g. have text of //"http" see if it is the start of "http:" and if so leave it alone
size_t nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
size_t nProtocolLen = nEndPos - nSttWdPos + 1; if (nIndex + nProtocolLen <= rTxt.size())
{ if (INetURLObject::CompareProtocolScheme(rTxt.substr(nIndex, nProtocolLen)) != INetProtocol::NotValid) return -1;
}
// Check the presence of "://" in the word
size_t nStrPos = rTxt.find( u"://", nSttWdPos + 1 ); if ( nStrPos == std::u16string_view::npos && nEndPos > 0 )
{ // Check the previous char
sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
{ // Remove any previous normal space
sal_Int32 nPos = nEndPos - 1; while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
{ if ( nPos == 0 ) break;
nPos--;
cPrevChar = rTxt[ nPos ];
}
// Automatic *bold*, /italic/, -strikeout- and _underline_ bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
sal_Int32 nEndPos )
{ // Condition: // at the beginning: _, *, / or ~ after Space with the following !Space // at the end: _, *, / or ~ before Space (word delimiter?)
if( -1 != nFndPos )
{ // first delete the Character at the end - this allows insertion // of an empty hint in SetAttr which would be removed by Delete // (fdo#62536, AUTOFMT in Writer)
rDoc.Delete( nEndPos, nEndPos + 1 );
bool bAtStart = false; do {
--pStr; if (rCC.isLetter(aText, pStr - pStart))
{ if( !pWordStt )
pDelim = pStr+1;
pWordStt = pStr;
} elseif (pWordStt && !rCC.isDigit(aText, pStr - pStart))
{ if( (lcl_IsInArr( u"-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
pWordStt - 1 == pStr && // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
(pStart + 1) <= pStr &&
rCC.isLetter(aText, pStr-1 - pStart))
pWordStt = --pStr; else break;
}
bAtStart = (pStart == pStr);
} while( !bAtStart );
if (!pWordStt) return; // no character to be replaced
if (rCC.isDigit(aText, pStr - pStart)) return; // already ok
if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart))) return; // already ok
//See if the text is the start of a protocol string, e.g. have text of //"http" see if it is the start of "http:" and if so leave it alone
sal_Int32 nIndex = pWordStt - pStart;
sal_Int32 nProtocolLen = pDelim - pWordStt + 1; if (nIndex + nProtocolLen <= rTxt.getLength())
{ if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid) return; // already ok
}
if (0x1 == *pWordStt || 0x2 == *pWordStt) return; // already ok
// Only capitalize, if string before specified characters is long enough if( *pDelim && 2 >= pDelim - pWordStt &&
lcl_IsInArr( u".-)>", *pDelim ) ) return;
if( !bAtStart ) // Still no beginning of a paragraph?
{ if (NonFieldWordDelim(*pStr))
{ for (;;)
{
bAtStart = (pStart == pStr--); if (bAtStart || !NonFieldWordDelim(*pStr)) break;
}
} // Asian full stop, full width full stop, full width exclamation mark // and full width question marks are treated as word delimiters elseif ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
0xFF1F != *pStr ) return; // no valid separator -> no replacement
}
// No replacement for words in TWo INitial CApitals or sMALL iNITIAL list if (FindInWordStartExceptList(eLang, OUString(pWordStt, pDelim - pWordStt))) return;
if( bAtStart ) // at the beginning of a paragraph?
{ // Check out the previous paragraph, if it exists. // If so, then check to paragraph separator at the end.
OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos); if (!pPrevPara)
{ // valid separator -> replace
OUString sChar( *pWordStt );
sChar = rCC.titlecase(sChar); //see fdo#56740 if (sChar != OUStringChar(*pWordStt))
rDoc.ReplaceRange( pWordStt - pStart, 1, sChar ); return;
}
do { // overwrite all blanks
--pStr; if (!NonFieldWordDelim(*pStr)) break;
bAtStart = (pStart == pStr);
} while( !bAtStart );
if( bAtStart ) return; // no valid separator -> no replacement
}
// Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator. // all three can happen, but not more than once! const sal_Unicode* pExceptStt = nullptr; bool bContinue = true;
Flags nFlag = Flags::NONE; do
{ switch (*pStr)
{ // Western and Asian full stop case'.': case 0x3002: case 0xFF0E:
{ if (pStr >= pStart + 2 && *(pStr - 2) == '.')
{ //e.g. text "f.o.o. word": Now currently considering //capitalizing word but second last character of //previous word is a . So probably last word is an //acronym that ends in . and not truly the end of a //previous sentence, so don't autocapitalize this word return;
} if (nFlag & Flags::FullStop) return; // no valid separator -> no replacement
nFlag |= Flags::FullStop;
pExceptStt = pStr;
} break; case'!': case 0xFF01:
{ if (nFlag & Flags::ExclamationMark) return; // no valid separator -> no replacement
nFlag |= Flags::ExclamationMark;
} break; case'?': case 0xFF1F:
{ if (nFlag & Flags::QuestionMark) return; // no valid separator -> no replacement
nFlag |= Flags::QuestionMark;
} break; default: if (nFlag == Flags::NONE) return; // no valid separator -> no replacement else
bContinue = false; break;
}
if (bContinue && pStr-- == pStart)
{ return; // no valid separator -> no replacement
}
} while (bContinue); if (Flags::FullStop != nFlag)
pExceptStt = nullptr;
// Only capitalize, if string is long enough if( 2 > ( pStr - pStart ) ) return;
// Search for the beginning of the word while (!NonFieldWordDelim(*pStr))
{ if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
bNumericOnly = false;
if( pStart == pStr ) break;
--pStr;
}
if( bNumericOnly ) // consists of only numbers, then not return;
if (NonFieldWordDelim(*pStr))
++pStr;
OUString sWord;
// check on the basis of the exception list if( pExceptStt )
{
sWord = OUString(pStr, pExceptStt - pStr + 1); if( FindInCplSttExceptList(eLang, sWord) ) return;
// Delete all non alphanumeric. Test the characters at the // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
OUString sTmp( sWord ); while( !sTmp.isEmpty() &&
!rCC.isLetterNumeric( sTmp, 0 ) )
sTmp = sTmp.copy(1);
// Remove all non alphanumeric characters towards the end up until // the last one.
sal_Int32 nLen = sTmp.getLength(); while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
--nLen; if( nLen + 1 < sTmp.getLength() )
sTmp = sTmp.copy( 0, nLen + 1 );
// No replacement for words in TWo INitial CApitals or sMALL iNITIAL list if (FindInWordStartExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos))) returnfalse;
for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
{ if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) ) // A lowercase letter disqualifies the whole text. returnfalse;
if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) ) // Another uppercase letter. Convert it.
aConverted.append( rCC.lowercase(OUString(rTxt[i])) ); else // This is not an alphabetic letter. Leave it as-is.
aConverted.append( rTxt[i] );
}
// Replace the word.
rDoc.Delete(nSttPos, nEndPos);
rDoc.Insert(nSttPos, aConverted.makeStringAndClear());
// Search for a preceding opening quote in the paragraph before the insert
// position.
//
// Walks backwards through rTxt, starting at the character just before nPos
// and ending at index 0, and reports which of the characters of interest is
// met first.
//
// rTxt            text to scan
// nPos            insert position; the scan starts at index nPos - 1.
//                 A position <= 0 has nothing in front of it and yields
//                 false; a position beyond the end of rTxt is clamped to
//                 rTxt.size() so that no out-of-range index is ever read.
// sPrecedingChar  the character being looked for
// sStopChar       a character that ends the search unsuccessfully
// aStopChars      zero-terminated array of further characters that end the
//                 search unsuccessfully; nullptr is treated as an empty list
//
// Returns true if sPrecedingChar is found before sStopChar or any of
// aStopChars; false if a stop character comes first or the beginning of the
// text is reached.
static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
                const sal_Unicode sPrecedingChar, const sal_Unicode sStopChar,
                const sal_Unicode* aStopChars )
{
    // Nothing precedes the start of the text. (Previously a do/while loop
    // pre-decremented nPos unconditionally and indexed rTxt with -1 here.)
    if ( nPos <= 0 )
        return false;

    // Never start reading behind the last character.
    if ( static_cast<std::u16string_view::size_type>(nPos) > rTxt.size() )
        nPos = static_cast<sal_Int32>(rTxt.size());

    while ( nPos > 0 )
    {
        const sal_Unicode cTmpChar = rTxt[ --nPos ];

        // The sought character wins over the stop characters when it is
        // equal to one of them, so it is tested first.
        if ( cTmpChar == sPrecedingChar )
            return true;

        if ( cTmpChar == sStopChar )
            return false;

        if ( aStopChars )
        {
            for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
                if ( cTmpChar == *pCh )
                    return false;
        }
    }

    return false;
}
// WARNING: rText may become invalid, see comment below void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
sal_Int32 nInsPos, sal_Unicode cChar, bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
{ bool bIsNextRun = io_bNbspRunNext;
io_bNbspRunNext = false; // if it was set, then it has to be turned off
// Found a Paragraph-start or a Blank, search for the word shortcut in // auto.
sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character if( !nPos && !IsWordDelim( rTxt[ 0 ]))
--nCapLttrPos; // begin of paragraph and no blank
// no symbol characters if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos )) break;
if( IsAutoCorrFlag( ACFlags::Autocorrect ) && // tdf#134940 fix regression of arrow "-->" resulted by premature // replacement of "--" since '>' was added to IsAutoCorrectChar() '>' != cChar )
{ // WARNING ATTENTION: rTxt is an alias of the text node's OUString // and becomes INVALID if ChgAutoCorrWord returns true! // => use aPara/pPara to create a valid copy of the string!
OUString aPara;
OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
{ // WARNING ATTENTION: rTxt is an alias of the text node's OUString // and becomes INVALID if TransliterateRTLWord returns true! if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen0.67Angebot
Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.