// We don't want to end up with 2GB read in one line just because of malformed
// multiline fields, so chop it _somewhere_, which is twice supported columns
// times arbitrary maximum cell content length, 2*1024*64K=128M, and because
// it's sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of
// luck anyway.
// NOTE(review): the 2*1024*64K arithmetic above presumes 1024 supported
// columns; confirm it still matches the current value of MAXCOLCOUNT.
constexpr sal_Int32 nArbitraryCellLengthLimit = SAL_MAX_UINT16;
constexpr sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * nArbitraryCellLengthLimit;
namespace
{
/// Character sequence that stands for an embedded line feed in SYLK string
/// data: ESC (0x1b), blank, colon. lcl_UnescapeSylk() replaces it with "\n".
const char SYLK_LF[] = "\x1b :";
}
namespace {
/// Generator version of a SYLK stream, used to pick the matching unescaping
/// rules. The enumerator order is significant: the scanners compare values
/// with relational operators (e.g. eVersion >= SylkVersion::OOO32).
enum class SylkVersion
{
    SCALC3, // Wrote wrongly quoted strings and unescaped semicolons.
    OOO32,  // Correct strings, plus multiline content.
    OWN,    // Place our new versions, if any, before this value.
    OTHER   // Assume that aliens wrote correct strings.
};
@return FIELDEND_QUOTEifendoffieldquote DONTKNOW_QUOTEanythingelse
*/ static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep )
{ // Due to broken CSV generators that don't double embedded quotes check if // a field separator immediately or with trailing spaces follows the quote, // only then end the field, or at end of string.
constexpr sal_Unicode cBlank = ' '; if (p[1] == cBlank && ScGlobal::UnicodeStrChr( pSeps, cBlank)) return FIELDEND_QUOTE; // Detect a possible blank separator if it's not already in the list (which // was checked right above for p[1]==cBlank). constbool bBlankSep = (p[1] == cBlank && !rcDetectSep && p[2] && p[2] != cBlank); while (p[1] == cBlank)
++p; if (lcl_isFieldEnd( p[1], pSeps)) return FIELDEND_QUOTE; // Extended separator detection after a closing quote (with or without // blanks). Note that nQuotes is incremented *after* the call so is not yet // even here, and that with separator detection we reach here only if // lcl_isEscapedOrFieldEndQuote() did not already detect FIRST_QUOTE or // SECOND_QUOTE for an escaped embedded quote, thus nQuotes does not have // to be checked. if (!rcDetectSep)
{ static constexpr sal_Unicode vSep[] = { ',', '\t', ';' }; for (const sal_Unicode c : vSep)
{ if (p[1] == c)
{
rcDetectSep = c; return FIELDEND_QUOTE;
}
}
} // Blank separator is least significant, after others. if (bBlankSep)
{
rcDetectSep = cBlank; return FIELDEND_QUOTE;
} return DONTKNOW_QUOTE;
}
/** Determine if *p is a quote that is escaped by being doubled or ends a quotedfield.
enumclass DoubledQuoteMode
{
KEEP_ALL, // both are taken, additionally start and end quote are included in string
ESCAPE, // escaped quote, one is taken, one ignored
};
*/ staticconst sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rField, const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell )
{
OUString aString; bool bClosingQuote = (eMode == DoubledQuoteMode::KEEP_ALL); const sal_Unicode* const pStart = p; if (eMode != DoubledQuoteMode::KEEP_ALL)
p++; //! jump over opening quote bool bCont; do
{
bCont = false; const sal_Unicode* p0 = p; for( ;; )
{ if (!*p)
{ // Encountering end of data after an opening quote is not a // quoted string, ReadCsvLine() concatenated lines with '\n' // for a properly quoted embedded linefeed. if (eMode == DoubledQuoteMode::KEEP_ALL) // Caller would append that data anyway, so we can do it // already here. break;
return pStart;
}
if( *p == cStr )
{ if ( *++p != cStr )
{ // break or continue for loop if (eMode == DoubledQuoteMode::ESCAPE)
{
sal_Unicode cDetectSep = 0xffff; // No separator detection here. if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE)
{
bClosingQuote = true; break;
} else continue;
} else break;
} // doubled quote char switch ( eMode )
{ case DoubledQuoteMode::KEEP_ALL :
p++; // both for us (not breaking for-loop) break; case DoubledQuoteMode::ESCAPE :
p++; // one for us (breaking for-loop)
bCont = true; // and more break;
} if ( eMode == DoubledQuoteMode::ESCAPE ) break;
} else
p++;
} if ( p0 < p )
{ if (!lcl_appendLineData( aString, p0, ((eMode != DoubledQuoteMode::KEEP_ALL && (*p || *(p-1) == cStr)) ? p-1 : p)))
rbOverflowCell = true;
}
} while ( bCont );
if (!bClosingQuote) return pStart;
if (!aString.isEmpty())
rField += aString;
return p;
}
staticvoid lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion )
{ // Older versions didn't escape the semicolon. // Older versions quoted the string and doubled embedded quotes, but not // the semicolons, which was plain wrong. if (eVersion >= SylkVersion::OOO32)
rString = rString.replaceAll(";;", ";"); else
rString = rString.replaceAll("\"\"", "\"");
rString = rString.replaceAll(SYLK_LF, "\n");
}
staticconst sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p,
OUString& rString, SylkVersion eVersion )
{ const sal_Unicode* pStartQuote = p; const sal_Unicode* pEndQuote = nullptr; while( *(++p) )
{ if( *p == '"' )
{
pEndQuote = p; if (eVersion >= SylkVersion::OOO32)
{ if (*(p+1) == ';')
{ if (*(p+2) == ';')
{
p += 2; // escaped ';'
pEndQuote = nullptr;
} else break; // end field
}
} else
{ if (*(p+1) == '"')
{
++p; // escaped '"'
pEndQuote = nullptr;
} elseif (*(p+1) == ';') break; // end field
}
}
} if (!pEndQuote)
pEndQuote = p; // Take all data as string.
rString += std::u16string_view(pStartQuote + 1, pEndQuote - pStartQuote - 1 );
lcl_UnescapeSylk( rString, eVersion); return p;
}
staticconst sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p,
OUString& rString, SylkVersion eVersion )
{ const sal_Unicode* pStart = p; if (eVersion >= SylkVersion::OOO32)
{ while (*p)
{ if (*p == ';')
{ if (*(p+1) == ';')
++p; // escaped ';' else break; // end field
}
++p;
}
rString += std::u16string_view( pStart, p - pStart);
lcl_UnescapeSylk( rString, eVersion);
} else
{ // Nasty. If in old versions the formula contained a semicolon, it was // quoted and embedded quotes were doubled, but semicolons were not. If // there was no semicolon, it could still contain quotes and doubled // embedded quotes if it was something like ="a""b", which was saved as // E"a""b" as is and has to be preserved, even if older versions // couldn't even load it correctly. However, theoretically another // field might follow and thus the line contain a semicolon again, such // as ...;E"a""b";... bool bQuoted = false; if (*p == '"')
{ // May be a quoted expression or just a string constant expression // with quotes. while (*(++p))
{ if (*p == '"')
{ if (*(p+1) == '"')
++p; // escaped '"' else break; // closing '"', had no ';' yet
} elseif (*p == ';')
{
bQuoted = true; // ';' within quoted expression break;
}
}
p = pStart;
} if (bQuoted)
p = lcl_ScanSylkString( p, rString, eVersion); else
{ while (*p && *p != ';')
++p;
rString += std::u16string_view( pStart, p - pStart);
}
} return p;
}
staticvoid lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc )
{ if (cEsc)
{ // the goal is to replace cStr by cStr+cStr
OUString strFrom(cEsc);
OUString strTo = strFrom + strFrom;
rString = rString.replaceAll(strFrom, strTo);
}
sal_uInt16 nFound = 0; bool bInNum = false; for (sal_Int32 nPos = 0; nPos < nLen && (bInNum || nFound < nMaxNumberParts); ++nPos)
{ bool bLetter = false; if (rtl::isAsciiDigit(rStr[nPos]) ||
(((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1))
&& (bLetter = ScGlobal::getCharClass().isLetterNumeric( rStr, nPos))))
{ if (!bInNum)
{
bInNum = true;
nStart[nFound] = nPos;
++nFound;
}
nEnd[nFound-1] = nPos; if (bIso && (bLetter || (2 <= nFound && nFound <= 6 && nPos > nStart[nFound-1] + 1))) // Each M,D,h,m,s at most 2 digits.
bIso = false;
} else
{
bInNum = false; if (bIso)
{ // ([+-])YYYY-MM-DD([T ]hh:mm(:ss(.fff)))(([+-])TZ) // XXX NOTE: timezone is accepted here, but number // formatter parser will not, so the end result will be // type Text to preserve timezone information. switch (rStr[nPos])
{ case'+': if (nFound >= 5 && nPos == nEnd[nFound-1] + 1) // Accept timezone offset.
; elseif (nPos > 0) // Accept one leading sign.
bIso = false; break; case'-': if (nFound >= 5 && nPos == nEnd[nFound-1] + 1) // Accept timezone offset.
; elseif (nFound == 0 && nPos > 0) // Accept one leading sign.
bIso = false; elseif (nFound < 1 || 2 < nFound || nPos != nEnd[nFound-1] + 1) // Not immediately after 1 or 1-2
bIso = false; break; case'T': case' ': if (nFound != 3 || nPos != nEnd[nFound-1] + 1) // Not immediately after 1-2-3
bIso = false; break; case':': if (nFound < 4 || 5 < nFound || nPos != nEnd[nFound-1] + 1) // Not at 1-2-3T4:5:
bIso = false; break; case'.': case',': if (nFound != 6 || nPos != nEnd[nFound-1] + 1) // Not at 1-2-3T4:5:6.
bIso = false; break; case'Z': if (nFound >= 5 && nPos == nEnd[nFound-1] + 1) // Accept Zero timezone.
; else
bIso = false; break; default:
bIso = false;
}
}
}
}
if (nFound < 3)
bIso = false;
if (bIso)
{ // Leave conversion and detection of various possible number // formats to the number formatter. ISO is recognized in any locale // so we can directly use the document's formatter.
sal_uInt32 nFormat = 0; double fVal = 0.0;
SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable(); if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal))
{ if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE)
{
ScAddress aPos(nCol,nRow,nTab); if (bUseDocImport)
rDocImport.setNumericCell(aPos, fVal); else
rDoc.SetValue(aPos, fVal);
rDoc.SetNumberFormat(aPos, nFormat);
return bMultiLine; // success
}
} // If we reach here it is type Text (e.g. timezone or trailing // characters). Handled below.
}
if ( nFound == 1 )
{ // try to break one number (without separators) into date fields
if ( nDateLen >= 5 && nDateLen <= 8 &&
ScGlobal::getCharClass().isNumeric( rStr.copy( nDateStart, nDateLen ) ) )
{ // 6 digits: 2 each for day, month, year // 8 digits: 4 for year, 2 each for day and month // 5 or 7 digits: first field is shortened by 1
CalendarWrapper* pCalendar = (bSecondCal ? pSecondCalendar : &rCalendar);
sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear(); if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths )
{
--nMonth;
pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay );
pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth );
pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear );
sal_Int16 nHour, nMinute, nSecond; // #i14974# The imported value should have no fractional value, so set the // time fields to zero (ICU calendar instance defaults to current date/time)
nHour = nMinute = nSecond = 0; if (nFound > 3)
nHour = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[3], nEnd[3]+1-nStart[3]))); if (nFound > 4)
nMinute = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[4], nEnd[4]+1-nStart[4]))); if (nFound > 5)
nSecond = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[5], nEnd[5]+1-nStart[5]))); // do not use calendar's milliseconds, to avoid fractional part truncation double fFrac = 0.0; if (nFound > 6)
{
sal_Unicode cDec = '.';
OUString aT = OUStringChar(cDec) + rStr.subView( nStart[6], nEnd[6]+1-nStart[6]);
rtl_math_ConversionStatus eStatus; double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus ); if (eStatus == rtl_math_ConversionStatus_Ok)
fFrac = fV / 86400.0;
}
sal_Int32 nPos; if (nFound > 3 && 1 <= nHour && nHour <= 12// nHour 0 and >=13 can't be AM/PM
&& (nPos = nEnd[nFound-1] + 1) < nLen)
{ // Dreaded AM/PM may be following. while (nPos < nLen && rStr[nPos] == ' ')
++nPos; if (nPos < nLen)
{
sal_Int32 nStop = nPos; while (nStop < nLen && rStr[nStop] != ' ')
++nStop;
OUString aAmPm = rStr.copy( nPos, nStop - nPos); // For AM only 12 needs to be treated, whereas for PM // it must not. Check both, locale and second/English // strings. if (nHour == 12 &&
(rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimeAM()) ||
(pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, u"AM"_ustr))))
{
nHour = 0;
} elseif (nHour < 12 &&
(rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimePM()) ||
(pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, u"PM"_ustr))))
{
nHour += 12;
}
}
}
pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour );
pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute );
pCalendar->setValue( i18n::CalendarFieldIndex::SECOND, nSecond );
pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 ); if ( pCalendar->isValid() )
{ // Whole days diff. double fDiff = DateTime::Sub( DateTime(pDocFormatter->GetNullDate()),
pCalendar->getEpochStart()); // #i14974# must use getLocalDateTime to get the same // date values as set above double fDays = pCalendar->getLocalDateTime() + fFrac;
fDays -= fDiff;
LanguageType eLatin, eCjk, eCtl;
rDoc.GetLanguage( eLatin, eCjk, eCtl );
LanguageType eDocLang = eLatin; //! which language for date formats?
SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE);
sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang ); // maybe there is a special format including seconds or milliseconds if (nFound > 5)
nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang);
while(--nSkipLines>0)
{
aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored if ( rStrm.eof() ) break;
}
// Determine range for Undo. // We don't need this during import of a file to a new sheet or document... bool bDetermineRange = bUndo; bool bColumnsAreDetermined = false;
// Row heights don't need to be adjusted on the fly if EndPaste() is called // afterwards, which happens only if bDetermineRange. This variable also // survives the toggle of bDetermineRange down at the end of the do{} loop. bool bRangeIsDetermined = bDetermineRange;
ScDocumentImport aDocImport(rDoc); do
{ const SCCOL nLastCol = nEndCol; // tdf#129701 preserve value of nEndCol for( ;; )
{
aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); if ( rStrm.eof() && aLine.isEmpty() ) break;
assert(pSeps == aSeps.getStr());
if ( nRow > rDoc.MaxRow() )
{
bOverflowRow = true; // display warning on import break; // for
}
if (!bDetermineRange)
EmbeddedNullTreatment( aLine);
sal_Int32 nLineLen = aLine.getLength();
SCCOL nCol = nStartCol; bool bMultiLine = false; if ( bFixed ) // Fixed line length
{ if (bDetermineRange)
{ if (!bColumnsAreDetermined)
{ // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it // is only an overflow if there is really data following to // be put behind the last column, which doesn't happen if // info is SC_COL_SKIP. for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i)
{ const sal_uInt8 nFmt = pColFormat[i]; if (nFmt != SC_COL_SKIP) // otherwise don't increment nCol either
{ if (nCol > rDoc.MaxCol())
bOverflowCol = true; // display warning on import
++nCol;
}
}
bColumnsAreDetermined = true;
}
} else
{
sal_Int32 nStartIdx = 0; // Same maxcol+1 check reason as above. for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i)
{
sal_Int32 nNextIdx = nStartIdx; if (i + 1 < nInfoCount)
CountVisualWidth( aLine, nNextIdx, pColStart[i+1] - pColStart[i] ); else
nNextIdx = nLineLen;
sal_uInt8 nFmt = pColFormat[i]; if (nFmt != SC_COL_SKIP) // otherwise don't increment nCol either
{ if (nCol > rDoc.MaxCol())
bOverflowCol = true; // display warning on import else
{ bool bIsQuoted = false;
aCell = lcl_GetFixed( aLine, nStartIdx, nNextIdx, bIsQuoted, bOverflowCell ); if (bIsQuoted && bQuotedAsText)
nFmt = SC_COL_TEXT;
bMultiLine |= lcl_PutString(
aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
&aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells,
aTransliteration, aCalendar,
pEnglishTransliteration.get(), pEnglishCalendar.get());
}
++nCol;
}
nStartIdx = nNextIdx;
}
}
} else// Search for the separator
{
SCCOL nSourceCol = 0;
sal_uInt16 nInfoStart = 0; const sal_Unicode* p = aLine.getStr(); // tdf#129701 if there is only one column, and user wants to treat empty cells, // we need to detect *p = null bool bIsLastColEmpty = !(*p) && !bSkipEmptyCells && !bDetermineRange; // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an // overflow if there is really data following to be put behind // the last column, which doesn't happen if info is // SC_COL_SKIP. while ( (*p || bIsLastColEmpty) && nCol <= rDoc.MaxCol()+1)
{ bool bIsQuoted = false;
p = ScImportExport::ScanNextFieldFromString( p, aCell,
cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace );
sal_uInt8 nFmt = SC_COL_STANDARD; for ( i=nInfoStart; i<nInfoCount; i++ )
{ if ( pColStart[i] == nSourceCol + 1 ) // pColStart is 1-based
{
nFmt = pColFormat[i];
nInfoStart = i + 1; // ColInfos are in succession break; // for
}
} if ( nFmt != SC_COL_SKIP )
{ if (nCol > rDoc.MaxCol())
bOverflowCol = true; // display warning on import elseif (!bDetermineRange)
{ if (bIsQuoted && bQuotedAsText)
nFmt = SC_COL_TEXT;
bMultiLine |= lcl_PutString(
aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
&aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells,
aTransliteration, aCalendar,
pEnglishTransliteration.get(), pEnglishCalendar.get());
}
++nCol; if (bIsLastColEmpty)
{
bIsLastColEmpty = false; // toggle to stop
} else
{ // tdf#129701 detect if there is a last empty column when we need it
bIsLastColEmpty = (nCol == nLastCol) && !(*p) && !bSkipEmptyCells && !bDetermineRange;
}
}
++nSourceCol;
}
} if (nEndCol < nCol)
nEndCol = nCol; //! points to the next free or even rDoc.MaxCol()+2
if (!bDetermineRange)
{ if (bMultiLine && !bRangeIsDetermined && pDocSh)
{ // Adjust just once at the end for a whole range.
nFirstUpdateRowHeight = std::min( nFirstUpdateRowHeight, nRow );
nLastUpdateRowHeight = std::max( nLastUpdateRowHeight, nRow );
}
xProgress->SetStateOnPercent( rStrm.Tell() - nOldPos );
}
++nRow;
} // so far nRow/nEndCol pointed to the next free if (nRow > nStartRow)
--nRow; if (nEndCol > nStartCol)
nEndCol = ::std::min( static_cast<SCCOL>(nEndCol - 1), rDoc.MaxCol());
if (bDetermineRange)
{
aRange.aEnd.SetCol( nEndCol );
aRange.aEnd.SetRow( nRow );
void ScImportExport::EmbeddedNullTreatment( OUString & rStr )
{
    // Workaround for raw data that contains embedded NULL characters: too
    // many places assume 0-terminated strings, so such characters cannot be
    // kept as cell content and are simply stripped. Excel does the same. See
    // fdo#57841 for sample data.

    const sal_Unicode cNull = 0;

    // Common case first: without any embedded NULL there is nothing to do
    // and no string needs to be de-/allocated.
    const sal_Int32 nFirstNull = rStr.indexOf(cNull);
    if (nFirstNull < 0)
        return;

    // Nothing precedes the first hit, so replacing can start right there.
    rStr = rStr.replaceAll(std::u16string_view(&cNull, 1), u"", nFirstNull);
}
/** Extract the next field of a separated-values line.

    @param p
        Current position inside a 0-terminated line buffer.
    @param rField
        Cleared, then filled with the content of the field.
    @param cStr
        Quote character, or 0 if quoting is not used.
    @param pSeps
        0-terminated list of field separator characters.
    @param bMergeSeps
        If true, a run of consecutive separators after the field is skipped
        as a whole.
    @param rbIsQuoted
        Set to true if the field starts with the quote character (after
        optionally skipped leading blanks), false otherwise.
    @param rbOverflowCell
        Set to true if appending field data failed (reported by
        lcl_appendLineData() or lcl_ScanString()); never reset to false here.
    @param bRemoveSpace
        If true, blanks are trimmed from unquoted data: both ends of an
        unquoted field, and the end of unquoted data trailing a quoted part.

    @return
        Position behind the field and its separator(s), i.e. the start of
        the next field or the terminating 0.
 */
const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p,
        OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, bool bMergeSeps,
        bool& rbIsQuoted, bool& rbOverflowCell, bool bRemoveSpace )
{
    rbIsQuoted = false;
    rField.clear();
    const sal_Unicode cBlank = ' ';
    if (cStr && !ScGlobal::UnicodeStrChr(pSeps, cBlank))
    {
        // Cope with broken generators that put leading blanks before a quoted
        // field, like "field1", "field2", "..."
        // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180
        const sal_Unicode* pb = p;
        while (*pb == cBlank)
            ++pb;
        if (*pb == cStr)
            p = pb;
    }
    if (cStr && *p == cStr)     // String in quotes
    {
        rbIsQuoted = true;
        const sal_Unicode* p1;
        p1 = p = lcl_ScanString( p, rField, pSeps, cStr, DoubledQuoteMode::ESCAPE, rbOverflowCell );
        while (!lcl_isFieldEnd( *p, pSeps))
            p++;
        // Append remaining unquoted and undelimited data (dirty, dirty) to
        // this field.
        if (p > p1)
        {
            const sal_Unicode* ptrim_f = p;
            if ( bRemoveSpace )
            {
                while ( ptrim_f > p1 && ( *(ptrim_f - 1) == cBlank ) )
                    --ptrim_f;
            }
            if (!lcl_appendLineData( rField, p1, ptrim_f))
                rbOverflowCell = true;
        }
        if( *p )
            p++;    // step over the separator
    }
    else                        // up to delimiter
    {
        const sal_Unicode* p0 = p;
        while (!lcl_isFieldEnd( *p, pSeps))
            p++;
        const sal_Unicode* ptrim_i = p0;
        const sal_Unicode* ptrim_f = p;  // [ptrim_i,ptrim_f) is cell data after trimming
        if ( bRemoveSpace )
        {
            while ( ptrim_i < ptrim_f && *ptrim_i == cBlank )
                ++ptrim_i;
            while ( ptrim_f > ptrim_i && ( *(ptrim_f - 1) == cBlank ) )
                --ptrim_f;
        }
        if (!lcl_appendLineData( rField, ptrim_i, ptrim_f))
            rbOverflowCell = true;
        if( *p )
            p++;    // step over the separator
    }
    if ( bMergeSeps )           // skip following delimiters
    {
        while (*p && ScGlobal::UnicodeStrChr( pSeps, *p))
            p++;
    }
    return p;
}
namespace {
/** *Checkifagivenstringhasanylinebreakcharactersorseparators. * *@paramrStrstringtoinspect. *@paramcSepseparatorcharacter.
*/ bool hasLineBreaksOrSeps( const OUString& rStr, sal_Unicode cSep )
{ const sal_Unicode* p = rStr.getStr(); for (sal_Int32 i = 0, n = rStr.getLength(); i < n; ++i, ++p)
{
sal_Unicode c = *p; if (c == cSep) // separator found. returntrue;
/** Export the current range (aRange) of the document to rStrm in DIF format.

    @param rStrm
        Stream handed to the DIF export filter.

    @return
        Always true; the result of the filter call is not evaluated.
 */
bool ScImportExport::Doc2Dif( SvStream& rStrm )
{
    // for DIF in the clipboard, IBM_850 is always used
    ScFormatFilter::Get().ScExportDif( rStrm, rDoc, aRange, RTL_TEXTENCODING_IBM_850 );
    return true;
}
// for DIF in the clipboard, IBM_850 is always used
ScFormatFilter::Get().ScImportDif( rStrm, pImportDoc.get(), aRange.aStart, RTL_TEXTENCODING_IBM_850 );
SCCOL nEndCol;
SCROW nEndRow;
pImportDoc->GetCellArea( nTab, nEndCol, nEndRow ); // if there are no cells in the imported content, nEndCol/nEndRow may be before the start if ( nEndCol < aRange.aStart.Col() )
nEndCol = aRange.aStart.Col(); if ( nEndRow < aRange.aStart.Row() )
nEndRow = aRange.aStart.Row();
aRange.aEnd = ScAddress( nEndCol, nEndRow, nTab );
// If this is set, read from this file, instead of the real clipboard during paste. char* pEnv = getenv("SC_DEBUG_HTML_PASTE_FROM"); if (pEnv)
{
OUString aURL;
osl::FileBase::getFileURLFromSystemPath(OUString::fromUtf8(pEnv), aURL);
SvFileStream aStream(aURL, StreamMode::READ);
pImp->Read( aStream, rBaseURL );
} else
{
pImp->Read( rStrm, rBaseURL );
}
aRange = pImp->GetRange();
bool bOk = StartPaste(); if (bOk)
{ // ScHTMLImport may call ScDocument::InitDrawLayer, resulting in // a Draw Layer but no Draw View -> create Draw Layer and View here if (pDocSh)
pDocSh->MakeDrawLayer();
/** Find the first occurrence of c in the 0-terminated array pStr.

    Precondition: pStr is guaranteed to be non-NULL and points to a
    0-terminated array.

    @return
        Pointer to the first element equal to c, or nullptr if there is none.
        The terminating 0 itself is never matched, so searching for 0 yields
        nullptr.
 */
static const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
        sal_Unicode c )
{
    while (*pStr)
    {
        if (*pStr == c)
            return pStr;
        ++pStr;
    }
    return nullptr;
}
sal_Int32 nLastOffset = 0;
sal_Int32 nQuotes = 0; while (!rStream.eof() && aStr.getLength() < nArbitraryLineLengthLimit)
{ const sal_Unicode * p = aStr.getStr() + nLastOffset; const sal_Unicode * const pStop = aStr.getStr() + aStr.getLength(); while (p < pStop)
{ if (!*p)
{ // Skip embedded null-characters. They don't change // anything and are handled at a higher level.
++p; continue;
}
if (nQuotes)
{ if (*p == cFieldQuote)
{ if (bFieldStart)
{
++nQuotes;
bFieldStart = false;
eQuoteState = FIELDSTART_QUOTE;
nFirstLineLength = aStr.getLength();
nFirstLineStreamPos = rStream.Tell();
} // Do not detect a FIELDSTART_QUOTE if not in // bFieldStart mode, in which case for unquoted content // we are in FIELDEND_QUOTE state. elseif (eQuoteState != FIELDEND_QUOTE)
{
eQuoteState = lcl_isEscapedOrFieldEndQuote( nQuotes, p, pSeps, cFieldQuote, rcDetectSep);
// DONTKNOW_QUOTE is an embedded unescaped quote we // don't count for pairing. if (eQuoteState != DONTKNOW_QUOTE)
++nQuotes;
}
} elseif (eQuoteState == FIELDEND_QUOTE)
{ if (bFieldStart) // If blank is a separator it starts a field, if it // is not and thus maybe leading before quote we // are still at start of field regarding quotes.
bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr); else
bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
}
} else
{ if (*p == cFieldQuote && bFieldStart)
{
nQuotes = 1;
eQuoteState = FIELDSTART_QUOTE;
bFieldStart = false;
nFirstLineLength = aStr.getLength();
nFirstLineStreamPos = rStream.Tell();
} elseif (eQuoteState == FIELDEND_QUOTE)
{ // This also skips leading blanks at beginning of line // if followed by a quote. It's debatable whether we // actually want that or not, but congruent with what // ScanNextFieldFromString() does. if (bFieldStart)
bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr); else
bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
}
} // A quote character inside a field content does not start // a quote.
++p;
}
if ((nQuotes & 1) == 0) // We still have a (theoretical?) problem here if due to // nArbitraryLineLengthLimit (or nMaxSourceLines below) we // split a string right between a doubled quote pair. break; elseif (eQuoteState == DONTKNOW_QUOTE) // A single unescaped quote somewhere in a quote started // field, most likely that was not meant to have embedded // linefeeds either. break; elseif (++nLine >= nMaxSourceLines && nMaxSourceLines > 0) // Unconditionally increment nLine even if nMaxSourceLines==0 // so it can be observed in debugger. break; else
{
nLastOffset = aStr.getLength();
OUString aNext;
rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit); if (!rStream.eof())
aStr += "\n" + aNext;
}
} if (nQuotes & 1)
{ // No closing quote at all. A single quote at field start => no // embedded linefeeds for that field, take only first logical line.
aStr = aStr.copy( 0, nFirstLineLength);
rStream.Seek( nFirstLineStreamPos);
}
} return aStr;
}
// The information on this web page has been carefully compiled to the best
// of our knowledge. However, neither completeness, nor correctness, nor
// quality of the information provided is guaranteed.
// Note:
// The colored syntax highlighting and the measurement are still experimental.