/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
/* TODO! This file should not be called textenc.c, because it is not the
implementation of rtl/textenc.h. Rather, it should be called
gettextencodingdata.c. */
#include <sal/config.h>
#include <cstdlib>
#include <osl/module.hxx>
#include <rtl/textenc.h>
#include <rtl/tencinfo.h>
#include <sal/log.hxx>
#include <sal/types.h>
#include "convertsimple.hxx"
#include "gettextencodingdata.hxx"
#include "tcvtutf8.hxx"
#include "tenchelp.hxx"
/* "No table" sentinel ranges: each START is greater than its END, so the
   inclusive range [START, END] contains no value.  They are used in the
   conversion data below in the range slots that have no table. */
#define NOTABUNI_START 0xFF
#define NOTABUNI_END 0x00
#define NOTABCHAR_START 0xFFFF
#define NOTABCHAR_END 0x0000
#define SAME8090UNI_START
0 x80
#define SAME8090UNI_END
0 x9F
sal_uInt16
const aImpl8090SameToUniTab[SAME8090UNI_END
- SAME8090UNI_START
+
1 ]
= {
0 x0080,
0 x0081,
0 x0082,
0 x0083,
0 x0084,
0 x0085,
0 x0086,
0 x0087,
/* 0x80 */
0 x0088,
0 x0089,
0 x008A,
0 x008B,
0 x008C,
0 x008D,
0 x008E,
0 x008F,
0 x0090,
0 x0091,
0 x0092,
0 x0093,
0 x0094,
0 x0095,
0 x0096,
0 x0097,
/* 0x90 */
0 x0098,
0 x0099,
0 x009A,
0 x009B,
0 x009C,
0 x009D,
0 x009E,
0 x009F };
/* Identity table for the Unicode values 0x0080-0x009F: entry
   (value - SAME8090CHAR_START) holds the byte with the same numeric value. */
#define SAME8090CHAR_START 0x0080
#define SAME8090CHAR_END 0x009F
unsigned char const aImpl8090SameToCharTab[SAME8090CHAR_END
                                           - SAME8090CHAR_START
                                           + 1]
    = { 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x0080 */
        0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
        0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x0090 */
        0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F };
/* Identity table for the Unicode values 0x00A0-0x00FF: entry
   (value - SAMEA0FFCHAR_START) holds the byte with the same numeric value. */
#define SAMEA0FFCHAR_START 0x00A0
#define SAMEA0FFCHAR_END 0x00FF
unsigned char const aImplA0FFSameToCharTab[SAMEA0FFCHAR_END
                                           - SAMEA0FFCHAR_START
                                           + 1]
    = { 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, /* 0x00A0 */
        0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
        0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, /* 0x00B0 */
        0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
        0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, /* 0x00C0 */
        0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
        0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, /* 0x00D0 */
        0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
        0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, /* 0x00E0 */
        0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
        0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, /* 0x00F0 */
        0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF };
/* ======================================================================= */
/* MS-1252 */
/* Windows Standard CharSet (ANSI) for Western Script */
/* 1-Byte, 0x00-0x7F ASCII without exception */
/* Convert-Tables: mappings/vendors/micsft/windows/cp1252.txt from 04/15/98 Version 2.01 */
/* Last-Changes from us: */
/* ----------------------------------------------------------------------- */
#define MS1252UNI_START
0 x80
#define MS1252UNI_END
0 xFF
sal_uInt16
const aImplMS1252ToUniTab[MS1252UNI_END - MS1252UNI_START +
1 ] =
{
/* 0 1 2 3 4 5 6 7 */
/* 8 9 A B C D E F */
0 x20AC,
0 ,
0 x201A,
0 x0192,
0 x201E,
0 x2026,
0 x2020,
0 x2021,
/* 0x80 */
0 x02C6,
0 x2030,
0 x0160,
0 x2039,
0 x0152,
0 ,
0 x017D,
0 ,
/* 0x80 */
0 ,
0 x2018,
0 x2019,
0 x201C,
0 x201D,
0 x2022,
0 x2013,
0 x2014,
/* 0x90 */
0 x02DC,
0 x2122,
0 x0161,
0 x203A,
0 x0153,
0 ,
0 x017E,
0 x0178,
/* 0x90 */
0 x00A0,
0 x00A1,
0 x00A2,
0 x00A3,
0 x00A4,
0 x00A5,
0 x00A6,
0 x00A7,
/* 0xA0 */
0 x00A8,
0 x00A9,
0 x00AA,
0 x00AB,
0 x00AC,
0 x00AD,
0 x00AE,
0 x00AF,
/* 0xA0 */
0 x00B0,
0 x00B1,
0 x00B2,
0 x00B3,
0 x00B4,
0 x00B5,
0 x00B6,
0 x00B7,
/* 0xB0 */
0 x00B8,
0 x00B9,
0 x00BA,
0 x00BB,
0 x00BC,
0 x00BD,
0 x00BE,
0 x00BF,
/* 0xB0 */
0 x00C0,
0 x00C1,
0 x00C2,
0 x00C3,
0 x00C4,
0 x00C5,
0 x00C6,
0 x00C7,
/* 0xC0 */
0 x00C8,
0 x00C9,
0 x00CA,
0 x00CB,
0 x00CC,
0 x00CD,
0 x00CE,
0 x00CF,
/* 0xC0 */
0 x00D0,
0 x00D1,
0 x00D2,
0 x00D3,
0 x00D4,
0 x00D5,
0 x00D6,
0 x00D7,
/* 0xD0 */
0 x00D8,
0 x00D9,
0 x00DA,
0 x00DB,
0 x00DC,
0 x00DD,
0 x00DE,
0 x00DF,
/* 0xD0 */
0 x00E0,
0 x00E1,
0 x00E2,
0 x00E3,
0 x00E4,
0 x00E5,
0 x00E6,
0 x00E7,
/* 0xE0 */
0 x00E8,
0 x00E9,
0 x00EA,
0 x00EB,
0 x00EC,
0 x00ED,
0 x00EE,
0 x00EF,
/* 0xE0 */
0 x00F0,
0 x00F1,
0 x00F2,
0 x00F3,
0 x00F4,
0 x00F5,
0 x00F6,
0 x00F7,
/* 0xF0 */
0 x00F8,
0 x00F9,
0 x00FA,
0 x00FB,
0 x00FC,
0 x00FD,
0 x00FE,
0 x00FF
/* 0xF0 */
};
/* ----------------------------------------------------------------------- */
#define MS1252TOCHARTABEX_COUNT
27
ImplUniCharTabData
const aImplMS1252ToCharTabEx[MS1252TOCHARTABEX_COUNT] =
{
{
0 x0152,
0 x8C,
0 },
{
0 x0153,
0 x9C,
0 },
{
0 x0160,
0 x8A,
0 },
{
0 x0161,
0 x9A,
0 },
{
0 x0178,
0 x9F,
0 },
{
0 x017D,
0 x8E,
0 },
{
0 x017E,
0 x9E,
0 },
{
0 x0192,
0 x83,
0 },
{
0 x02C6,
0 x88,
0 },
{
0 x02DC,
0 x98,
0 },
{
0 x2013,
0 x96,
0 },
{
0 x2014,
0 x97,
0 },
{
0 x2018,
0 x91,
0 },
{
0 x2019,
0 x92,
0 },
{
0 x201A,
0 x82,
0 },
{
0 x201C,
0 x93,
0 },
{
0 x201D,
0 x94,
0 },
{
0 x201E,
0 x84,
0 },
{
0 x2020,
0 x86,
0 },
{
0 x2021,
0 x87,
0 },
{
0 x2022,
0 x95,
0 },
{
0 x2026,
0 x85,
0 },
{
0 x2030,
0 x89,
0 },
{
0 x2039,
0 x8B,
0 },
{
0 x203A,
0 x9B,
0 },
{
0 x20AC,
0 x80,
0 },
{
0 x2122,
0 x99,
0 },
};
/* ----------------------------------------------------------------------- */
/* Conversion tables and ranges for MS-1252.
   NOTE(review): the per-member remarks below are inferred from the names of
   the tables and range macros; confirm against the declaration of
   ImplByteConvertData. */
ImplByteConvertData const aImplMS1252ByteCvtData = {
    aImplMS1252ToUniTab,    /* byte -> Unicode table */
    nullptr,                /* no second byte -> Unicode table */
    aImplA0FFSameToCharTab, /* Unicode -> byte identity table */
    nullptr,                /* no second Unicode -> byte table */
    aImplMS1252ToCharTabEx, /* Unicode -> byte exception table */
    MS1252UNI_START,
    MS1252UNI_END,
    NOTABUNI_START,
    NOTABUNI_END,
    SAMEA0FFCHAR_START,
    SAMEA0FFCHAR_END,
    NOTABCHAR_START,
    NOTABCHAR_END,
    MS1252TOCHARTABEX_COUNT /* number of exception table entries */
};
/* ----------------------------------------------------------------------- */
/* Encoding description for MS-1252: single-byte converter functions driven by
   aImplMS1252ByteCvtData, with the six remaining converter slots left
   empty. */
ImplTextEncodingData const aImplMS1252TextEncodingData = {
    {
        &aImplMS1252ByteCvtData,
        sal::detail::textenc::convertCharToUnicode,
        sal::detail::textenc::convertUnicodeToChar,
        nullptr, nullptr, nullptr,
        nullptr, nullptr, nullptr
    },
    "iso8859-1",
    "windows-1252",
    1,
    1,
    1,
    0,
    RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME
};
/* WIN, SCRIPT_LATIN, pc code page 850 */
/* ======================================================================= */
/* ISO-8859-1 */
/* Unix Standard CharSet (Latin1) for Western Script */
/* 1-Byte, 0x00-0x7F ASCII without exception, 0x80-0x9F control character like in Unicode */
/* Convert-Tables: mappings/iso8859/8859-1.txt from 07/27/99 Version 1.0 (based on Unicode 3.0) */
/* Last-Changes from us: */
#define ISO88591UNI_START
0 xA0
#define ISO88591UNI_END
0 xFF
sal_uInt16
const aImplISO88591ToUniTab[ISO88591UNI_END - ISO88591UNI_START +
1 ] =
{
/* 0 1 2 3 4 5 6 7 */
/* 8 9 A B C D E F */
0 x00A0,
0 x00A1,
0 x00A2,
0 x00A3,
0 x00A4,
0 x00A5,
0 x00A6,
0 x00A7,
/* 0xA0 */
0 x00A8,
0 x00A9,
0 x00AA,
0 x00AB,
0 x00AC,
0 x00AD,
0 x00AE,
0 x00AF,
/* 0xA0 */
0 x00B0,
0 x00B1,
0 x00B2,
0 x00B3,
0 x00B4,
0 x00B5,
0 x00B6,
0 x00B7,
/* 0xB0 */
0 x00B8,
0 x00B9,
0 x00BA,
0 x00BB,
0 x00BC,
0 x00BD,
0 x00BE,
0 x00BF,
/* 0xB0 */
0 x00C0,
0 x00C1,
0 x00C2,
0 x00C3,
0 x00C4,
0 x00C5,
0 x00C6,
0 x00C7,
/* 0xC0 */
0 x00C8,
0 x00C9,
0 x00CA,
0 x00CB,
0 x00CC,
0 x00CD,
0 x00CE,
0 x00CF,
/* 0xC0 */
0 x00D0,
0 x00D1,
0 x00D2,
0 x00D3,
0 x00D4,
0 x00D5,
0 x00D6,
0 x00D7,
/* 0xD0 */
0 x00D8,
0 x00D9,
0 x00DA,
0 x00DB,
0 x00DC,
0 x00DD,
0 x00DE,
0 x00DF,
/* 0xD0 */
0 x00E0,
0 x00E1,
0 x00E2,
0 x00E3,
0 x00E4,
0 x00E5,
0 x00E6,
0 x00E7,
/* 0xE0 */
0 x00E8,
0 x00E9,
0 x00EA,
0 x00EB,
0 x00EC,
0 x00ED,
0 x00EE,
0 x00EF,
/* 0xE0 */
0 x00F0,
0 x00F1,
0 x00F2,
0 x00F3,
0 x00F4,
0 x00F5,
0 x00F6,
0 x00F7,
/* 0xF0 */
0 x00F8,
0 x00F9,
0 x00FA,
0 x00FB,
0 x00FC,
0 x00FD,
0 x00FE,
0 x00FF
/* 0xF0 */
};
/* ----------------------------------------------------------------------- */
/* Conversion tables and ranges for ISO-8859-1: 0xA0-0xFF plus the 0x80-0x9F
   identity tables in both directions, and no exception table.
   NOTE(review): the per-member remarks below are inferred from the names of
   the tables and range macros; confirm against the declaration of
   ImplByteConvertData. */
ImplByteConvertData const aImplISO88591ByteCvtData = {
    aImplISO88591ToUniTab,  /* byte -> Unicode table, 0xA0-0xFF */
    aImpl8090SameToUniTab,  /* byte -> Unicode table, 0x80-0x9F */
    aImplA0FFSameToCharTab, /* Unicode -> byte table, 0x00A0-0x00FF */
    aImpl8090SameToCharTab, /* Unicode -> byte table, 0x0080-0x009F */
    nullptr,                /* no exception table */
    ISO88591UNI_START,
    ISO88591UNI_END,
    SAME8090UNI_START,
    SAME8090UNI_END,
    SAMEA0FFCHAR_START,
    SAMEA0FFCHAR_END,
    SAME8090CHAR_START,
    SAME8090CHAR_END,
    0 /* exception table entry count */
};
/* ----------------------------------------------------------------------- */
/* Encoding description for ISO-8859-1: single-byte converter functions driven
   by aImplISO88591ByteCvtData, with the six remaining converter slots left
   empty. */
ImplTextEncodingData const aImplISO88591TextEncodingData = {
    {
        &aImplISO88591ByteCvtData,
        sal::detail::textenc::convertCharToUnicode,
        sal::detail::textenc::convertUnicodeToChar,
        nullptr, nullptr, nullptr,
        nullptr, nullptr, nullptr
    },
    "iso8859-1",
    "iso-8859-1",
    1,
    1,
    1,
    0,
    RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME
};
/* SCRIPT_LATIN, pc code page 850 */
/* ======================================================================= */
/* US-ASCII */
/* 7-Bit ASCII */
/* 1-Byte, 0x00-0x7F ASCII without exception */
/* For the import we use ISO-8859-1 with MS extension (MS-1252), because */
/* when the 8-Bit is set, the chance, that this is an ISO-8859-1 character */
/* is the greatest. For the export all chars greater than 127 are not */
/* converted and are replaced by the replacement character. */
/* Last-Changes from us: */
/* ----------------------------------------------------------------------- */
/* Conversion tables and ranges for US-ASCII.  Only the MS-1252 byte ->
   Unicode table is supplied; every other table pointer is null and its range
   is one of the NOTAB* sentinel ranges.
   NOTE(review): the per-member remarks below are inferred from the names of
   the tables and range macros; confirm against the declaration of
   ImplByteConvertData. */
ImplByteConvertData const aImplUSASCIIByteCvtData = {
    aImplMS1252ToUniTab, /* byte -> Unicode table borrowed from MS-1252 */
    nullptr,
    nullptr,
    nullptr,
    nullptr,
    MS1252UNI_START,
    MS1252UNI_END,
    NOTABUNI_START,
    NOTABUNI_END,
    NOTABCHAR_START,
    NOTABCHAR_END,
    NOTABCHAR_START,
    NOTABCHAR_END,
    0 /* exception table entry count */
};
/* ----------------------------------------------------------------------- */
/* Encoding description for US-ASCII: single-byte converter functions driven
   by aImplUSASCIIByteCvtData, with the six remaining converter slots left
   empty.  Unlike the other single-byte entries it also sets the 7BIT info
   flag. */
ImplTextEncodingData const aImplUSASCIITextEncodingData = {
    {
        &aImplUSASCIIByteCvtData,
        sal::detail::textenc::convertCharToUnicode,
        sal::detail::textenc::convertUnicodeToChar,
        nullptr, nullptr, nullptr,
        nullptr, nullptr, nullptr
    },
    "iso8859-1",
    "us-ascii",
    1,
    1,
    1,
    0,
    RTL_TEXTENCODING_INFO_ASCII
        | RTL_TEXTENCODING_INFO_7BIT
        | RTL_TEXTENCODING_INFO_MIME
};
/* SCRIPT_LATIN, pc code page 437 */
/* Encoding description for UTF-8.  The converter data pointer is null; the
   two conversion functions and the create/destroy/reset context functions
   for each direction are all supplied. */
ImplTextEncodingData const aImplUTF8TextEncodingData = {
    {
        nullptr,
        &ImplConvertUtf8ToUnicode,
        &ImplConvertUnicodeToUtf8,
        &ImplCreateUtf8ToUnicodeContext,
        &ImplDestroyUtf8ToUnicodeContext,
        &ImplResetUtf8ToUnicodeContext,
        &ImplCreateUnicodeToUtf8Context,
        &ImplDestroyUnicodeToUtf8Context,
        &ImplResetUnicodeToUtf8Context
    },
    "iso8859-1",
    "utf-8",
    1,
    6,
    1,
    0,
    RTL_TEXTENCODING_INFO_ASCII
        | RTL_TEXTENCODING_INFO_UNICODE
        | RTL_TEXTENCODING_INFO_MULTIBYTE
        | RTL_TEXTENCODING_INFO_MIME
};
/* SCRIPT_UNICODE, pc code page 850 */
/* The value of this tag is irrelevant.  Only its address != NULL is used to
   distinguish between RTL_TEXTENCODING_UTF8 and
   RTL_TEXTENCODING_JAVA_UTF8. */
static char aImplJavaUtf8TextConverterTag;

/* Encoding description for Java UTF-8.  It uses the same conversion and
   context functions as the UTF-8 entry, but passes the tag's address as
   converter data and has null pointers for both charset name strings. */
ImplTextEncodingData const aImplJavaUtf8TextEncodingData = {
    {
        &aImplJavaUtf8TextConverterTag,
        &ImplConvertUtf8ToUnicode,
        &ImplConvertUnicodeToUtf8,
        &ImplCreateUtf8ToUnicodeContext,
        &ImplDestroyUtf8ToUnicodeContext,
        &ImplResetUtf8ToUnicodeContext,
        &ImplCreateUnicodeToUtf8Context,
        &ImplDestroyUnicodeToUtf8Context,
        &ImplResetUnicodeToUtf8Context
    },
    nullptr,
    nullptr,
    1,
    3,
    1,
    0,
    RTL_TEXTENCODING_INFO_UNICODE | RTL_TEXTENCODING_INFO_MULTIBYTE
};
namespace {

#ifndef COND_LIB_SAL_TEXTENC

extern "C" ImplTextEncodingData const * sal_getFullTextEncodingData(
    rtl_TextEncoding); // from tables.cxx in sal_textenc library

/* Accessor for encodings that are not defined in this file: forwards every
   lookup directly to sal_getFullTextEncodingData.  Not copyable. */
class FullTextEncodingData {
public:
    FullTextEncodingData() = default;
    FullTextEncodingData(const FullTextEncodingData&) = delete;
    FullTextEncodingData& operator =(const FullTextEncodingData&) = delete;

    ImplTextEncodingData const * get(rtl_TextEncoding encoding) {
        (void) this; // loplugin:staticmethods
        return sal_getFullTextEncodingData(encoding);
    }
};

#else

extern "C" {

typedef ImplTextEncodingData const * TextEncodingFunction(rtl_TextEncoding);

// Passed to osl::Module::loadRelative as the address the module path is
// taken relative to.
void thisModule() {}

}

/* Accessor for encodings that are not defined in this file.  The constructor
   loads the sal_textenc library and looks up sal_getFullTextEncodingData in
   it; either step failing logs a warning and aborts the process.  Not
   copyable. */
class FullTextEncodingData {
public:
    FullTextEncodingData(const FullTextEncodingData&) = delete;
    FullTextEncodingData& operator =(const FullTextEncodingData&) = delete;

    FullTextEncodingData() {
        bool const bLoaded
            = module_.loadRelative(&thisModule, SAL_MODULENAME("sal_textenclo"));
        if (!bLoaded)
        {
            SAL_WARN("sal.textenc", "Loading sal_textenc library failed");
            std::abort();
        }

        auto const pSymbol
            = module_.getFunctionSymbol("sal_getFullTextEncodingData");
        function_ = reinterpret_cast< TextEncodingFunction * >(pSymbol);
        if (!function_)
        {
            SAL_WARN(
                "sal.textenc",
                "Obtaining sal_getFullTextEncodingData function from sal_textenc"
                " library failed");
            std::abort();
        }
    }

    // Calls the function obtained in the constructor.
    ImplTextEncodingData const * get(rtl_TextEncoding encoding) const {
        return (*function_)(encoding);
    }

private:
    osl::Module module_;
    TextEncodingFunction * function_;
};

#endif

}
/* Returns the encoding description for nEncoding.
 *
 * The five encodings defined in this file (US-ASCII, MS-1252, UTF-8, Java
 * UTF-8, ISO-8859-1) are answered directly.  Every other value is forwarded
 * to a FullTextEncodingData instance, which is constructed the first time
 * the default branch is reached; the result of its get() is returned
 * unchanged.
 */
ImplTextEncodingData const *
Impl_getTextEncodingData(rtl_TextEncoding nEncoding)
{
    switch (nEncoding)
    {
        case RTL_TEXTENCODING_ASCII_US:
            return &aImplUSASCIITextEncodingData;
        case RTL_TEXTENCODING_MS_1252:
            return &aImplMS1252TextEncodingData;
        case RTL_TEXTENCODING_UTF8:
            return &aImplUTF8TextEncodingData;
        case RTL_TEXTENCODING_JAVA_UTF8:
            return &aImplJavaUtf8TextEncodingData;
        case RTL_TEXTENCODING_ISO_8859_1:
            return &aImplISO88591TextEncodingData;
        default:
        {
            // Function-local static: constructed on first use only.
            static FullTextEncodingData gFullTextEncodingData;
            return gFullTextEncodingData.get(nEncoding);
        }
    }
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
/* Measurement V0.5 in percent: C=88 H=98 G=93
   Processing time: 0.12 seconds (preprocessed on 2026-06-06)
   © Formatika GbR, Germany */