Quellcodebibliothek Statistik Leitseite products/Sources/formale Sprachen/C/LibreOffice/sal/textenc/   (Office von Apache Version 25.8.3.2©)  Datei vom 5.10.2025 mit Größe 21 kB image not shown  

Quelle  tcvtutf7.cxx   Sprache: C

 
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */


#include <sal/config.h>

#include <rtl/textcvt.h>

#include "tenchelp.hxx"
#include "unichars.hxx"

/* ======================================================================= */

/* Base64 alphabet: entry i is the ASCII character that encodes the */
/* 6-bit value i (0..63). */
unsigned char const aImplBase64Tab[64] =
{
    /* values 0-25: upper-case letters */
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    /* values 26-51: lower-case letters */
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    /* values 52-61: digits */
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    /* values 62 and 63 */
    '+', '/'
};

/* Reverse lookup for the base64 alphabet, indexed by a 7-bit ASCII code: */
/* each entry is the 6-bit value (0..63) of that character, i.e. its index */
/* in aImplBase64Tab, or 0xFF when the character is not a base64 character. */
/* The table has only 128 entries, so callers must check c <= 0x7F before */
/* indexing. */
unsigned char const aImplBase64IndexTab[128] =
{
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x00-0x07 */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x08-0x0F */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x10-0x17 */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x18-0x1F */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x20-0x27  !"#$%&' */
    0xFF, 0xFF, 0xFF,   62, 0xFF, 0xFF, 0xFF,   63,     /* 0x28-0x2F ()*+,-./ */
      52,   53,   54,   55,   56,   57,   58,   59,     /* 0x30-0x37 01234567 */
      60,   61, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x38-0x3F 89:;<=>? */
    0xFF,    0,    1,    2,    3,    4,    5,    6,     /* 0x40-0x47 @ABCDEFG */
       7,    8,    9,   10,   11,   12,   13,   14,     /* 0x48-0x4F HIJKLMNO */
      15,   16,   17,   18,   19,   20,   21,   22,     /* 0x50-0x57 PQRSTUVW */
      23,   24,   25, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,     /* 0x58-0x5F XYZ[\]^_ */
    0xFF,   26,   27,   28,   29,   30,   31,   32,     /* 0x60-0x67 `abcdefg */
      33,   34,   35,   36,   37,   38,   39,   40,     /* 0x68-0x6F hijklmno */
      41,   42,   43,   44,   45,   46,   47,   48,     /* 0x70-0x77 pqrstuvw */
      49,   50,   51, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF      /* 0x78-0x7F xyz{|}~ */
};

/* Encoder policy table, indexed by a 7-bit ASCII code: 1 means the */
/* character has to be written inside a base64 (shifted) run, 0 means it */
/* may be written directly as its ASCII byte. */
/* Of the control characters only HTAB (0x09), LF (0x0A) and CR (0x0D) are */
/* direct, matching RFC 2152 Rule 3; form feed (0x0C) must be shifted. */
unsigned char const aImplMustShiftTab[128] =
{
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x00-0x07 */
    1, 0, 0, 1, 1, 0, 1, 1,     /* 0x08-0x0F 0x09 == HTAB, 0x0A == LF, 0x0D == CR */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x10-0x17 */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x18-0x1F */
    0, 1, 1, 1, 1, 1, 1, 0,     /* 0x20-0x27  !"#$%&' */
    0, 0, 1, 1, 0, 1, 0, 0,     /* 0x28-0x2F ()*+,-./ */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x30-0x37 01234567 */
    0, 0, 0, 1, 1, 1, 1, 0,     /* 0x38-0x3F 89:;<=>? */
    1, 0, 0, 0, 0, 0, 0, 0,     /* 0x40-0x47 @ABCDEFG */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x48-0x4F HIJKLMNO */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x50-0x57 PQRSTUVW */
    0, 0, 0, 1, 1, 1, 1, 1,     /* 0x58-0x5F XYZ[\]^_ */
    1, 0, 0, 0, 0, 0, 0, 0,     /* 0x60-0x67 `abcdefg */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x68-0x6F hijklmno */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x70-0x77 pqrstuvw */
    0, 0, 0, 1, 1, 1, 1, 1      /* 0x78-0x7F xyz{|}~ */
};

/* '+' : switches the stream into base64 (shifted) mode */
#define IMPL_SHIFT_IN_CHAR      0x2B
/* '-' : written by the encoder when it leaves shifted mode; "+-" stands */
/* for a literal '+' */
#define IMPL_SHIFT_OUT_CHAR     0x2D

/* ----------------------------------------------------------------------- */

namespace {

/* Decoder state of the UTF-7 -> Unicode converter. */
/* The fields mirror the local variables of ImplUTF7ToUnicode(), which */
/* stores them here on exit; the code that would read them back on entry */
/* is currently commented out there. */
struct ImplUTF7ToUCContextData
{
    bool                    mbShifted;      /* inside a base64 (shifted) run */
    bool                    mbFirst;        /* no base64 character seen yet since SHIFT_IN */
    bool                    mbWroteOne;     /* the current run already produced a character */
    sal_uInt32              mnBitBuffer;    /* pending bits, left-aligned in the 32-bit word */
    sal_uInt32              mnBufferBits;   /* number of valid bits in mnBitBuffer */
};

}

/* ----------------------------------------------------------------------- */

void* ImplUTF7CreateUTF7TextToUnicodeContext()
{
    ImplUTF7ToUCContextData* pContextData = new ImplUTF7ToUCContextData;
    pContextData->mbShifted         = false;
    pContextData->mbFirst           = false;
    pContextData->mbWroteOne        = false;
    pContextData->mnBitBuffer       = 0;
    pContextData->mnBufferBits      = 0;
    return pContextData;
}

/* ----------------------------------------------------------------------- */

void ImplUTF7DestroyTextToUnicodeContext( void* pContext )
{
    delete static_cast< ImplUTF7ToUCContextData * >(pContext);
}

/* ----------------------------------------------------------------------- */

void ImplUTF7ResetTextToUnicodeContext( void* pContext )
{
    ImplUTF7ToUCContextData* pContextData = static_cast<ImplUTF7ToUCContextData*>(pContext);
    pContextData->mbShifted         = false;
    pContextData->mbFirst           = false;
    pContextData->mbWroteOne        = false;
    pContextData->mnBitBuffer       = 0;
    pContextData->mnBufferBits      = 0;
}

/* ----------------------------------------------------------------------- */

sal_Size ImplUTF7ToUnicode( SAL_UNUSED_PARAMETER const void*, void* pContext,
                            const char* pSrcBuf, sal_Size nSrcBytes,
                            sal_Unicode* pDestBuf, sal_Size nDestChars,
                            sal_uInt32 nFlags, sal_uInt32* pInfo,
                            sal_Size* pSrcCvtBytes )
{
    ImplUTF7ToUCContextData*    pContextData = static_cast<ImplUTF7ToUCContextData*>(pContext);
    unsigned char                   c ='\0';
    unsigned char                   nBase64Value = 0;
    bool                        bEnd = false;
    bool                        bShifted;
    bool                        bFirst;
    bool                        bWroteOne;
    bool                        bBase64End;
    sal_uInt32                  nBitBuffer;
    sal_uInt32                  nBitBufferTemp;
    sal_uInt32                  nBufferBits;
    sal_Unicode*                pEndDestBuf;
    const char*             pEndSrcBuf;

/* !!! Implementation not finished !!!
    if ( pContextData )
    {
        bShifted        = pContextData->mbShifted;
        bFirst          = pContextData->mbFirst;
        bWroteOne       = pContextData->mbWroteOne;
        nBitBuffer      = pContextData->mnBitBuffer;
        nBufferBits     = pContextData->mnBufferBits;
    }
    else
*/

    {
        bShifted        = false;
        bFirst          = false;
        bWroteOne       = false;
        nBitBuffer      = 0;
        nBufferBits     = 0;
    }

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestChars;
    pEndSrcBuf  = pSrcBuf+nSrcBytes;
    do
    {
        if ( pSrcBuf < pEndSrcBuf )
        {
            c = static_cast<unsigned char>(*pSrcBuf);

            /* End, when not a base64 character */
            bBase64End = false;
            if ( c <= 0x7F )
            {
                nBase64Value = aImplBase64IndexTab[c];
                if ( nBase64Value == 0xFF )
                    bBase64End = true;
            }
        }
        else
        {
            bEnd = true;
            bBase64End = true;
        }

        if ( bShifted )
        {
            if ( bBase64End )
            {
                bShifted = false;

                /* If the character causing us to drop out was SHIFT_IN */
                /* or SHIFT_OUT, it may be a special escape for SHIFT_IN. */
                /* The test for SHIFT_IN is not necessary, but allows */
                /* an alternate form of UTF-7 where SHIFT_IN is escaped */
                /* by SHIFT_IN. This only works for some values of */
                /* SHIFT_IN. It is so implemented, because this comes */
                /* from the official unicode book (The Unicode Standard, */
                /* Version 2.0) and so I think, that someone of the */
                /* world has used this feature. */
                if ( !bEnd )
                {
                    if ( (c == IMPL_SHIFT_IN_CHAR) || (c == IMPL_SHIFT_OUT_CHAR) )
                    {
                        /* If no base64 character, and the terminating */
                        /* character of the shift sequence was the */
                        /* SHIFT_OUT_CHAR, then it't a special escape */
                        /* for SHIFT_IN_CHAR. */
                        if ( bFirst && (c == IMPL_SHIFT_OUT_CHAR) )
                        {
                            if ( pDestBuf >= pEndDestBuf )
                            {
                                *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
                                break;
                            }
                            *pDestBuf = IMPL_SHIFT_IN_CHAR;
                            pDestBuf++;
                            bWroteOne = true;
                        }

                        /* Skip character */
                        pSrcBuf++;
                        if ( pSrcBuf < pEndSrcBuf )
                            c = static_cast<unsigned char>(*pSrcBuf);
                        else
                            bEnd = true;
                    }
                }

                /* Empty sequence not allowed, so when we don't write one */
                /* valid char, then the sequence is corrupt */
                if ( !bWroteOne )
                {
                    /* When no more bytes in the source buffer, then */
                    /* this buffer may be too small */
                    if ( bEnd )
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
                    else
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
                        if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
                        {
                            if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
                                if (!bEnd) {
                                    ++pSrcBuf;
                                }
                            } else {
                                //TODO: move pSrcBuf back to a reasonable starting place
                            }
                            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                            break;
                        }
                        /* We insert here no default char, because I think */
                        /* this is better to ignore this */
                    }
                }
            }
            else
            {
                /* Add 6 Bits from character to the bit buffer */
                nBufferBits += 6;
                nBitBuffer |= static_cast<sal_uInt32>(nBase64Value & 0x3F) << (32-nBufferBits);
                bFirst = false;
            }

            /* Extract as many full 16 bit characters as possible from the */
            /* bit buffer. */
            while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 16) )
            {
                nBitBufferTemp = nBitBuffer >> (32-16);
                *pDestBuf = static_cast<sal_Unicode>(nBitBufferTemp & 0xFFFF);
                pDestBuf++;
                nBitBuffer <<= 16;
                nBufferBits -= 16;
                bWroteOne = true;
            }

            if ( nBufferBits >= 16 )
            {
                *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
                break;
            }

            if ( bBase64End )
            {
                /* Sequence ended and we have some bits, then the */
                /* sequence is corrupted */
                if ( nBufferBits && nBitBuffer )
                {
                    /* When no more bytes in the source buffer, then */
                    /* this buffer may be too small */
                    if ( bEnd )
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
                    else
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
                        if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
                        {
                            if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
                                if (!bEnd) {
                                    ++pSrcBuf;
                                }
                            } else {
                                //TODO: move pSrcBuf back to a reasonable starting place
                            }
                            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                            break;
                        }
                        if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
                        {
                            if ( pDestBuf >= pEndDestBuf )
                            {
                                *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
                                break;
                            }
                            *pDestBuf++
                                = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                        }
                    }

                }

                nBitBuffer = 0;
                nBufferBits = 0;
            }
        }

        if ( !bEnd )
        {
            if ( !bShifted )
            {
                if ( c == IMPL_SHIFT_IN_CHAR )
                {
                    bShifted    = true;
                    bFirst      = true;
                    bWroteOne   = false;
                }
                else
                {
                    /* No direct encoded character, then the buffer is */
                    /* corrupt */
                    if ( c > 0x7F )
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
                        if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
                        {
                            if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
                                ++pSrcBuf;
                            }
                            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                            break;
                        }
                        if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
                        {
                            if ( pDestBuf >= pEndDestBuf )
                            {
                                *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
                                break;
                            }
                            *pDestBuf++
                                = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                        }
                    }
                    else
                    {
                        /* Write char to unicode buffer */
                        if ( pDestBuf >= pEndDestBuf )
                        {
                            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
                            break;
                        }
                        *pDestBuf = c;
                        pDestBuf++;

                    }
                }
            }

            pSrcBuf++;
        }
    }
    while ( !bEnd );

    if ( pContextData )
    {
        pContextData->mbShifted         = bShifted;
        pContextData->mbFirst           = bFirst;
        pContextData->mbWroteOne        = bWroteOne;
        pContextData->mnBitBuffer       = nBitBuffer;
        pContextData->mnBufferBits      = nBufferBits;
    }

    *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
    return (nDestChars - (pEndDestBuf-pDestBuf));
}

/* ======================================================================= */

namespace {

/* Encoder state of the Unicode -> UTF-7 converter. */
/* The fields mirror the local variables of ImplUnicodeToUTF7(), which */
/* stores them here on exit; the code that would read them back on entry */
/* is currently commented out there. */
struct ImplUTF7FromUCContextData
{
    bool                    mbShifted;      /* inside a base64 (shifted) run */
    sal_uInt32              mnBitBuffer;    /* pending bits, left-aligned in the 32-bit word */
    sal_uInt32              mnBufferBits;   /* number of valid bits in mnBitBuffer */
};

}

/* ----------------------------------------------------------------------- */

void* ImplUTF7CreateUnicodeToTextContext()
{
    ImplUTF7FromUCContextData* pContextData = new ImplUTF7FromUCContextData;
    pContextData->mbShifted         = false;
    pContextData->mnBitBuffer       = 0;
    pContextData->mnBufferBits      = 0;
    return pContextData;
}

/* ----------------------------------------------------------------------- */

void ImplUTF7DestroyUnicodeToTextContext( void* pContext )
{
    delete static_cast< ImplUTF7FromUCContextData * >(pContext);
}

/* ----------------------------------------------------------------------- */

void ImplUTF7ResetUnicodeToTextContext( void* pContext )
{
    ImplUTF7FromUCContextData* pContextData = static_cast<ImplUTF7FromUCContextData*>(pContext);
    pContextData->mbShifted         = false;
    pContextData->mnBitBuffer       = 0;
    pContextData->mnBufferBits      = 0;
}

/* ----------------------------------------------------------------------- */

sal_Size ImplUnicodeToUTF7( SAL_UNUSED_PARAMETER const void*, void* pContext,
                            const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
                            char* pDestBuf, sal_Size nDestBytes,
                            SAL_UNUSED_PARAMETER sal_uInt32, sal_uInt32* pInfo,
                            sal_Size* pSrcCvtChars )
{
    ImplUTF7FromUCContextData*  pContextData = static_cast<ImplUTF7FromUCContextData*>(pContext);
    sal_Unicode                 c = '\0';
    bool                        bEnd = false;
    bool                        bShifted;
    bool                        bNeedShift;
    sal_uInt32                  nBitBuffer;
    sal_uInt32                  nBitBufferTemp;
    sal_uInt32                  nBufferBits;
    char*                   pEndDestBuf;
    const sal_Unicode*          pEndSrcBuf;

/* !!! Implementation not finished !!!
    if ( pContextData )
    {
        bShifted        = pContextData->mbShifted;
        nBitBuffer      = pContextData->mnBitBuffer;
        nBufferBits     = pContextData->mnBufferBits;
    }
    else
*/

    {
        bShifted        = false;
        nBitBuffer      = 0;
        nBufferBits     = 0;
    }

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestBytes;
    pEndSrcBuf  = pSrcBuf+nSrcChars;
    do
    {
        if ( pSrcBuf < pEndSrcBuf )
        {
            c = *pSrcBuf;

            bNeedShift = (c > 0x7F) || aImplMustShiftTab[c];
            if ( bNeedShift && !bShifted )
            {
                if ( pDestBuf >= pEndDestBuf )
                {
                    *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                    break;
                }
                *pDestBuf = IMPL_SHIFT_IN_CHAR;
                pDestBuf++;
                /* Special case handling for SHIFT_IN_CHAR */
                if ( c == IMPL_SHIFT_IN_CHAR )
                {
                    if ( pDestBuf >= pEndDestBuf )
                    {
                        *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                        break;
                    }
                    *pDestBuf = IMPL_SHIFT_OUT_CHAR;
                    pDestBuf++;
                }
                else
                    bShifted = true;
            }
        }
        else
        {
            bEnd = true;
            bNeedShift = false;
        }

        if ( bShifted )
        {
            /* Write the character to the bit buffer, or pad the bit */
            /* buffer out to a full base64 character */
            if ( bNeedShift )
            {
                nBufferBits += 16;
                nBitBuffer |= static_cast<sal_uInt32>(c) << (32-nBufferBits);
            }
            else
                nBufferBits += (6-(nBufferBits%6))%6;

            /* Flush out as many full base64 characters as possible */
            while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 6) )
            {
                nBitBufferTemp = nBitBuffer >> (32-6);
                *pDestBuf = aImplBase64Tab[nBitBufferTemp];
                pDestBuf++;
                nBitBuffer <<= 6;
                nBufferBits -= 6;
            }

            if ( nBufferBits >= 6 )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            /* Write SHIFT_OUT_CHAR, when needed */
            if ( !bNeedShift )
            {
                if ( pDestBuf >= pEndDestBuf )
                {
                    *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                    break;
                }
                *pDestBuf = IMPL_SHIFT_OUT_CHAR;
                pDestBuf++;
                bShifted = false;
            }
        }

        if ( !bEnd )
        {
            /* Character can be directly encoded */
            if ( !bNeedShift )
            {
                if ( pDestBuf >= pEndDestBuf )
                {
                    *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                    break;
                }
                *pDestBuf = static_castchar >(static_castunsigned char >(c));
                pDestBuf++;
            }

            pSrcBuf++;
        }
    }
    while ( !bEnd );

    if ( pContextData )
    {
        pContextData->mbShifted     = bShifted;
        pContextData->mnBitBuffer   = nBitBuffer;
        pContextData->mnBufferBits  = nBufferBits;
    }

    *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
    return (nDestBytes - (pEndDestBuf-pDestBuf));
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Messung V0.5
C=93 H=90 G=91

¤ Dauer der Verarbeitung: 0.2 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.