Quellcodebibliothek Statistik Leitseite products/Sources/formale Sprachen/C/LibreOffice/tools/source/fsys/   (Office von Apache Version 25.8.3.2©)  Datei vom 5.10.2025 mit Größe 178 kB image not shown  

Quelle  urlobj.cxx   Sprache: C

 
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */


#include <sal/config.h>

#include <tools/urlobj.hxx>
#include <tools/debug.hxx>
#include <tools/inetmime.hxx>
#include <tools/stream.hxx>
#include <com/sun/star/uno/Reference.hxx>
#include <com/sun/star/util/XStringWidth.hpp>
#include <o3tl/enumarray.hxx>
#include <osl/diagnose.h>
#include <osl/file.hxx>
#include <rtl/character.hxx>
#include <rtl/string.h>
#include <rtl/textenc.h>
#include <rtl/ustring.hxx>
#include <sal/log.hxx>
#include <sal/types.h>

#include <algorithm>
#include <cassert>
#include <limits>
#include <memory>
#include <string_view>

#include <string.h>

#include <com/sun/star/uno/Sequence.hxx>
#include <comphelper/base64.hxx>
#include <comphelper/string.hxx>

using namespace css;

//  INetURLObject

/* The URI grammar (using RFC 2234 conventions).

   Constructs of the form
       {reference <rule1> using rule2}
   stand for a rule matching the given rule1 specified in the given reference,
   encoded to URI syntax using rule2 (as specified in this URI grammar).


   ; RFC 1738, RFC 2396, RFC 2732, private
   login = [user [":" password] "@"] hostport
   user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
   password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
   hostport = host [":" port]
   host = incomplete-hostname / hostname / IPv4address / IPv6reference
   incomplete-hostname = *(domainlabel ".") domainlabel
   hostname = *(domainlabel ".") toplabel ["."]
   domainlabel = alphanum [*(alphanum / "-") alphanum]
   toplabel = ALPHA [*(alphanum / "-") alphanum]
   IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
   IPv6reference = "[" hexpart [":" IPv4address] "]"
   hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
   hexseq = hex4 *(":" hex4)
   hex4 = 1*4HEXDIG
   port = *DIGIT
   escaped = "%" HEXDIG HEXDIG
   reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
   mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
   alphanum = ALPHA / DIGIT
   unreserved = alphanum / mark
   uric = escaped / reserved / unreserved
   pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"


   ; RFC 1738, RFC 2396
   ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
   segment = *pchar


   ; RFC 1738, RFC 2396
   http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
   segment = *(pchar / ";")


   ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&>
   file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
   segment = *pchar
   netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}


   ; RFC 2368, RFC 2396
   mailto-url = "MAILTO:" [to] [headers]
   to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
   headers = "?" header *("&" header)
   header = hname "=" hvalue
   hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
   hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}


   ; private (see RFC 1738, RFC 2396)
   vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
   segment = *(pchar / ";")


   ; private
   private-url = "PRIVATE:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
   name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
   segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
   segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   slot-url = "SLOT:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   macro-url = "MACRO:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   javascript-url = "JAVASCRIPT:" *uric


   ; RFC 2397
   data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
   mediatype = [type "/" subtype] *(";" attribute "=" value)
   type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
   subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
   attribute = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
   value = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}


   ; RFC 2392, RFC 2396
   cid-url = "CID:" {RFC 822 <addr-spec> using *uric}


   ; private
   vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
   reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   uno-url = ".UNO:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   component-url = ".COMPONENT:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
   reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")


   ; RFC 2255
   ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
   dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
   attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
   filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
   extension = ["!"] ["X-"] extoken ["=" exvalue]
   extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
   exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}


   ; private
   db-url = "DB:" *uric


   ; private
   vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
   opaque_part = uric_no_slash *uric
   uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","


   ; RFC 1738
   telnet-url = "TELNET://" login ["/"]


   ; private
   vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
   opaque_part = uric_no_slash *uric
   uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","


   ; private
   vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
   segment = *pchar


   ; private
   unknown-url = scheme ":" 1*uric
   scheme = ALPHA *(alphanum / "+" / "-" / ".")


   ; private (http://ubiqx.org/cifs/Appendix-D.html):
   smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
   segment = *(pchar / ";")
 */


sal_Int32 INetURLObject::SubString::clear()
{
    sal_Int32 nDelta = -m_nLength;
    m_nBegin = -1;
    m_nLength = 0;
    return nDelta;
}

sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
                                       std::u16string_view rSubString)
{
    sal_Int32 nDelta = rSubString.size() - m_nLength;

    rString.remove(m_nBegin, m_nLength);
    rString.insert(m_nBegin, rSubString);

    m_nLength = rSubString.size();
    return nDelta;
}

sal_Int32 INetURLObject::SubString::set(OUString & rString,
                                       std::u16string_view rSubString)
{
    sal_Int32 nDelta = rSubString.size() - m_nLength;

    rString = OUString::Concat(rString.subView(0, m_nBegin)) + 
             rSubString + rString.subView(m_nBegin + m_nLength);

    m_nLength = rSubString.size();
    return nDelta;
}

sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
                                        std::u16string_view rSubString,
                                        sal_Int32 nTheBegin)
{
    m_nBegin = nTheBegin;
    return set(rString, rSubString);
}

inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta)
{
    if (isPresent())
        m_nBegin = m_nBegin + nDelta;
}

int INetURLObject::SubString::compare(SubString const & rOther,
                                      OUStringBuffer const & rThisString,
                                      OUStringBuffer const & rOtherString) const
{
    sal_Int32 len = std::min(m_nLength, rOther.m_nLength);
    sal_Unicode const * p1 = rThisString.getStr() + m_nBegin;
    sal_Unicode const * end = p1 + len;
    sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin;
    while (p1 != end) {
        if (*p1 < *p2) {
            return -1;
        } else if (*p1 > *p2) {
            return 1;
        }
        ++p1;
        ++p2;
    }
    return m_nLength < rOther.m_nLength ? -1
        : m_nLength > rOther.m_nLength ? 1
        : 0;
}

// Static description of one URL scheme, as stored in the table built by
// getSchemeInfo(INetProtocol).
// NOTE: the table initializes this struct positionally, so the member order
// here must stay in sync with the initializer rows there.
struct INetURLObject::SchemeInfo
{
    // Bare scheme name as listed in the table, e.g. "ftp" (empty for the
    // entries that have no fixed scheme).
    OUString m_sScheme;
    // Scheme name plus its delimiter as listed in the table, e.g. "ftp://"
    // or "mailto:".
    OUString m_aPrefix;
    // Per-scheme flags. Judging by their names they say which URL components
    // the scheme supports; the code that evaluates them is not part of this
    // section -- TODO confirm against the parser.
    bool m_bAuthority;
    bool m_bUser;
    bool m_bAuth;
    bool m_bPassword;
    bool m_bHost;
    bool m_bPort;
    bool m_bHierarchical;
    bool m_bQuery;
};

// Describes one recognized URL prefix (as returned by getPrefix()) and the
// scheme it maps to.
struct INetURLObject::PrefixInfo
{
    // The enumerators are compared relationally (setAbsURIRef() tests
    // "m_eKind >= Kind::External"), hence:
    enum class Kind { Official, Internal, External }; // order is important!

    // Prefix text; setAbsURIRef() stores it for kinds below External.
    OUString     m_aPrefix;
    // Replacement prefix; setAbsURIRef() stores it instead of m_aPrefix when
    // the kind is External.
    OUString     m_aTranslatedPrefix;
    // Scheme that a URL starting with this prefix is assigned.
    INetProtocol m_eScheme;
    // Classification of the prefix, see Kind.
    Kind         m_eKind;
};

// static
inline INetURLObject::SchemeInfo const &
INetURLObject::getSchemeInfo(INetProtocol eTheScheme)
{
    static constexpr OUString EMPTY = u""_ustr;
    static constexpr OUString FTP = u"ftp"_ustr;
    static constexpr OUString HTTP = u"http"_ustr;
    static constexpr OUString FILE1 = u"file"_ustr; // because FILE is already defined
    static constexpr OUString MAILTO = u"mailto"_ustr;
    static constexpr OUString VND_WEBDAV = u"vnd.sun.star.webdav"_ustr;
    static constexpr OUString PRIVATE = u"private"_ustr;
    static constexpr OUString VND_HELP = u"vnd.sun.star.help"_ustr;
    static constexpr OUString HTTPS = u"https"_ustr;
    static constexpr OUString SLOT = u"slot"_ustr;
    static constexpr OUString MACRO = u"macro"_ustr;
    static constexpr OUString JAVASCRIPT = u"javascript"_ustr;
    static constexpr OUString DATA = u"data"_ustr;
    static constexpr OUString CID = u"cid"_ustr;
    static constexpr OUString VND_HIER = u"vnd.sun.star.hier"_ustr;
    static constexpr OUString UNO = u".uno"_ustr;
    static constexpr OUString COMPONENT = u".component"_ustr;
    static constexpr OUString VND_PKG = u"vnd.sun.star.pkg"_ustr;
    static constexpr OUString LDAP = u"ldap"_ustr;
    static constexpr OUString DB = u"db"_ustr;
    static constexpr OUString VND_CMD = u"vnd.sun.star.cmd"_ustr;
    static constexpr OUString TELNET = u"telnet"_ustr;
    static constexpr OUString VND_EXPAND = u"vnd.sun.star.expand"_ustr;
    static constexpr OUString VND_TDOC = u"vnd.sun.star.tdoc"_ustr;
    static constexpr OUString SMB = u"smb"_ustr;
    static constexpr OUString HID = u"hid"_ustr;
    static constexpr OUString SFTP = u"sftp"_ustr;
    static constexpr OUString VND_CMIS = u"vnd.libreoffice.cmis"_ustr;

    static o3tl::enumarray<INetProtocol, SchemeInfo> constexpr map = {
        // [-loplugin:redundantfcast]:
        SchemeInfo{
            EMPTY, u""_ustr, falsefalsefalsefalsefalsefalsefalsefalse},
        SchemeInfo{
            FTP, u"ftp://"_ustr, true, true, false, true, true, true, true,
            false},
        SchemeInfo{
            HTTP, u"http://"_ustr, true, false, false, false, true, true, true,
            true},
        SchemeInfo{
            FILE1, u"file://"_ustr, true, false, false, false, true, false, true,
            false},
        SchemeInfo{
            MAILTO, u"mailto:"_ustr, falsefalsefalsefalsefalsefalse,
            falsetrue},
        SchemeInfo{
            VND_WEBDAV, u"vnd.sun.star.webdav://"_ustr, true, false,
            falsefalsetruetruetruetrue},
        SchemeInfo{
            PRIVATE, u"private:"_ustr, falsefalsefalsefalsefalsefalse,
            falsetrue},
        SchemeInfo{
            VND_HELP, u"vnd.sun.star.help://"_ustr, true, false, false,
            falsefalsefalsetruetrue},
        SchemeInfo{
            HTTPS, u"https://"_ustr, true, false, false, false, true, true,
            truetrue},
        SchemeInfo{
            SLOT, u"slot:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            true},
        SchemeInfo{
            MACRO, u"macro:"_ustr, falsefalsefalsefalsefalsefalse,
            falsetrue},
        SchemeInfo{
            JAVASCRIPT, u"javascript:"_ustr, falsefalsefalsefalsefalse,
            falsefalsefalse},
        SchemeInfo{
            DATA, u"data:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            false},
        SchemeInfo{
            CID, u"cid:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            false},
        SchemeInfo{
            VND_HIER, u"vnd.sun.star.hier:"_ustr, truefalsefalse,
            falsefalsefalsetruefalse},
        SchemeInfo{
            UNO, u".uno:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            true},
        SchemeInfo{
            COMPONENT, u".component:"_ustr, falsefalsefalsefalsefalse,
            falsefalsetrue},
        SchemeInfo{
            VND_PKG, u"vnd.sun.star.pkg://"_ustr, true, false, false,
            falsefalsefalsetruetrue},
        SchemeInfo{
            LDAP, u"ldap://"_ustr, true, false, false, false, true, true,
            falsetrue},
        SchemeInfo{
            DB, u"db:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            false},
        SchemeInfo{
            VND_CMD, u"vnd.sun.star.cmd:"_ustr, falsefalsefalse,
            falsefalsefalsefalsefalse},
        SchemeInfo{
            TELNET, u"telnet://"_ustr, true, true, false, true, true, true,
            truefalse},
        SchemeInfo{
            VND_EXPAND, u"vnd.sun.star.expand:"_ustr, falsefalse,
            falsefalsefalsefalsefalsefalse},
        SchemeInfo{
            VND_TDOC, u"vnd.sun.star.tdoc:"_ustr, falsefalsefalse,
            falsefalsefalsetruefalse},
        SchemeInfo{
            EMPTY, u""_ustr, falsefalsefalsefalsetruetruetruefalse },
        SchemeInfo{
            SMB, u"smb://"_ustr, true, true, false, true, true, true, true,
            true},
        SchemeInfo{
            HID, u"hid:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            true},
        SchemeInfo{
            SFTP, u"sftp://"_ustr, true, true, false, true, true, true, true,
            true},
        SchemeInfo{
            VND_CMIS, u"vnd.libreoffice.cmis://"_ustr, true, true,
            falsefalsetruefalsetruetrue} };
    return map[eTheScheme];
};

inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo() const
{
    return getSchemeInfo(m_eScheme);
}

namespace {

sal_Unicode getHexDigit(sal_uInt32 nWeight)
{
    assert(nWeight < 16);
    static const sal_Unicode aDigits[16]
        = { '0''1''2''3''4''5''6''7''8''9''A''B''C',
            'D''E''F' };
    return aDigits[nWeight];
}

}

// static
inline void INetURLObject::appendEscape(OUStringBuffer & rTheText,
                                        sal_uInt32 nOctet)
{
    rTheText.append( '%' );
    rTheText.append( getHexDigit(nOctet >> 4) );
    rTheText.append( getHexDigit(nOctet & 15) );
}

namespace {

// Two-letter aliases for the INetURLObject::Part constants.  They exist only
// to keep the rows of aMustEncodeMap below short enough to line up in
// columns.  (There are no PB and PC aliases.)
enum
{
    PA = INetURLObject::PART_USER_PASSWORD,
    PD = INetURLObject::PART_FPATH,
    PE = INetURLObject::PART_AUTHORITY,
    PF = INetURLObject::PART_REL_SEGMENT_EXTRA,
    PG = INetURLObject::PART_URIC,
    PH = INetURLObject::PART_HTTP_PATH,
    PI = INetURLObject::PART_MESSAGE_ID_PATH,
    PJ = INetURLObject::PART_MAILTO,
    PK = INetURLObject::PART_PATH_BEFORE_QUERY,
    PL = INetURLObject::PART_PCHAR,
    PM = INetURLObject::PART_VISIBLE,
    PN = INetURLObject::PART_VISIBLE_NONSPECIAL,
    PO = INetURLObject::PART_UNO_PARAM_VALUE,
    PP = INetURLObject::PART_UNAMBIGUOUS,
    PQ = INetURLObject::PART_URIC_NO_SLASH,
    PR = INetURLObject::PART_HTTP_QUERY,
};

// Lookup table consulted by mustEncode(): one entry per ASCII code point
// (index 0..127), each the sum of the Part aliases (PA..PR) for that
// character.
//
// Despite the name, a part that IS listed for a character means the
// character may appear literally in that part: mustEncode() reports "must
// encode" exactly when the requested part is NOT contained in the entry.
// The first 32 entries (control characters) and the last entry (index 127)
// are 0, so those characters must be encoded in every part.
//
// The sums act as bit sets; this assumes the Part constants are distinct
// single bits -- TODO confirm against the declaration of INetURLObject::Part.
sal_uInt32 const aMustEncodeMap[128]
    = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*   */                                              PP,
/* ! */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* " */                                  PM+PN   +PP,
/* # */                                  PM,
/* $ */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* % */                                  PM,
/* & */ PA   +PD+PE+PF+PG+PH+PI   +PK+PL+PM+PN+PO   +PQ+PR,
/* ' */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* ( */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* ) */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* * */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* + */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO   +PQ+PR,
/* , */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN      +PQ+PR,
/* - */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* . */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* / */      +PD      +PG+PH+PI+PJ+PK   +PM+PN+PO,
/* 0 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 1 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 2 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 3 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 4 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 5 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 6 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 7 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 8 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 9 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* : */      +PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO   +PQ+PR,
/* ; */ PA      +PE+PF+PG+PH+PI+PJ+PK   +PM         +PQ+PR,
/* < */                     +PI         +PM+PN   +PP,
/* = */ PA   +PD+PE+PF+PG+PH      +PK+PL+PM+PN      +PQ+PR,
/* > */                     +PI         +PM+PN   +PP,
/* ? */               +PG               +PM   +PO   +PQ,
/* @ */      +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* A */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* B */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* C */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* D */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* E */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* F */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* G */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* H */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* I */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* J */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* K */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* L */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* M */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* N */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* O */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* P */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* Q */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* R */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* S */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* T */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* U */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* V */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* W */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* X */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* Y */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* Z */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* [ */                PG               +PM+PN+PO,
/* \ */                                 +PM+PN   +PP,
/* ] */                PG               +PM+PN+PO,
/* ^ */                                  PM+PN   +PP,
/* _ */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* ` */                                  PM+PN   +PP,
/* a */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* b */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* c */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* d */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* e */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* f */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* g */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* h */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* i */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* j */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* k */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* l */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* m */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* n */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* o */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* p */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* q */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* r */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* s */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* t */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* u */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* v */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* w */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* x */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* y */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* z */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* { */                                  PM+PN   +PP,
/* | */                                 +PM+PN   +PP,
/* } */                                  PM+PN   +PP,
/* ~ */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ,
        0 };

bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart)
{
    return !rtl::isAscii(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart);
}

}

void INetURLObject::setInvalid()
{
    m_aAbsURIRef.setLength(0);
    m_eScheme = INetProtocol::NotValid;
    m_aScheme.clear();
    m_aUser.clear();
    m_aAuth.clear();
    m_aHost.clear();
    m_aPort.clear();
    m_aPath.clear();
    m_aQuery.clear();
    m_aFragment.clear();
}

namespace {

std::unique_ptr<SvMemoryStream> memoryStream(
        void const * data, sal_Int32 length)
{
    std::unique_ptr<char[]> b(
        new char[length]);
    memcpy(b.get(), data, length);
    std::unique_ptr<SvMemoryStream> s(
        new SvMemoryStream(b.get(), length, StreamMode::READ));
    s->ObjectOwnsMemory(true);
    // coverity[leaked_storage : FALSE] - belongs to SvMemoryStream s at this point
    b.release();
    return s;
}

}

std::unique_ptr<SvMemoryStream> INetURLObject::getData() const
{
    if( GetProtocol() != INetProtocol::Data )
    {
        return nullptr;
    }

    OUString sURLPath = GetURLPath( DecodeMechanism::WithCharset, RTL_TEXTENCODING_ISO_8859_1 );
    sal_Unicode const * pSkippedMediatype = INetMIME::scanContentType( sURLPath );
    sal_Int32 nCharactersSkipped = pSkippedMediatype == nullptr
        ? 0 : pSkippedMediatype-sURLPath.getStr();
    if (sURLPath.match(",", nCharactersSkipped))
    {
        nCharactersSkipped += strlen(",");
        OString sURLEncodedData(
            sURLPath.getStr() + nCharactersSkipped,
            sURLPath.getLength() - nCharactersSkipped,
            RTL_TEXTENCODING_ISO_8859_1, OUSTRING_TO_OSTRING_CVTFLAGS);
        return memoryStream(
            sURLEncodedData.getStr(), sURLEncodedData.getLength());
    }
    else if (sURLPath.matchIgnoreAsciiCase(";base64,", nCharactersSkipped))
    {
        nCharactersSkipped += strlen(";base64,");
        std::u16string_view sBase64Data = sURLPath.subView( nCharactersSkipped );
        css::uno::Sequence< sal_Int8 > aDecodedData;
        if (comphelper::Base64::decodeSomeChars(aDecodedData, sBase64Data)
            == sBase64Data.size())
        {
            return memoryStream(
                aDecodedData.getArray(), aDecodedData.getLength());
        }
    }
    return nullptr;
}

namespace {

FSysStyle guessFSysStyleByCounting(sal_Unicode const * pBegin,
                                                  sal_Unicode const * pEnd,
                                                  FSysStyle eStyle)
{
    DBG_ASSERT(eStyle
                   & (FSysStyle::Unix
                          | FSysStyle::Dos),
               "guessFSysStyleByCounting(): Bad style");
    DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd
               && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(),
               "guessFSysStyleByCounting(): Too big");
    sal_Int32 nSlashCount
        = (eStyle & FSysStyle::Unix) ?
              0 : std::numeric_limits< sal_Int32 >::min();
    sal_Int32 nBackslashCount
        = (eStyle & FSysStyle::Dos) ?
              0 : std::numeric_limits< sal_Int32 >::min();
    while (pBegin != pEnd)
        switch (*pBegin++)
        {
            case '/':
                ++nSlashCount;
                break;

            case '\\':
                ++nBackslashCount;
                break;
        }
    return nSlashCount >= nBackslashCount ?
                   FSysStyle::Unix : FSysStyle::Dos;
}

OUString parseScheme(
    sal_Unicode const ** begin, sal_Unicode const * end,
    sal_uInt32 fragmentDelimiter)
{
    sal_Unicode const * p = *begin;
    if (p != end && rtl::isAsciiAlpha(*p)) {
        do {
            ++p;
        } while (p != end
                 && (rtl::isAsciiAlphanumeric(*p) || *p == '+' || *p == '-'
                     || *p == '.'));
        // #i34835# To avoid problems with Windows file paths like "C:\foo",
        // do not accept generic schemes that are only one character long:
        if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
            && p - *begin >= 2)
        {
            OUString scheme(
                OUString(*begin, p - *begin).toAsciiLowerCase());
            *begin = p + 1;
            return scheme;
        }
    }
    return OUString();
}

}

bool INetURLObject::setAbsURIRef(std::u16string_view rTheAbsURIRef,
                                 EncodeMechanism eMechanism,
                                 rtl_TextEncoding eCharset,
                                 bool bSmart,
                                 FSysStyle eStyle)
{
    sal_Unicode const * pPos = rTheAbsURIRef.data();
    sal_Unicode const * pEnd = pPos + rTheAbsURIRef.size();

    setInvalid();

    sal_uInt32 nFragmentDelimiter = '#';

    m_aAbsURIRef.setLength(0);

    // Parse <scheme>:
    sal_Unicode const * p = pPos;
    PrefixInfo const * pPrefix = getPrefix(p, pEnd);
    if (pPrefix)
    {
        pPos = p;
        m_eScheme = pPrefix->m_eScheme;

        const OUString & rTemp = pPrefix->m_eKind >= PrefixInfo::Kind::External ?
                                             pPrefix->m_aTranslatedPrefix :
                                             pPrefix->m_aPrefix;
        m_aAbsURIRef.append(rTemp);
        m_aScheme = SubString( 0, rTemp.indexOf(':') );
    }
    else
    {
        if (bSmart)
        {
            // For scheme detection, the first (if any) of the following
            // productions that matches the input string (and for which the
            // appropriate style bit is set in eStyle, if applicable)
            // determines the scheme. The productions use the auxiliary rules

            //    domain = label *("." label)
            //    label = alphanum [*(alphanum / "-") alphanum]
            //    alphanum = ALPHA / DIGIT
            //    IPv6reference = "[" IPv6address "]"
            //    IPv6address = hexpart [":" IPv4address]
            //    IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
            //    hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
            //    hexseq = hex4 *(":" hex4)
            //    hex4 = 1*4HEXDIG
            //    UCS4 = <any UCS4 character>

            // 1st Production (known scheme; handled by the "if (pPrefix)" branch above):
            //    <one of the known schemes, ignoring case> ":" *UCS4
            // 2nd Production (mailto):
            //    domain "@" domain
            // 3rd Production (ftp):
            //    "FTP" 2*("." label) ["/" *UCS4]
            // 4th Production (http):
            //    label 2*("." label) ["/" *UCS4]
            // 5th Production (file):
            //    "//" (domain / IPv6reference) ["/" *UCS4]
            // 6th Production (Unix file):
            //    "/" *UCS4
            // 7th Production (UNC file; FSysStyle::Dos only):
            //    "\\" domain ["\" *UCS4]
            // 8th Production (Unix-like DOS file; FSysStyle::Dos only):
            //    ALPHA ":" ["/" *UCS4]
            // 9th Production (DOS file; FSysStyle::Dos only):
            //    ALPHA ":" ["\" *UCS4]
            // 10th Production (any scheme; handled by the "m_eScheme = INetProtocol::Generic;" code
            // after this else branch):
            //    <any scheme> ":" *UCS4

            // For the 'non URL' file productions 6--9, the interpretation of
            // the input as a (degenerate) URI is turned off, i.e., escape
            // sequences and fragments are never detected as such, but are
            // taken as literal characters.

            sal_Unicode const * p1 = pPos;
            if (eStyle & FSysStyle::Dos
                && pEnd - p1 >= 2
                && rtl::isAsciiAlpha(p1[0])
                && p1[1] == ':'
                && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\'))
            {
                m_eScheme = INetProtocol::File; // 8th, 9th
                eMechanism = EncodeMechanism::All;
                nFragmentDelimiter = 0x80000000;
            }
            else if (eStyle & FSysStyle::Dos
                && pEnd - p1 >= 6
                && p1[0] == '\\' && p1[1] == '\\' && p1[2] == '?' && p1[3] == '\\'
                && rtl::isAsciiAlpha(p1[4])
                && p1[5] == ':'
                && (pEnd - p1 == 6 || p1[6] == '/' || p1[6] == '\\'))
            {
                m_eScheme = INetProtocol::File; // 8th, 9th
                eMechanism = EncodeMechanism::All;
                nFragmentDelimiter = 0x80000000;
            }
            else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/')
            {
                p1 += 2;
                if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd))
                    && (p1 == pEnd || *p1 == '/'))
                    m_eScheme = INetProtocol::File; // 5th
            }
            else if (p1 != pEnd && *p1 == '/')
            {
                m_eScheme = INetProtocol::File; // 6th
                eMechanism = EncodeMechanism::All;
                nFragmentDelimiter = 0x80000000;
            }
            else if (eStyle & FSysStyle::Dos
                     && pEnd - p1 >= 2
                     && p1[0] == '\\'
                     && p1[1] == '\\')
            {
                p1 += 2;
                if (pEnd - p1 >= 6 && p1[0] == '?' && p1[1] == '\\' && p1[5] == '\\'
                    && rtl::toAsciiLowerCase(p1[2]) == 'u'
                    && rtl::toAsciiLowerCase(p1[3]) == 'n'
                    && rtl::toAsciiLowerCase(p1[4]) == 'c')
                {
                    p1 += 6; // "\\?\UNC\Servername\..."
                }

                sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
                    p1, pEnd - p1, '\\');
                sal_Unicode const * pe = n == -1 ? pEnd : p1 + n;
                if (
                    parseHostOrNetBiosName(
                        p1, pe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW,
                        true, nullptr) ||
                    (scanDomain(p1, pe) > 0 && p1 == pe)
                   )
                {
                    m_eScheme = INetProtocol::File; // 7th
                    eMechanism = EncodeMechanism::All;
                    nFragmentDelimiter = 0x80000000;
                }
            }
            else
            {
                sal_Unicode const * pDomainEnd = p1;
                sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd);
                if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@')
                {
                    ++pDomainEnd;
                    if (scanDomain(pDomainEnd, pEnd) > 0
                        && pDomainEnd == pEnd)
                        m_eScheme = INetProtocol::Mailto; // 2nd
                }
                else if (nLabels >= 3
                         && (pDomainEnd == pEnd || *pDomainEnd == '/'))
                    m_eScheme
                        = pDomainEnd - p1 >= 4
                          && (p1[0] == 'f' || p1[0] == 'F')
                          && (p1[1] == 't' || p1[1] == 'T')
                          && (p1[2] == 'p' || p1[2] == 'P')
                          && p1[3] == '.' ?
                              INetProtocol::Ftp : INetProtocol::Http; // 3rd, 4th
            }
        }

        OUString aSynScheme;
        if (m_eScheme == INetProtocol::NotValid) {
            sal_Unicode const * p1 = pPos;
            aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
            if (!aSynScheme.isEmpty())
            {
                if (bSmart && m_eSmartScheme != m_eScheme && p1 != pEnd && rtl::isAsciiDigit(*p1))
                {
                    // rTheAbsURIRef doesn't define a known scheme (handled by the "if (pPrefix)"
                    // branch above); but a known scheme is defined in m_eSmartScheme. If this
                    // scheme may have a port in authority component, then avoid misinterpreting
                    // URLs like www.foo.bar:123/baz as using unknown "www.foo.bar" scheme with
                    // 123/baz rootless path. For now, do not try to handle possible colons in
                    // user information, require such ambiguous URLs to have explicit scheme part.
                    // Also ignore possibility of empty port.
                    const SchemeInfo& rInfo = getSchemeInfo(m_eSmartScheme);
                    if (rInfo.m_bAuthority && rInfo.m_bPort)
                    {
                        // Make sure that all characters from colon to [/?#] or to EOL are digits.
                        // Or maybe make it simple, and just assume that "xyz:1..." is more likely
                        // to be host "xyz" and port "1...", than scheme "xyz" and path "1..."?
                        sal_Unicode const* p2 = p1 + 1;
                        while (p2 != pEnd && rtl::isAsciiDigit(*p2))
                            ++p2;
                        if (p2 == pEnd || *p2 == '/' || *p2 == '?' || *p2 == '#')
                            m_eScheme = m_eSmartScheme;
                    }
                }

                if (m_eScheme == INetProtocol::NotValid)
                {
                    m_eScheme = INetProtocol::Generic;
                    pPos = p1;
                }
            }
        }

        if (bSmart && m_eScheme == INetProtocol::NotValid && pPos != pEnd
            && *pPos != nFragmentDelimiter)
        {
            m_eScheme = m_eSmartScheme;
        }

        if (m_eScheme == INetProtocol::NotValid)
        {
            setInvalid();
            return false;
        }

        if (m_eScheme != INetProtocol::Generic) {
            aSynScheme = getSchemeInfo().m_sScheme;
        }
        m_aScheme.set(m_aAbsURIRef, aSynScheme, m_aAbsURIRef.getLength());
        m_aAbsURIRef.append(':');
    }

    sal_uInt32 nSegmentDelimiter = '/';
    sal_uInt32 nAltSegmentDelimiter = 0x80000000;
    bool bSkippedInitialSlash = false;

    // Parse //<user>;AUTH=<auth>@<host>:<port> or
    // //<user>:<password>@<host>:<port> or
    // //<reg_name>
    if (getSchemeInfo().m_bAuthority)
    {
        sal_Unicode const * pUserInfoBegin = nullptr;
        sal_Unicode const * pUserInfoEnd = nullptr;
        sal_Unicode const * pHostPortBegin = nullptr;
        sal_Unicode const * pHostPortEnd = nullptr;

        switch (m_eScheme)
        {
            case INetProtocol::VndSunStarHelp:
            {
                if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
                {
                    setInvalid();
                    return false;
                }
                m_aAbsURIRef.append("//");
                OUStringBuffer aSynAuthority;
                while (pPos < pEnd
                       && *pPos != '/' && *pPos != '?'
                       && *pPos != nFragmentDelimiter)
                {
                    EscapeType eEscapeType;
                    sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
                                                 eMechanism,
                                                 eCharset, eEscapeType);
                    appendUCS4(aSynAuthority, nUTF32, eEscapeType,
                               PART_AUTHORITY, eCharset, false);
                }
                m_aHost.set(m_aAbsURIRef,
                            aSynAuthority,
                            m_aAbsURIRef.getLength());
                    // misusing m_aHost to store the authority
                break;
            }

            case INetProtocol::VndSunStarHier:
            {
                if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
                {
                    pPos += 2;
                    m_aAbsURIRef.append("//");
                    OUStringBuffer aSynAuthority;
                    while (pPos < pEnd
                           && *pPos != '/' && *pPos != '?'
                           && *pPos != nFragmentDelimiter)
                    {
                        EscapeType eEscapeType;
                        sal_uInt32 nUTF32 = getUTF32(pPos,
                                                     pEnd,
                                                     eMechanism,
                                                     eCharset,
                                                     eEscapeType);
                        appendUCS4(aSynAuthority,
                                   nUTF32,
                                   eEscapeType,
                                   PART_AUTHORITY,
                                   eCharset,
                                   false);
                    }
                    if (aSynAuthority.isEmpty())
                    {
                        setInvalid();
                        return false;
                    }
                    m_aHost.set(m_aAbsURIRef,
                                aSynAuthority,
                                m_aAbsURIRef.getLength());
                        // misusing m_aHost to store the authority
                }
                break;
            }

            case INetProtocol::VndSunStarPkg:
            case INetProtocol::Cmis:
            {
                if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
                {
                    setInvalid();
                    return false;
                }
                m_aAbsURIRef.append("//");
                OUStringBuffer aSynUser(128);

                bool bHasUser = false;
                while (pPos < pEnd && *pPos != '@'
                       && *pPos != '/' && *pPos != '?'
                       && *pPos != nFragmentDelimiter)
                {
                    EscapeType eEscapeType;
                    sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
                                                 eMechanism,
                                                 eCharset, eEscapeType);
                    appendUCS4(aSynUser, nUTF32, eEscapeType,
                               PART_USER_PASSWORD, eCharset, false);

                    bHasUser = *pPos == '@';
                }

                OUStringBuffer aSynAuthority(64);
                if ( !bHasUser )
                {
                    aSynAuthority = std::move(aSynUser);
                }
                else
                {
                    m_aUser.set(m_aAbsURIRef,
                            aSynUser,
                            m_aAbsURIRef.getLength());
                    m_aAbsURIRef.append("@");
                    ++pPos;

                    while (pPos < pEnd
                           && *pPos != '/' && *pPos != '?'
                           && *pPos != nFragmentDelimiter)
                    {
                        EscapeType eEscapeType;
                        sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
                                                     eMechanism,
                                                     eCharset, eEscapeType);
                        appendUCS4(aSynAuthority, nUTF32, eEscapeType,
                                   PART_AUTHORITY, eCharset, false);
                    }
                }
                if (aSynAuthority.isEmpty())
                {
                    setInvalid();
                    return false;
                }
                m_aHost.set(m_aAbsURIRef,
                            aSynAuthority,
                            m_aAbsURIRef.getLength());
                    // misusing m_aHost to store the authority
                break;
            }

            case INetProtocol::File:
                if (bSmart)
                {
                    // The first of the following nine productions that
                    // matches the rest of the input string (and for which the
                    // appropriate style bit is set in eStyle, if applicable)
                    // determines the used notation.  The productions use the
                    // auxiliary rules

                    //    domain = label *("." label)
                    //    label = alphanum [*(alphanum / "-") alphanum]
                    //    alphanum = ALPHA / DIGIT
                    //    IPv6reference = "[" IPv6address "]"
                    //    IPv6address = hexpart [":" IPv4address]
                    //    IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
                    //    hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
                    //    hexseq = hex4 *(":" hex4)
                    //    hex4 = 1*4HEXDIG
                    //    path = <any UCS4 character except "#">
                    //    UCS4 = <any UCS4 character>

                    // 1st Production (URL):
                    //    "//" [domain / IPv6reference] ["/" *path]
                    //        ["#" *UCS4]
                    //  becomes
                    //    "file://" domain "/" *path ["#" *UCS4]
                    if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
                    {
                        sal_Unicode const * p1 = pPos + 2;
                        while (p1 != pEnd && *p1 != '/' &&
                               *p1 != nFragmentDelimiter)
                        {
                            ++p1;
                        }
                        if (parseHostOrNetBiosName(
                                pPos + 2, p1, EncodeMechanism::All,
                                RTL_TEXTENCODING_DONTKNOW, true, nullptr))
                        {
                            m_aAbsURIRef.append("//");
                            pHostPortBegin = pPos + 2;
                            pHostPortEnd = p1;
                            pPos = p1;
                            break;
                        }
                    }

                    // 2nd Production (MS IE generated 1; FSysStyle::Dos only):
                    //    "//" ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  becomes
                    //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  replacing "\" by "/" within <*path>
                    // 3rd Production (MS IE generated 2; FSysStyle::Dos only):
                    //    "//" ALPHA ":" ["\" *path] ["#" *UCS4]
                    //  becomes
                    //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  replacing "\" by "/" within <*path>
                    // 4th Production (miscounted slashes):
                    //    "//" *path ["#" *UCS4]
                    //  becomes
                    //    "file:///" *path ["#" *UCS4]
                    if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
                    {
                        m_aAbsURIRef.append("//");
                        pPos += 2;
                        bSkippedInitialSlash = true;
                        if ((eStyle & FSysStyle::Dos)
                            && pEnd - pPos >= 2
                            && rtl::isAsciiAlpha(pPos[0])
                            && pPos[1] == ':'
                            && (pEnd - pPos == 2
                                || pPos[2] == '/' || pPos[2] == '\\'))
                            nAltSegmentDelimiter = '\\';
                        break;
                    }

                    // 5th Production (Unix):
                    //    "/" *path ["#" *UCS4]
                    //  becomes
                    //    "file:///" *path ["#" *UCS4]
                    if (pPos < pEnd && *pPos == '/')
                    {
                        m_aAbsURIRef.append("//");
                        break;
                    }

                    // 6th Production (UNC; FSysStyle::Dos only):
                    //    "\\" domain ["\" *path] ["#" *UCS4]
                    //  becomes
                    //    "file://" domain "/" *path ["#" *UCS4]
                    //  replacing "\" by "/" within <*path>
                    if (eStyle & FSysStyle::Dos
                        && pEnd - pPos >= 2
                        && pPos[0] == '\\'
                        && pPos[1] == '\\')
                    {
                        sal_Unicode const * p1 = pPos + 2;
                        sal_Unicode const * pHostPortTentativeBegin = p1;
                        if (pEnd - p1 >= 6 && p1[0] == '?' && p1[1] == '\\' && p1[5] == '\\'
                            && rtl::toAsciiLowerCase(p1[2]) == 'u'
                            && rtl::toAsciiLowerCase(p1[3]) == 'n'
                            && rtl::toAsciiLowerCase(p1[4]) == 'c')
                        {
                            p1 += 6; // "\\?\UNC\Servername\..."
                            pHostPortTentativeBegin = p1;
                        }

                        sal_Unicode const * pe = p1;
                        while (pe < pEnd && *pe != '\\' &&
                               *pe != nFragmentDelimiter)
                        {
                            ++pe;
                        }
                        if (
                             parseHostOrNetBiosName(
                                p1, pe, EncodeMechanism::All,
                                RTL_TEXTENCODING_DONTKNOW, true, nullptr) ||
                             (scanDomain(p1, pe) > 0 && p1 == pe)
                           )
                        {
                            m_aAbsURIRef.append("//");
                            pHostPortBegin = pHostPortTentativeBegin;
                            pHostPortEnd = pe;
                            pPos = pe;
                            nSegmentDelimiter = '\\';
                            break;
                        }
                    }

                    // 7th Production (Unix-like DOS; FSysStyle::Dos only):
                    //    ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  becomes
                    //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  replacing "\" by "/" within <*path>
                    // 8th Production (DOS; FSysStyle::Dos only):
                    //    ALPHA ":" ["\" *path] ["#" *UCS4]
                    //  becomes
                    //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  replacing "\" by "/" within <*path>
                    if (eStyle & FSysStyle::Dos)
                    {
                        sal_Unicode const* p1 = pPos;
                        if (pEnd - p1 >= 4 && p1[0] == '\\' && p1[1] == '\\' && p1[2] == '?'
                            && p1[3] == '\\')
                            p1 += 4; // "\\?\c:\..."

                        if (pEnd - p1 >= 2
                            && rtl::isAsciiAlpha(p1[0])
                            && p1[1] == ':'
                            && (pEnd - p1 == 2
                                || p1[2] == '/'
                                || p1[2] == '\\'))
                        {
                            pPos = p1;
                            m_aAbsURIRef.append("//");
                            nAltSegmentDelimiter = '\\';
                            bSkippedInitialSlash = true;
                            break;
                        }
                    }

                    // 9th Production (any):
                    //    *path ["#" *UCS4]
                    //  becomes
                    //    "file:///" *path ["#" *UCS4]
                    //  replacing the delimiter by "/" within <*path>.  The
                    //  delimiter is that character from the set { "/", "\"}
                    // which appears most often in <*path> (if FSysStyle::Unix
                    //  is not among the style bits, "/" is removed from the
                    //  set; if FSysStyle::Dos is not among the style bits, "\" is
                    //  removed from the set).  If two or
                    //  more characters appear the same number of times, the
                    //  character mentioned first in that set is chosen.  If
                    //  the first character of <*path> is the delimiter, that
                    //  character is not copied
                    if (eStyle & (FSysStyle::Unix | FSysStyle::Dos))
                    {
                        m_aAbsURIRef.append("//");
                        switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
                        {
                            case FSysStyle::Unix:
                                nSegmentDelimiter = '/';
                                break;

                            case FSysStyle::Dos:
                                nSegmentDelimiter = '\\';
                                break;

                            default:
                                OSL_FAIL(
                                    "INetURLObject::setAbsURIRef():"
                                        " Bad guessFSysStyleByCounting");
                                break;
                        }
                        bSkippedInitialSlash
                            = pPos != pEnd && *pPos != nSegmentDelimiter;
                        break;
                    }
                }
                [[fallthrough]];
            default:
            {
                // For INetProtocol::File, allow an empty authority ("//") to be
                // missing if the following path starts with an explicit "/"
                // (Java is notorious in generating such file URLs, so be
                // liberal here):
                if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
                    pPos += 2;
                else if (!bSmart
                         && !(m_eScheme == INetProtocol::File
                              && pPos != pEnd && *pPos == '/'))
                {
                    setInvalid();
                    return false;
                }
                m_aAbsURIRef.append("//");

                sal_Unicode const * pAuthority = pPos;
                sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000;
                while (pPos < pEnd && *pPos != '/' && *pPos != c
                       && *pPos != nFragmentDelimiter)
                    ++pPos;
                if (getSchemeInfo().m_bUser)
                    if (getSchemeInfo().m_bHost)
                    {
                        sal_Unicode const * p1 = pAuthority;
                        while (p1 < pPos && *p1 != '@')
                            ++p1;
                        if (p1 == pPos)
                        {
                            pHostPortBegin = pAuthority;
                            pHostPortEnd = pPos;
                        }
                        else
                        {
                            pUserInfoBegin = pAuthority;
                            pUserInfoEnd = p1;
                            pHostPortBegin = p1 + 1;
                            pHostPortEnd = pPos;
                        }
                    }
                    else
                    {
                        pUserInfoBegin = pAuthority;
                        pUserInfoEnd = pPos;
                    }
                else if (getSchemeInfo().m_bHost)
                {
                    pHostPortBegin = pAuthority;
                    pHostPortEnd = pPos;
                }
                else if (pPos != pAuthority)
                {
                    setInvalid();
                    return false;
                }
                break;
            }
        }

        if (pUserInfoBegin)
        {
            Part ePart = PART_USER_PASSWORD;
            bool bSupportsPassword = getSchemeInfo().m_bPassword;
            bool bSupportsAuth
                = !bSupportsPassword && getSchemeInfo().m_bAuth;
            bool bHasAuth = false;
            OUStringBuffer aSynUser;
            sal_Unicode const * p1 = pUserInfoBegin;
            while (p1 < pUserInfoEnd)
            {
                EscapeType eEscapeType;
                sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
                                             eMechanism, eCharset, eEscapeType);
                if (eEscapeType == EscapeType::NONE)
                {
                    if (nUTF32 == ':' && bSupportsPassword)
                    {
                        bHasAuth = true;
                        break;
                    }
                    else if (nUTF32 == ';' && bSupportsAuth
                             && pUserInfoEnd - p1
                                    > RTL_CONSTASCII_LENGTH("auth=")
                             && INetMIME::equalIgnoreCase(
                                    p1,
                                    p1 + RTL_CONSTASCII_LENGTH("auth="),
                                    "auth="))
                    {
                        p1 += RTL_CONSTASCII_LENGTH("auth=");
                        bHasAuth = true;
                        break;
                    }
                }
                appendUCS4(aSynUser, nUTF32, eEscapeType, ePart,
                           eCharset, false);
            }
            m_aUser.set(m_aAbsURIRef, aSynUser, m_aAbsURIRef.getLength());
            if (bHasAuth)
            {
                if (bSupportsPassword)
                {
                    m_aAbsURIRef.append(':');
                    OUStringBuffer aSynAuth;
                    while (p1 < pUserInfoEnd)
                    {
                        EscapeType eEscapeType;
                        sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
                                                     eMechanism, eCharset,
                                                     eEscapeType);
                        appendUCS4(aSynAuth, nUTF32, eEscapeType,
                                   ePart, eCharset, false);
                    }
                    m_aAuth.set(m_aAbsURIRef, aSynAuth, m_aAbsURIRef.getLength());
                }
                else
                {
                    m_aAbsURIRef.append(";AUTH=");
                    OUStringBuffer aSynAuth;
                    while (p1 < pUserInfoEnd)
                    {
                        EscapeType eEscapeType;
                        sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
                                                     eMechanism, eCharset,
                                                     eEscapeType);
                        if (!INetMIME::isIMAPAtomChar(nUTF32))
                        {
                            setInvalid();
                            return false;
                        }
                        appendUCS4(aSynAuth, nUTF32, eEscapeType,
                                   ePart, eCharset, false);
                    }
                    m_aAuth.set(m_aAbsURIRef, aSynAuth, m_aAbsURIRef.getLength());
                }
            }
            if (pHostPortBegin)
                m_aAbsURIRef.append('@');
        }

        if (pHostPortBegin)
        {
            sal_Unicode const * pPort = pHostPortEnd;
            if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd )
            {
                sal_Unicode const * p1 = pHostPortEnd - 1;
                while (p1 > pHostPortBegin && rtl::isAsciiDigit(*p1))
                    --p1;
                if (*p1 == ':')
                    pPort = p1;
            }
            bool bNetBiosName = false;
            switch (m_eScheme)
            {
                case INetProtocol::File:
                    // If the host equals "LOCALHOST" (unencoded and ignoring
                    // case), turn it into an empty host:
                    if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort,
                                                  "localhost"))
                        pHostPortBegin = pPort;
                    bNetBiosName = true;
                    break;

                case INetProtocol::Ldap:
                case INetProtocol::Smb:
                    if (pHostPortBegin == pPort && pPort != pHostPortEnd)
                    {
                        setInvalid();
                        return false;
                    }
                    break;
                default:
                    if (pHostPortBegin == pPort)
                    {
                        setInvalid();
                        return false;
                    }
                    break;
            }
            sal_Int32 nLenBeforeHost = m_aAbsURIRef.getLength();
            if (!parseHostOrNetBiosName(
                    pHostPortBegin, pPort, eMechanism, eCharset,
                    bNetBiosName, &m_aAbsURIRef))
            {
                setInvalid();
                return false;
            }
            m_aHost = SubString(nLenBeforeHost, m_aAbsURIRef.getLength() - nLenBeforeHost);
            if (pPort != pHostPortEnd)
            {
                m_aAbsURIRef.append(':');
                m_aPort.set(m_aAbsURIRef,
                    std::u16string_view{pPort + 1, static_cast<size_t>(pHostPortEnd - (pPort + 1))},
                    m_aAbsURIRef.getLength());
            }
        }
    }

    // Parse <path>
    sal_Int32 nBeforePathLength = m_aAbsURIRef.getLength();
    if (!parsePath(m_eScheme, &pPos, pEnd, eMechanism, eCharset,
                   bSkippedInitialSlash, nSegmentDelimiter,
                   nAltSegmentDelimiter,
                   getSchemeInfo().m_bQuery ? '?' : 0x80000000,
                   nFragmentDelimiter, m_aAbsURIRef))
    {
        setInvalid();
        return false;
    }
    m_aPath = SubString(nBeforePathLength, m_aAbsURIRef.getLength() - nBeforePathLength);

    // Parse ?<query>
    if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?')
    {
        m_aAbsURIRef.append('?');
        OUStringBuffer aSynQuery;
        for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;)
        {
            EscapeType eEscapeType;
            sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
                                         eMechanism, eCharset, eEscapeType);
            appendUCS4(aSynQuery, nUTF32, eEscapeType,
                       PART_URIC, eCharset, true);
        }
        m_aQuery.set(m_aAbsURIRef, aSynQuery, m_aAbsURIRef.getLength());
    }

    // Parse #<fragment>
    if (pPos < pEnd && *pPos == nFragmentDelimiter)
    {
        m_aAbsURIRef.append(sal_Unicode(nFragmentDelimiter));
        OUStringBuffer aSynFragment;
        for (++pPos; pPos < pEnd;)
        {
            EscapeType eEscapeType;
            sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
                                         eMechanism, eCharset, eEscapeType);
            appendUCS4(aSynFragment, nUTF32, eEscapeType, PART_URIC,
                       eCharset, true);
        }
        m_aFragment.set(m_aAbsURIRef, aSynFragment, m_aAbsURIRef.getLength());
    }

    if (pPos != pEnd)
    {
        setInvalid();
        return false;
    }

    return true;
}

void INetURLObject::changeScheme(INetProtocol eTargetScheme) {
    sal_Int32 oldSchemeLen = 0;
    const OUString& rOldSchemeName = getSchemeInfo().m_sScheme;
    if (m_eScheme == INetProtocol::Generic)
        oldSchemeLen = m_aScheme.getLength();
    else
        oldSchemeLen = rOldSchemeName.getLength();
    m_eScheme=eTargetScheme;
    const OUString& rNewSchemeName = getSchemeInfo().m_sScheme;
    sal_Int32 newSchemeLen = rNewSchemeName.getLength();
    m_aAbsURIRef.remove(0, oldSchemeLen);
    m_aAbsURIRef.insert(0, rNewSchemeName);
    sal_Int32 delta=newSchemeLen-oldSchemeLen;
    m_aUser+=delta;
    m_aAuth+=delta;
    m_aHost+=delta;
    m_aPort+=delta;
    m_aPath+=delta;
    m_aQuery+=delta;
    m_aFragment+=delta;
}

bool INetURLObject::convertRelToAbs(OUString const & rTheRelURIRef,
                                    INetURLObject & rTheAbsURIRef,
                                    bool & rWasAbsolute,
                                    EncodeMechanism eMechanism,
                                    rtl_TextEncoding eCharset,
                                    bool bIgnoreFragment, bool bSmart,
                                    bool bRelativeNonURIs, FSysStyle eStyle)
    const
{
    sal_Unicode const * p = rTheRelURIRef.getStr();
    sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();

    sal_Unicode const * pPrefixBegin = p;
    PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
    bool hasScheme = pPrefix != nullptr;
    if (!hasScheme) {
        pPrefixBegin = p;
        hasScheme = !parseScheme(&pPrefixBegin, pEnd, '#').isEmpty();
    }

    sal_uInt32 nSegmentDelimiter = '/';
    sal_uInt32 nQueryDelimiter
        = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
    sal_uInt32 nFragmentDelimiter = '#';
    Part ePart = PART_VISIBLE;

    if (!hasScheme && bSmart)
    {
        // If the input matches any of the following productions (for which
        // the appropriate style bit is set in eStyle), it is assumed to be an
        // absolute file system path, rather than a relative URI reference.
        // (This is only a subset of the productions used for scheme detection
        // in INetURLObject::setAbsURIRef(), because most of those productions
        // interfere with the syntax of relative URI references.)  The
        // productions use the auxiliary rules

--> --------------------

--> maximum size reached

--> --------------------

Messung V0.5
C=90 H=94 G=91

¤ Dauer der Verarbeitung: 0.31 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Normalansicht

Bemerkung:

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Anfrage:

Dauer der Verarbeitung:

Sekunden

sprechenden Kalenders