// converts the openoffice text/html clipboard format to the HTML Format
// well known under MS Windows
// the MS HTML Format has a header before the real html data
//
// Version:1.0      Version number of the clipboard. Starting is 0.9
// StartHTML:       Byte count from the beginning of the clipboard to the start
//                  of the context, or -1 if no context
// EndHTML:         Byte count from the beginning of the clipboard to the end
//                  of the context, or -1 if no context
// StartFragment:   Byte count from the beginning of the clipboard to the
//                  start of the fragment
// EndFragment:     Byte count from the beginning of the clipboard to the
//                  end of the fragment
// StartSelection:  Byte count from the beginning of the clipboard to the
//                  start of the selection
// EndSelection:    Byte count from the beginning of the clipboard to the
//                  end of the selection
//
// StartSelection and EndSelection are optional
// The fragment should be preceded and followed by the HTML comments
// <!--StartFragment--> and <!--EndFragment--> (no space between !-- and the
// text)
// Search patterns for the opening and closing html tags. The office always
// writes these tags without spaces, and neither tag allows parameters, so
// they can be matched as complete strings.
// NOTE(review): the original comment says the tags are written in upper
// case, but the patterns below are lower case; presumably the input is
// lower-cased before searching - confirm against the caller.
const std::string TAG_HTML("<html>");
const std::string TAG_END_HTML("</html>");

// The body tag may have parameters, so only the tag prefix is matched here
// and the closing '>' has to be searched for manually,
// e.g. <BODY param> #92840#
const std::string TAG_BODY("<body");
const std::string TAG_END_BODY("</body");
// Nothing to convert: answer an empty input with an empty sequence.
if (aTextHtml.getLength() <= 0) return Sequence<sal_Int8>();
// fill the buffer with dummy values to calc the exact length
// (presumably the header has the same length whatever offsets are written
// into it, so the dummy header can stand in for the final one - confirm
// against GetHtmlFormatHeader)
std::string dummyHtmlHeader = GetHtmlFormatHeader(0, 0, 0, 0);
size_t lHtmlFormatHeader = dummyHtmlHeader.length();
// Every offset below is a position inside textHtml shifted by the header
// length, i.e. it counts from the beginning of the header.
// NOTE(review): std::string::find returns std::string::npos when the
// pattern is absent; none of the results below is checked before being
// used in the arithmetic, so input without these tags yields wrapped-around
// offsets.
std::string::size_type nStartHtml = textHtml.find(TAG_HTML) + lHtmlFormatHeader - 1; // we start one before '<HTML>' Word 2000 does also so
std::string::size_type nEndHtml = textHtml.find(TAG_END_HTML) + lHtmlFormatHeader + TAG_END_HTML.length() + 1; // our SOffice 5.2 wants 2 behind </HTML>?
// The body tag may have parameters so we need to search for the
// closing '>' manually e.g. <BODY param> #92840#
// The fragment starts right after that '>' and ends where the closing body
// tag begins.
std::string::size_type nStartFragment = textHtml.find(">", textHtml.find(TAG_BODY)) + lHtmlFormatHeader + 1;
std::string::size_type nEndFragment = textHtml.find(TAG_END_BODY) + lHtmlFormatHeader;
assert(htmlStartTag && "Seems to be no HTML at all");
// It doesn't seem to be HTML? Well then simply return what has been // provided in non-debug builds if (htmlStartTag == nullptr)
{ return aHTMLFormat;
}
sal_Int32 len = dataEnd - htmlStartTag;
Sequence<sal_Int8> plainHtmlData(len);
/* A simple format detection. We are just comparing the first few bytes
   of the provided byte sequence to see whether or not it is the MS
   Office Html format. If it shows that this is not reliable enough we
   can improve this
*/
// Marker text compared against the beginning of the data.
const char HtmlFormatStart[] = "Version:";
// Marker length in bytes; the -1 drops the terminating NUL that sizeof
// counts for a string literal.
int const HtmlFormatStartLen = sizeof(HtmlFormatStart) - 1;
bool isHTMLFormat(const Sequence<sal_Int8>& aHtmlSequence)
{ if (aHtmlSequence.getLength() < HtmlFormatStartLen) returnfalse;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.