class ICU4XDataProvider; class ICU4XWordSegmenter; #include"ICU4XError.hpp" class ICU4XWordBreakIteratorUtf8; class ICU4XWordBreakIteratorUtf16; class ICU4XWordBreakIteratorLatin1;
/** * A destruction policy for using ICU4XWordSegmenter with std::unique_ptr.
*/ struct ICU4XWordSegmenterDeleter { voidoperator()(capi::ICU4XWordSegmenter* l) const noexcept {
capi::ICU4XWordSegmenter_destroy(l);
}
};
/**
 * Constructs an [`ICU4XWordSegmenter`], automatically selecting the best available LSTM
 * or dictionary payload data.
 *
 * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
 * Khmer, Lao, and Thai.
 *
 * See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information.
 *
 * @param provider Data provider handed to the constructor (presumably the source of the
 *                 payload data; the implementation is not visible here).
 * @return A `diplomat::result` carrying either the new segmenter or an `ICU4XError`.
 */
static diplomat::result<ICU4XWordSegmenter, ICU4XError> create_auto(const ICU4XDataProvider& provider);
/**
 * Constructs an [`ICU4XWordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
 * Thai.
 *
 * Warning: an [`ICU4XWordSegmenter`] created by this function doesn't handle Chinese or
 * Japanese.
 *
 * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information.
 *
 * @param provider Data provider handed to the constructor (presumably the source of the
 *                 LSTM payload data; the implementation is not visible here).
 * @return A `diplomat::result` carrying either the new segmenter or an `ICU4XError`.
 */
static diplomat::result<ICU4XWordSegmenter, ICU4XError> create_lstm(const ICU4XDataProvider& provider);
/**
 * Constructs an [`ICU4XWordSegmenter`] with dictionary payload data for Chinese, Japanese,
 * Burmese, Khmer, Lao, and Thai.
 *
 * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information.
 *
 * @param provider Data provider handed to the constructor (presumably the source of the
 *                 dictionary payload data; the implementation is not visible here).
 * @return A `diplomat::result` carrying either the new segmenter or an `ICU4XError`.
 */
static diplomat::result<ICU4XWordSegmenter, ICU4XError> create_dictionary(const ICU4XDataProvider& provider);
/**
 * Segments a string.
 *
 * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
 * to the WHATWG Encoding Standard.
 *
 * See the [Rust documentation for `segment_utf8`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.segment_utf8) for more information.
 *
 * Lifetimes: `this`, `input` must live at least as long as the output.
 *
 * @param input Text to segment, passed as a non-owning `std::string_view`; the caller
 *              keeps the underlying buffer alive (see Lifetimes).
 * @return An `ICU4XWordBreakIteratorUtf8` (the "output" referred to under Lifetimes).
 */
ICU4XWordBreakIteratorUtf8 segment_utf8(const std::string_view input) const;
/**
 * Segments a string.
 *
 * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
 * to the WHATWG Encoding Standard.
 *
 * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.segment_utf16) for more information.
 *
 * Lifetimes: `this`, `input` must live at least as long as the output.
 *
 * @param input Text to segment, passed as a non-owning `std::u16string_view`; the caller
 *              keeps the underlying buffer alive (see Lifetimes).
 * @return An `ICU4XWordBreakIteratorUtf16` (the "output" referred to under Lifetimes).
 */
ICU4XWordBreakIteratorUtf16 segment_utf16(const std::u16string_view input) const;
/** * Segments a Latin-1 string. * * See the [Rust documentation for `segment_latin1`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.segment_latin1) for more information. * * Lifetimes: `this`, `input` must live at least as long as the output.
*/
ICU4XWordBreakIteratorLatin1 segment_latin1(const diplomat::span<const uint8_t> input) const; inlineconst capi::ICU4XWordSegmenter* AsFFI() const { return this->inner.get(); } inline capi::ICU4XWordSegmenter* AsFFIMut() { return this->inner.get(); } inlineexplicit ICU4XWordSegmenter(capi::ICU4XWordSegmenter* i) : inner(i) {}
// Default construction leaves `inner` as an empty (null) unique_ptr.
ICU4XWordSegmenter() = default;
// Defaulted move operations transfer the handle held by `inner`. Declaring them
// means no implicit copy operations are generated for this class.
ICU4XWordSegmenter(ICU4XWordSegmenter&&) noexcept = default;
ICU4XWordSegmenter& operator=(ICU4XWordSegmenter&& other) noexcept = default;
private:
// Owning pointer to the C API object; released through ICU4XWordSegmenterDeleter.
std::unique_ptr<capi::ICU4XWordSegmenter, ICU4XWordSegmenterDeleter> inner;
};
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.