ICU 77.1 77.1
brkiter.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4********************************************************************************
5* Copyright (C) 1997-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7********************************************************************************
8*
9* File brkiter.h
10*
11* Modification History:
12*
13* Date Name Description
14* 02/18/97 aliu Added typedef for TextCount. Made DONE const.
15* 05/07/97 aliu Fixed DLL declaration.
16* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
17* 08/11/98 helena Sync-up JDK1.2.
18* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
19********************************************************************************
20*/
21
22#ifndef BRKITER_H
23#define BRKITER_H
24
25#include "unicode/utypes.h"
26
31
32#include "unicode/utypes.h"
33
34#if U_SHOW_CPLUSPLUS_API
35
36#if UCONFIG_NO_BREAK_ITERATION
37
38U_NAMESPACE_BEGIN
39
40/*
41 * Allow the declaration of APIs with pointers to BreakIterator
42 * even when break iteration is removed from the build.
43 */
44class BreakIterator;
45
46U_NAMESPACE_END
47
48#else
49
50#include "unicode/uobject.h"
51#include "unicode/unistr.h"
52#include "unicode/chariter.h"
53#include "unicode/locid.h"
54#include "unicode/ubrk.h"
55#include "unicode/strenum.h"
56#include "unicode/utext.h"
57#include "unicode/umisc.h"
58
59U_NAMESPACE_BEGIN
60
61class CharString;
62
108class U_COMMON_API BreakIterator : public UObject {
109public:
114 virtual ~BreakIterator();
115
129 virtual bool operator==(const BreakIterator&) const = 0;
130
137 bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
138
144 virtual BreakIterator* clone() const = 0;
145
151 virtual UClassID getDynamicClassID() const override = 0;
152
157 virtual CharacterIterator& getText() const = 0;
158
173 virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
174
186 virtual void setText(const UnicodeString &text) = 0;
187
206 virtual void setText(UText *text, UErrorCode &status) = 0;
207
216 virtual void adoptText(CharacterIterator* it) = 0;
217
218 enum {
224 DONE = static_cast<int32_t>(-1)
225 };
226
232 virtual int32_t first() = 0;
233
239 virtual int32_t last() = 0;
240
247 virtual int32_t previous() = 0;
248
255 virtual int32_t next() = 0;
256
262 virtual int32_t current() const = 0;
263
272 virtual int32_t following(int32_t offset) = 0;
273
282 virtual int32_t preceding(int32_t offset) = 0;
283
292 virtual UBool isBoundary(int32_t offset) = 0;
293
303 virtual int32_t next(int32_t n) = 0;
304
318 virtual int32_t getRuleStatus() const;
319
348 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
349
369 static BreakIterator* U_EXPORT2
370 createWordInstance(const Locale& where, UErrorCode& status);
371
393 static BreakIterator* U_EXPORT2
394 createLineInstance(const Locale& where, UErrorCode& status);
395
415 static BreakIterator* U_EXPORT2
416 createCharacterInstance(const Locale& where, UErrorCode& status);
417
436 static BreakIterator* U_EXPORT2
437 createSentenceInstance(const Locale& where, UErrorCode& status);
438
439#ifndef U_HIDE_DEPRECATED_API
462 static BreakIterator* U_EXPORT2
463 createTitleInstance(const Locale& where, UErrorCode& status);
464#endif /* U_HIDE_DEPRECATED_API */
465
475 static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
476
486 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
487 const Locale& displayLocale,
488 UnicodeString& name);
489
498 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
499 UnicodeString& name);
500
501#ifndef U_FORCE_HIDE_DEPRECATED_API
521 virtual BreakIterator * createBufferClone(void *stackBuffer,
522 int32_t &BufferSize,
523 UErrorCode &status) = 0;
524#endif // U_FORCE_HIDE_DEPRECATED_API
525
526#ifndef U_HIDE_DEPRECATED_API
527
534 inline UBool isBufferClone();
535
536#endif /* U_HIDE_DEPRECATED_API */
537
538#if !UCONFIG_NO_SERVICE
554 static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
555 const Locale& locale,
556 UBreakIteratorType kind,
557 UErrorCode& status);
558
571 static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
572
579 static StringEnumeration* U_EXPORT2 getAvailableLocales();
580#endif
581
587 Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
588
589#ifndef U_HIDE_INTERNAL_API
596 const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
597#endif /* U_HIDE_INTERNAL_API */
598
624 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
625
626 private:
627 static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
628 static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
629 static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
630
631 friend class ICUBreakIteratorFactory;
632 friend class ICUBreakIteratorService;
633
634protected:
635 // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
636 // or else the compiler will create a public ones.
638 BreakIterator();
640 BreakIterator (const BreakIterator &other);
641#ifndef U_HIDE_INTERNAL_API
643 BreakIterator (const Locale& valid, const Locale &actual);
645 BreakIterator &operator = (const BreakIterator &other);
646#endif /* U_HIDE_INTERNAL_API */
647
648private:
649
651 CharString* actualLocale = nullptr;
652 CharString* validLocale = nullptr;
653 CharString* requestLocale = nullptr;
654};
655
656#ifndef U_HIDE_DEPRECATED_API
657
658inline UBool BreakIterator::isBufferClone()
659{
660 return false;
661}
662
663#endif /* U_HIDE_DEPRECATED_API */
664
665U_NAMESPACE_END
666
667#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
668
669#endif /* U_SHOW_CPLUSPLUS_API */
670
671#endif // BRKITER_H
672//eof
C++ API: Character Iterator.
virtual bool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
static StringEnumeration * getAvailableLocales()
Return a StringEnumeration over the locales available at the time of the call, including registered l...
virtual ~BreakIterator()
destructor
BreakIterator(const Locale &valid, const Locale &actual)
BreakIterator(const BreakIterator &other)
virtual int32_t previous()=0
Set the iterator position to the boundary preceding the current boundary.
virtual int32_t getRuleStatus() const
For RuleBasedBreakIterators, return the status tag from the break rule that determined the boundary a...
virtual void adoptText(CharacterIterator *it)=0
Change the text over which this operates.
virtual CharacterIterator & getText() const =0
Return a CharacterIterator over the text being analyzed.
static BreakIterator * createCharacterInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for character-breaks using specified locale Returns an instance of a BreakIterat...
virtual void setText(UText *text, UErrorCode &status)=0
Reset the break iterator to operate over the text represented by the UText.
virtual int32_t last()=0
Set the iterator position to the index immediately BEYOND the last character in the text being scanne...
virtual BreakIterator * clone() const =0
Return a polymorphic copy of this object.
virtual int32_t next()=0
Advance the iterator to the boundary following the current boundary.
static BreakIterator * createWordInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for word-breaks using the given locale.
virtual int32_t first()=0
Sets the current iteration position to the beginning of the text, position zero.
const char * getLocaleID(ULocDataLocaleType type, UErrorCode &status) const
Get the locale for this break iterator object.
static UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the desired language.
Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const
Returns the locale for this break iterator.
bool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition brkiter.h:137
virtual void setText(const UnicodeString &text)=0
Change the text over which this operates.
static BreakIterator * createTitleInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for title-casing breaks using the specified locale Returns an instance of a Brea...
virtual int32_t next(int32_t n)=0
Set the iterator position to the nth boundary from the current boundary.
virtual int32_t preceding(int32_t offset)=0
Set the iterator position to the first boundary preceding the specified offset.
virtual int32_t current() const =0
Return character index of the current iterator position within the text.
virtual UBool isBoundary(int32_t offset)=0
Return true if the specified position is a boundary position.
static UnicodeString & getDisplayName(const Locale &objectLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the language of the default locale.
virtual UClassID getDynamicClassID() const override=0
Return a polymorphic class ID for this object.
static BreakIterator * createSentenceInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for sentence-breaks using specified locale Returns an instance of a BreakIterato...
virtual BreakIterator & refreshInputText(UText *input, UErrorCode &status)=0
Set the subject text string upon which the break iterator is operating without changing any other asp...
UBool isBufferClone()
Determine whether the BreakIterator was created in user memory by createBufferClone(),...
Definition brkiter.h:658
static UBool unregister(URegistryKey key, UErrorCode &status)
Unregister a previously-registered BreakIterator using the key returned from the register call.
virtual UText * getUText(UText *fillIn, UErrorCode &status) const =0
Get a UText for the text being analyzed.
static BreakIterator * createLineInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for line-breaks using specified locale.
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)=0
Deprecated functionality.
virtual int32_t following(int32_t offset)=0
Advance the iterator to the first boundary following the specified offset.
@ DONE
DONE is returned by previous() and next() after all valid boundaries have been returned.
Definition brkiter.h:224
static URegistryKey registerInstance(BreakIterator *toAdopt, const Locale &locale, UBreakIteratorType kind, UErrorCode &status)
Register a new break iterator of the indicated kind, to use in the given locale.
static const Locale * getAvailableLocales(int32_t &count)
Get the set of Locales for which TextBoundaries are installed.
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) that determined the b...
Abstract class that defines an API for iteration on text objects.
Definition chariter.h:361
A Locale object represents a specific geographical, political, or cultural region.
Definition locid.h:195
Base class for 'pure' C++ implementations of uenum api.
Definition strenum.h:61
UObject is the common ICU "boilerplate" class.
Definition uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:296
C++ API: Locale ID object.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C++ API: String Enumeration.
UText struct.
Definition utext.h:1328
C API: BreakIterator.
UBreakIteratorType
The possible types of text boundaries.
Definition ubrk.h:102
ULocDataLocaleType
Constants for *_getLocale() Allow user to select whether she wants information on requested,...
Definition uloc.h:338
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:247
C API: Miscellaneous definitions.
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration.
Definition umisc.h:57
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:96
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:430
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:315