ICU 77.1 77.1
uset.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2002-2014, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: uset.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2002mar07
16* created by: Markus W. Scherer
17*
18* C version of UnicodeSet.
19*/
20
21
28
29#ifndef __USET_H__
30#define __USET_H__
31
32#include "unicode/utypes.h"
33#include "unicode/uchar.h"
34
35#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
36#include <string>
37#include <string_view>
38#include "unicode/char16ptr.h"
40#include "unicode/utf16.h"
41#endif
42
43#ifndef USET_DEFINED
44
45#ifndef U_IN_DOXYGEN
46#define USET_DEFINED
47#endif
54typedef struct USet USet;
55#endif
56
68enum {
74
102
115
129};
130
186typedef enum USetSpanCondition {
235#ifndef U_HIDE_DEPRECATED_API
241#endif // U_HIDE_DEPRECATED_API
243
244enum {
252};
253
281
282/*********************************************************************
283 * USet API
284 *********************************************************************/
285
293U_CAPI USet* U_EXPORT2
295
306U_CAPI USet* U_EXPORT2
308
318U_CAPI USet* U_EXPORT2
319uset_openPattern(const UChar* pattern, int32_t patternLength,
320 UErrorCode* ec);
321
335U_CAPI USet* U_EXPORT2
336uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
337 uint32_t options,
338 UErrorCode* ec);
339
346U_CAPI void U_EXPORT2
348
349#if U_SHOW_CPLUSPLUS_API
350
351U_NAMESPACE_BEGIN
352
363
364U_NAMESPACE_END
365
366#endif
367
377U_CAPI USet * U_EXPORT2
378uset_clone(const USet *set);
379
389U_CAPI UBool U_EXPORT2
390uset_isFrozen(const USet *set);
391
406U_CAPI void U_EXPORT2
408
419U_CAPI USet * U_EXPORT2
421
431U_CAPI void U_EXPORT2
433 UChar32 start, UChar32 end);
434
459U_CAPI int32_t U_EXPORT2
461 const UChar *pattern, int32_t patternLength,
462 uint32_t options,
463 UErrorCode *status);
464
487U_CAPI void U_EXPORT2
489 UProperty prop, int32_t value, UErrorCode* ec);
490
526U_CAPI void U_EXPORT2
528 const UChar *prop, int32_t propLength,
529 const UChar *value, int32_t valueLength,
530 UErrorCode* ec);
531
541U_CAPI UBool U_EXPORT2
542uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
543 int32_t pos);
544
560U_CAPI int32_t U_EXPORT2
562 UChar* result, int32_t resultCapacity,
563 UBool escapeUnprintable,
564 UErrorCode* ec);
565
574U_CAPI void U_EXPORT2
576
589U_CAPI void U_EXPORT2
590uset_addAll(USet* set, const USet *additionalSet);
591
601U_CAPI void U_EXPORT2
603
613U_CAPI void U_EXPORT2
614uset_addString(USet* set, const UChar* str, int32_t strLen);
615
625U_CAPI void U_EXPORT2
626uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
627
636U_CAPI void U_EXPORT2
638
648U_CAPI void U_EXPORT2
650
660U_CAPI void U_EXPORT2
661uset_removeString(USet* set, const UChar* str, int32_t strLen);
662
672U_CAPI void U_EXPORT2
673uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
674
686U_CAPI void U_EXPORT2
687uset_removeAll(USet* set, const USet* removeSet);
688
701U_CAPI void U_EXPORT2
702uset_retain(USet* set, UChar32 start, UChar32 end);
703
715U_CAPI void U_EXPORT2
716uset_retainString(USet *set, const UChar *str, int32_t length);
717
727U_CAPI void U_EXPORT2
728uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
729
742U_CAPI void U_EXPORT2
743uset_retainAll(USet* set, const USet* retain);
744
753U_CAPI void U_EXPORT2
755
769U_CAPI void U_EXPORT2
771
785U_CAPI void U_EXPORT2
787
798U_CAPI void U_EXPORT2
799uset_complementString(USet *set, const UChar *str, int32_t length);
800
810U_CAPI void U_EXPORT2
811uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
812
824U_CAPI void U_EXPORT2
825uset_complementAll(USet* set, const USet* complement);
826
834U_CAPI void U_EXPORT2
836
865U_CAPI void U_EXPORT2
866uset_closeOver(USet* set, int32_t attributes);
867
874U_CAPI void U_EXPORT2
876
884U_CAPI UBool U_EXPORT2
885uset_isEmpty(const USet* set);
886
892U_CAPI UBool U_EXPORT2
894
903U_CAPI UBool U_EXPORT2
905
915U_CAPI UBool U_EXPORT2
916uset_containsRange(const USet* set, UChar32 start, UChar32 end);
917
926U_CAPI UBool U_EXPORT2
927uset_containsString(const USet* set, const UChar* str, int32_t strLen);
928
939U_CAPI int32_t U_EXPORT2
940uset_indexOf(const USet* set, UChar32 c);
941
957U_CAPI UChar32 U_EXPORT2
958uset_charAt(const USet* set, int32_t charIndex);
959
975U_CAPI int32_t U_EXPORT2
976uset_size(const USet* set);
977
987U_CAPI int32_t U_EXPORT2
989
990#ifndef U_HIDE_DRAFT_API
991
1000U_CAPI int32_t U_EXPORT2
1002
1015U_CAPI const UChar* U_EXPORT2
1016uset_getString(const USet *set, int32_t index, int32_t *pLength);
1017
1018#endif // U_HIDE_DRAFT_API
1019
1030U_CAPI int32_t U_EXPORT2
1032
1063U_CAPI int32_t U_EXPORT2
1064uset_getItem(const USet* set, int32_t itemIndex,
1065 UChar32* start, UChar32* end,
1066 UChar* str, int32_t strCapacity,
1067 UErrorCode* ec);
1068
1077U_CAPI UBool U_EXPORT2
1078uset_containsAll(const USet* set1, const USet* set2);
1079
1090U_CAPI UBool U_EXPORT2
1091uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1092
1101U_CAPI UBool U_EXPORT2
1102uset_containsNone(const USet* set1, const USet* set2);
1103
1112U_CAPI UBool U_EXPORT2
1113uset_containsSome(const USet* set1, const USet* set2);
1114
1134U_CAPI int32_t U_EXPORT2
1135uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1136
1155U_CAPI int32_t U_EXPORT2
1156uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1157
1177U_CAPI int32_t U_EXPORT2
1178uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1179
1198U_CAPI int32_t U_EXPORT2
1199uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1200
1209U_CAPI UBool U_EXPORT2
1210uset_equals(const USet* set1, const USet* set2);
1211
1212/*********************************************************************
1213 * Serialized set API
1214 *********************************************************************/
1215
1265U_CAPI int32_t U_EXPORT2
1266uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1267
1276U_CAPI UBool U_EXPORT2
1277uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1278
1286U_CAPI void U_EXPORT2
1288
1297U_CAPI UBool U_EXPORT2
1299
1309U_CAPI int32_t U_EXPORT2
1311
1325U_CAPI UBool U_EXPORT2
1326uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1327 UChar32* pStart, UChar32* pEnd);
1328
1329#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1330#ifndef U_HIDE_DRAFT_API
1331
1332namespace U_HEADER_ONLY_NAMESPACE {
1333
1334// Note: Not U_COMMON_API, and not a subclass of UMemory, because this is a header-only class,
1335// not intended to be used via export from the ICU DLL.
1336
1342public:
1345
1347 bool operator==(const USetCodePointIterator &other) const {
1348 // No need to compare rangeCount & end given private constructor
1349 // and assuming we don't compare iterators across the set being modified.
1350 // And comparing rangeIndex is redundant with comparing c.
1351 // We might even skip comparing uset.
1352 // Unless we want operator==() to be "correct" for more than iteration.
1353 return uset == other.uset && c == other.c;
1354 }
1355
1357 bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); }
1358
1360 UChar32 operator*() const { return c; }
1361
1367 if (c < end) {
1368 ++c;
1369 } else if (rangeIndex < rangeCount) {
1370 UErrorCode errorCode = U_ZERO_ERROR;
1371 int32_t result = uset_getItem(uset, rangeIndex, &c, &end, nullptr, 0, &errorCode);
1372 if (U_SUCCESS(errorCode) && result == 0) {
1373 ++rangeIndex;
1374 } else {
1375 c = end = U_SENTINEL;
1376 }
1377 } else {
1378 c = end = U_SENTINEL;
1379 }
1380 return *this;
1381 }
1382
1388 USetCodePointIterator result(*this);
1389 operator++();
1390 return result;
1391 }
1392
1393private:
1394 friend class USetCodePoints;
1395
1396 USetCodePointIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
1397 : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount),
1398 c(U_SENTINEL), end(U_SENTINEL) {
1399 // Fetch the first range.
1400 operator++();
1401 }
1402
1403 const USet *uset;
1404 int32_t rangeIndex;
1405 int32_t rangeCount;
1406 UChar32 c, end;
1407};
1408
1428public:
1433 USetCodePoints(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
1434
1436 USetCodePoints(const USetCodePoints &other) = default;
1437
1440 return USetCodePointIterator(uset, 0, rangeCount);
1441 }
1442
1445 return USetCodePointIterator(uset, rangeCount, rangeCount);
1446 }
1447
1448private:
1449 const USet *uset;
1450 int32_t rangeCount;
1451};
1452
1462 struct iterator {
1464 iterator(UChar32 aC) : c(aC) {}
1465
1467 bool operator==(const iterator &other) const { return c == other.c; }
1469 bool operator!=(const iterator &other) const { return !operator==(other); }
1470
1472 UChar32 operator*() const { return c; }
1473
1479 ++c;
1480 return *this;
1481 }
1482
1488 return c++;
1489 }
1490
1496 };
1497
1501 CodePointRange(const CodePointRange &other) = default;
1503 size_t size() const { return (rangeEnd + 1) - rangeStart; }
1505 iterator begin() const { return rangeStart; }
1507 iterator end() const { return rangeEnd + 1; }
1508
1519};
1520
1526public:
1528 USetRangeIterator(const USetRangeIterator &other) = default;
1529
1531 bool operator==(const USetRangeIterator &other) const {
1532 // No need to compare rangeCount given private constructor
1533 // and assuming we don't compare iterators across the set being modified.
1534 // We might even skip comparing uset.
1535 // Unless we want operator==() to be "correct" for more than iteration.
1536 return uset == other.uset && rangeIndex == other.rangeIndex;
1537 }
1538
1540 bool operator!=(const USetRangeIterator &other) const { return !operator==(other); }
1541
1544 if (rangeIndex < rangeCount) {
1545 UChar32 start, end;
1546 UErrorCode errorCode = U_ZERO_ERROR;
1547 int32_t result = uset_getItem(uset, rangeIndex, &start, &end, nullptr, 0, &errorCode);
1548 if (U_SUCCESS(errorCode) && result == 0) {
1549 return CodePointRange(start, end);
1550 }
1551 }
1553 }
1554
1560 ++rangeIndex;
1561 return *this;
1562 }
1563
1569 USetRangeIterator result(*this);
1570 ++rangeIndex;
1571 return result;
1572 }
1573
1574private:
1575 friend class USetRanges;
1576
1577 USetRangeIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount)
1578 : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount) {}
1579
1580 const USet *uset;
1581 int32_t rangeIndex;
1582 int32_t rangeCount;
1583};
1584
1609public:
1614 USetRanges(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
1615
1617 USetRanges(const USetRanges &other) = default;
1618
1621 return USetRangeIterator(uset, 0, rangeCount);
1622 }
1623
1626 return USetRangeIterator(uset, rangeCount, rangeCount);
1627 }
1628
1629private:
1630 const USet *uset;
1631 int32_t rangeCount;
1632};
1633
1639public:
1642
1644 bool operator==(const USetStringIterator &other) const {
1645 // No need to compare count given private constructor
1646 // and assuming we don't compare iterators across the set being modified.
1647 // We might even skip comparing uset.
1648 // Unless we want operator==() to be "correct" for more than iteration.
1649 return uset == other.uset && index == other.index;
1650 }
1651
1653 bool operator!=(const USetStringIterator &other) const { return !operator==(other); }
1654
1656 std::u16string_view operator*() const {
1657 if (index < count) {
1658 int32_t length;
1659 const UChar *uchars = uset_getString(uset, index, &length);
1660 // assert uchars != nullptr;
1661 return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
1662 }
1663 return {};
1664 }
1665
1671 ++index;
1672 return *this;
1673 }
1674
1680 USetStringIterator result(*this);
1681 ++index;
1682 return result;
1683 }
1684
1685private:
1686 friend class USetStrings;
1687
1688 USetStringIterator(const USet *pUset, int32_t nIndex, int32_t nCount)
1689 : uset(pUset), index(nIndex), count(nCount) {}
1690
1691 const USet *uset;
1692 int32_t index;
1693 int32_t count;
1694};
1695
1719public:
1724 USetStrings(const USet *pUset) : uset(pUset), count(uset_getStringCount(pUset)) {}
1725
1727 USetStrings(const USetStrings &other) = default;
1728
1731 return USetStringIterator(uset, 0, count);
1732 }
1733
1736 return USetStringIterator(uset, count, count);
1737 }
1738
1739private:
1740 const USet *uset;
1741 int32_t count;
1742};
1743#endif // U_HIDE_DRAFT_API
1744
1745#ifndef U_HIDE_DRAFT_API
1751public:
1754
1756 bool operator==(const USetElementIterator &other) const {
1757 // No need to compare rangeCount & end given private constructor
1758 // and assuming we don't compare iterators across the set being modified.
1759 // We might even skip comparing uset.
1760 // Unless we want operator==() to be "correct" for more than iteration.
1761 return uset == other.uset && c == other.c && index == other.index;
1762 }
1763
1765 bool operator!=(const USetElementIterator &other) const { return !operator==(other); }
1766
1768 std::u16string operator*() const {
1769 if (c >= 0) {
1770 return c <= 0xffff ?
1771 std::u16string({static_cast<char16_t>(c)}) :
1772 std::u16string({U16_LEAD(c), U16_TRAIL(c)});
1773 } else if (index < totalCount) {
1774 int32_t length;
1775 const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
1776 // assert uchars != nullptr;
1777 return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
1778 } else {
1779 return {};
1780 }
1781 }
1782
1788 if (c < end) {
1789 ++c;
1790 } else if (index < rangeCount) {
1791 UErrorCode errorCode = U_ZERO_ERROR;
1792 int32_t result = uset_getItem(uset, index, &c, &end, nullptr, 0, &errorCode);
1793 if (U_SUCCESS(errorCode) && result == 0) {
1794 ++index;
1795 } else {
1796 c = end = U_SENTINEL;
1797 }
1798 } else if (c >= 0) {
1799 // assert index == rangeCount;
1800 // Switch from the last range to the first string.
1801 c = end = U_SENTINEL;
1802 } else {
1803 ++index;
1804 }
1805 return *this;
1806 }
1807
1813 USetElementIterator result(*this);
1814 operator++();
1815 return result;
1816 }
1817
1818private:
1819 friend class USetElements;
1820
1821 USetElementIterator(const USet *pUset, int32_t nIndex, int32_t nRangeCount, int32_t nTotalCount)
1822 : uset(pUset), index(nIndex), rangeCount(nRangeCount), totalCount(nTotalCount),
1823 c(U_SENTINEL), end(U_SENTINEL) {
1824 if (index < rangeCount) {
1825 // Fetch the first range.
1826 operator++();
1827 }
1828 // Otherwise don't move beyond the (index - rangeCount)-th string.
1829 }
1830
1831 const USet *uset;
1832 int32_t index;
1834 int32_t rangeCount;
1844 int32_t totalCount;
1845 UChar32 c, end;
1846};
1847
1876public:
1881 USetElements(const USet *pUset)
1882 : uset(pUset), rangeCount(uset_getRangeCount(pUset)),
1883 stringCount(uset_getStringCount(pUset)) {}
1884
1886 USetElements(const USetElements &other) = default;
1887
1890 return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount);
1891 }
1892
1895 return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount);
1896 }
1897
1898private:
1899 const USet *uset;
1900 int32_t rangeCount, stringCount;
1901};
1902
1903} // namespace U_HEADER_ONLY_NAMESPACE
1904
1905#endif // U_HIDE_DRAFT_API
1906#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1907
1908#endif // __USET_H__
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
"Smart pointer" class, closes a USet via uset_close().
Iterator returned by USetCodePoints.
Definition uset.h:1341
bool operator==(const USetCodePointIterator &other) const
Definition uset.h:1347
USetCodePointIterator & operator++()
Pre-increment.
Definition uset.h:1366
bool operator!=(const USetCodePointIterator &other) const
Definition uset.h:1357
USetCodePointIterator operator++(int)
Post-increment.
Definition uset.h:1387
USetCodePointIterator(const USetCodePointIterator &other)=default
C++ "range" for iterating over the code points of a USet.
Definition uset.h:1427
USetCodePoints(const USetCodePoints &other)=default
USetCodePointIterator end() const
Definition uset.h:1444
USetCodePoints(const USet *pUset)
Constructs a C++ "range" object over the code points of the USet.
Definition uset.h:1433
USetCodePointIterator begin() const
Definition uset.h:1439
Iterator returned by USetElements.
Definition uset.h:1750
USetElementIterator operator++(int)
Post-increment.
Definition uset.h:1812
bool operator==(const USetElementIterator &other) const
Definition uset.h:1756
bool operator!=(const USetElementIterator &other) const
Definition uset.h:1765
USetElementIterator & operator++()
Pre-increment.
Definition uset.h:1787
USetElementIterator(const USetElementIterator &other)=default
A C++ "range" for iterating over all of the elements of a USet.
Definition uset.h:1875
USetElements(const USetElements &other)=default
USetElementIterator end() const
Definition uset.h:1894
USetElements(const USet *pUset)
Constructs a C++ "range" object over all of the elements of the USet.
Definition uset.h:1881
USetElementIterator begin() const
Definition uset.h:1889
Iterator returned by USetRanges.
Definition uset.h:1525
USetRangeIterator & operator++()
Pre-increment.
Definition uset.h:1559
CodePointRange operator*() const
Definition uset.h:1543
bool operator==(const USetRangeIterator &other) const
Definition uset.h:1531
USetRangeIterator operator++(int)
Post-increment.
Definition uset.h:1568
bool operator!=(const USetRangeIterator &other) const
Definition uset.h:1540
USetRangeIterator(const USetRangeIterator &other)=default
C++ "range" for iterating over the code point ranges of a USet.
Definition uset.h:1608
USetRangeIterator end() const
Definition uset.h:1625
USetRangeIterator begin() const
Definition uset.h:1620
USetRanges(const USet *pUset)
Constructs a C++ "range" object over the code point ranges of the USet.
Definition uset.h:1614
USetRanges(const USetRanges &other)=default
Iterator returned by USetStrings.
Definition uset.h:1638
USetStringIterator & operator++()
Pre-increment.
Definition uset.h:1670
USetStringIterator(const USetStringIterator &other)=default
bool operator!=(const USetStringIterator &other) const
Definition uset.h:1653
std::u16string_view operator*() const
Definition uset.h:1656
USetStringIterator operator++(int)
Post-increment.
Definition uset.h:1679
bool operator==(const USetStringIterator &other) const
Definition uset.h:1644
C++ "range" for iterating over the empty and multi-character strings of a USet.
Definition uset.h:1718
USetStrings(const USetStrings &other)=default
USetStringIterator begin() const
Definition uset.h:1730
USetStringIterator end() const
Definition uset.h:1735
USetStrings(const USet *pUset)
Constructs a C++ "range" object over the strings of the USet.
Definition uset.h:1724
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
A serialized form of a Unicode set.
Definition uset.h:259
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition uset.h:279
int32_t bmpLength
The length of the array that contains BMP characters.
Definition uset.h:269
const uint16_t * array
The serialized Unicode Set.
Definition uset.h:264
int32_t length
The total length of the array.
Definition uset.h:274
bool operator!=(const iterator &other) const
Definition uset.h:1469
UChar32 c
The current code point in the range.
Definition uset.h:1495
iterator & operator++()
Pre-increment.
Definition uset.h:1478
iterator operator++(int)
Post-increment.
Definition uset.h:1487
bool operator==(const iterator &other) const
Definition uset.h:1467
A contiguous range of code points in a USet/UnicodeSet.
Definition uset.h:1460
CodePointRange(UChar32 start, UChar32 end)
Definition uset.h:1499
UChar32 rangeEnd
Inclusive end of a USet/UnicodeSet range of code points.
Definition uset.h:1518
CodePointRange(const CodePointRange &other)=default
UChar32 rangeStart
Start of a USet/UnicodeSet range of code points.
Definition uset.h:1513
C API: Unicode Properties.
UProperty
Selection constants for Unicode properties.
Definition uchar.h:196
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition umachine.h:378
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:427
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:247
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition umachine.h:110
#define U_SENTINEL
This value is intended for sentinel values for APIs that (take or) return single code points (UChar32...
Definition umachine.h:447
U_CAPI UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
U_CAPI UBool uset_hasStrings(const USet *set)
U_CAPI int32_t uset_getStringCount(const USet *set)
U_CAPI USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
U_CAPI UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns true if the given USerializedSet contains the given character.
U_CAPI UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
U_CAPI void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
U_CAPI void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
U_CAPI UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns true if the given USet contains the given string.
U_CAPI UBool uset_isEmpty(const USet *set)
Returns true if the given USet contains no characters and no strings.
U_CAPI int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition uset.h:73
@ USET_ADD_CASE_MAPPINGS
Adds all case mappings for each element in the set.
Definition uset.h:114
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition uset.h:101
@ USET_SIMPLE_CASE_INSENSITIVE
Enable case insensitive matching.
Definition uset.h:128
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition uset.h:251
U_CAPI int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
U_CAPI UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
U_CAPI void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
U_CAPI void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
U_CAPI void uset_complementString(USet *set, const UChar *str, int32_t length)
Complements the specified string in this set.
U_CAPI void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
U_CAPI void uset_complementRange(USet *set, UChar32 start, UChar32 end)
Complements the specified range in this set.
U_CAPI UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
U_CAPI void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
U_CAPI void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
U_CAPI void uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length)
Complements EACH of the characters in this string.
U_CAPI int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in this set.
U_CAPI int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI void uset_removeAllStrings(USet *set)
Remove all strings from this set.
U_CAPI void uset_clear(USet *set)
Removes all of the elements from this set.
U_CAPI void uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length)
Removes EACH of the characters in this string.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition uset.h:186
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition uset.h:199
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition uset.h:214
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition uset.h:240
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition uset.h:234
U_CAPI void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
U_CAPI USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
U_CAPI USet * uset_openEmpty(void)
Create an empty USet object.
U_CAPI UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
U_CAPI UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
U_CAPI int32_t uset_getRangeCount(const USet *set)
U_CAPI USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
U_CAPI UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
U_CAPI int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
U_CAPI int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
U_CAPI USet * uset_clone(const USet *set)
Returns a copy of this object.
U_CAPI void uset_freeze(USet *set)
Freeze the set (make it immutable).
U_CAPI int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
U_CAPI int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
U_CAPI int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI void uset_complement(USet *set)
This is equivalent to uset_complementRange(set, 0, 0x10FFFF).
U_CAPI UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
U_CAPI int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
U_CAPI void uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length)
Retains EACH of the characters in this string.
U_CAPI UBool uset_contains(const USet *set, UChar32 c)
Returns true if the given USet contains the given character.
U_CAPI int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
U_CAPI void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
U_CAPI void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
U_CAPI void uset_close(USet *set)
Disposes of the storage used by a USet object.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition uset.h:54
U_CAPI const UChar * uset_getString(const USet *set, int32_t index, int32_t *pLength)
Returns the index-th string (empty or multi-character) in the set.
U_CAPI void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
U_CAPI void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
U_CAPI void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
U_CAPI int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
U_CAPI void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
U_CAPI void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
U_CAPI void uset_retainString(USet *set, const UChar *str, int32_t length)
Retains only the specified string from this set if it is present.
U_CAPI void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
U_CAPI UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns true if the given USet contains all characters c where start <= c && c <= end.
U_CAPI UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
C API: 16-bit Unicode handling macros.
#define U16_TRAIL(supplementary)
Get the trail surrogate (0xdc00..0xdfff) for a supplementary code point (0x10000.....
Definition utf16.h:132
#define U16_LEAD(supplementary)
Get the lead surrogate (0xd800..0xdbff) for a supplementary code point (0x10000..0x10ffff).
Definition utf16.h:123
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:430
@ U_ZERO_ERROR
No error, no warning.
Definition utypes.h:465
#define U_SUCCESS(x)
Does the error code indicate success?
Definition utypes.h:743