uniset.h

Go to the documentation of this file.
00001 /*
00002 ***************************************************************************
00003 * Copyright (C) 1999-2007, International Business Machines Corporation
00004 * and others. All Rights Reserved.
00005 ***************************************************************************
00006 *   Date        Name        Description
00007 *   10/20/99    alan        Creation.
00008 ***************************************************************************
00009 */
00010 
00011 #ifndef UNICODESET_H
00012 #define UNICODESET_H
00013 
00014 #include "unicode/unifilt.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uset.h"
00017 
00023 U_NAMESPACE_BEGIN
00024 
// Forward declarations. Within this header these types are only used through
// pointers or references (data members and parameter types of UnicodeSet), so
// their full definitions are not required here.
00025 class BMPSet;
00026 class ParsePosition;
00027 class SymbolTable;
00028 class UnicodeSetStringSpan;
00029 class UVector;
00030 class RuleCharacterIterator;
00031 
// UnicodeSet: a UnicodeFilter subclass, exported from the library through
// U_COMMON_API.
//
// As declared below, its state consists of a UChar32 array (`list`, with a
// scratch `buffer`), an optional UVector of strings, a pattern buffer
// (`pat`/`patLen`) and two helper objects (`bmpSet`, `stringSpan`) whose
// presence marks the set as frozen.
//
// NOTE(review): this listing has had its original API doc comments stripped;
// the notes added here describe only what the declarations themselves show.
// Anything marked "presumably" must be confirmed against the implementation.
00263 class U_COMMON_API UnicodeSet : public UnicodeFilter {
00264 
          // --- Data members (private: they precede the first access specifier) ---
00265     int32_t len; // length of list used; 0 <= len <= capacity
00266     int32_t capacity; // capacity of list
          // NOTE(review): HIGH (the terminator named below) is not defined in this header.
00267     UChar32* list; // MUST be terminated with HIGH
00268     BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
00269     UChar32* buffer; // internal buffer, may be NULL
00270     int32_t bufferCapacity; // capacity of buffer
          // Length value paired with `pat` below (presumably counted in UChars — confirm).
00271     int32_t patLen;
00272 
          // Pattern text buffer; presumably maintained by setPattern()/releasePattern()
          // declared near the end of the class — confirm ownership in the implementation.
00282     UChar *pat;
00283     UVector* strings; // maintained in sorted order
          // Second "frozen" marker: isFrozen() tests this pointer together with bmpSet.
00284     UnicodeSetStringSpan *stringSpan;
00285 
00286 public:
00287 
          // Numeric bounds published as class constants: 0 and 0x10ffff
          // (U+10FFFF is the last Unicode code point).
00288     enum {
00293         MIN_VALUE = 0,
00294 
00299         MAX_VALUE = 0x10ffff
00300     };
00301 
00302     //----------------------------------------------------------------
00303     // Constructors &c
00304     //----------------------------------------------------------------
00305 
00306 public:
00307 
          // Default constructor.
00312     UnicodeSet();
00313 
          // Constructs from a pair of code point bounds (presumably the inclusive
          // range start..end — confirm).
00322     UnicodeSet(UChar32 start, UChar32 end);
00323 
          // Pattern-based constructors. Each takes a UErrorCode& `status`
          // in/out argument; the longer overloads additionally accept an options
          // bit field, an optional SymbolTable and, in the last form, a
          // ParsePosition that is passed by non-const reference.
00332     UnicodeSet(const UnicodeString& pattern,
00333                UErrorCode& status);
00334 
00347     UnicodeSet(const UnicodeString& pattern,
00348                uint32_t options,
00349                const SymbolTable* symbols,
00350                UErrorCode& status);
00351 
00365     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00366                uint32_t options,
00367                const SymbolTable* symbols,
00368                UErrorCode& status);
00369 
          // Copy constructor.
00374     UnicodeSet(const UnicodeSet& o);
00375 
          // Virtual destructor: the class is used polymorphically (it derives from
          // UnicodeFilter and declares virtual members).
00380     virtual ~UnicodeSet();
00381 
          // Copy assignment.
00387     UnicodeSet& operator=(const UnicodeSet& o);
00388 
          // Equality comparison; virtual, so the dynamic type's version is used.
00400     virtual UBool operator==(const UnicodeSet& o) const;
00401 
          // Defined inline after the class as the logical negation of operator==.
00407     UBool operator!=(const UnicodeSet& o) const;
00408 
          // Returns a UnicodeFunctor* (presumably a heap-allocated copy owned by the
          // caller — confirm).
00418     virtual UnicodeFunctor* clone() const;
00419 
          // Hash value for this object.
00427     virtual int32_t hashCode(void) const;
00428 
00429     //----------------------------------------------------------------
00430     // Freezable API
00431     //----------------------------------------------------------------
00432 
          // Defined inline after the class: true when bmpSet or stringSpan is
          // non-NULL.
00441     inline UBool isFrozen() const;
00442 
          // NOTE(review): both return UnicodeFunctor*; presumably freeze() returns
          // this object and cloneAsThawed() returns a new, caller-owned copy —
          // confirm against the implementation.
00456     UnicodeFunctor *freeze();
00457 
00466     UnicodeFunctor *cloneAsThawed() const;
00467 
00468     //----------------------------------------------------------------
00469     // Public API
00470     //----------------------------------------------------------------
00471 
          // NOTE(review): the mutators in this section return UnicodeSet&,
          // presumably *this so that calls can be chained — confirm.
00482     UnicodeSet& set(UChar32 start, UChar32 end);
00483 
          // Static check on `pattern` at index `pos`; does not need an instance.
00489     static UBool resemblesPattern(const UnicodeString& pattern,
00490                                   int32_t pos);
00491 
          // applyPattern overloads mirror the pattern-based constructors above
          // (same parameter lists).
00504     UnicodeSet& applyPattern(const UnicodeString& pattern,
00505                              UErrorCode& status);
00506 
00523     UnicodeSet& applyPattern(const UnicodeString& pattern,
00524                              uint32_t options,
00525                              const SymbolTable* symbols,
00526                              UErrorCode& status);
00527 
00559     UnicodeSet& applyPattern(const UnicodeString& pattern,
00560                              ParsePosition& pos,
00561                              uint32_t options,
00562                              const SymbolTable* symbols,
00563                              UErrorCode& status);
00564 
          // Writes into the caller-supplied `result` and returns a UnicodeString&
          // (presumably `result` itself). escapeUnprintable defaults to FALSE.
00578     virtual UnicodeString& toPattern(UnicodeString& result,
00579                              UBool escapeUnprintable = FALSE) const;
00580 
          // Property-driven modifiers; both take a UErrorCode& `ec` in/out argument.
00603     UnicodeSet& applyIntPropertyValue(UProperty prop,
00604                                       int32_t value,
00605                                       UErrorCode& ec);
00606 
00636     UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00637                                    const UnicodeString& value,
00638                                    UErrorCode& ec);
00639 
          // Size / emptiness queries.
00648     virtual int32_t size(void) const;
00649 
00656     virtual UBool isEmpty(void) const;
00657 
          // Membership queries: overloads for a single code point, a pair of code
          // point bounds, a string, and another UnicodeSet.
00665     virtual UBool contains(UChar32 c) const;
00666 
00675     virtual UBool contains(UChar32 start, UChar32 end) const;
00676 
00684     UBool contains(const UnicodeString& s) const;
00685 
00693     virtual UBool containsAll(const UnicodeSet& c) const;
00694 
00702     UBool containsAll(const UnicodeString& s) const;
00703 
00712     UBool containsNone(UChar32 start, UChar32 end) const;
00713 
00721     UBool containsNone(const UnicodeSet& c) const;
00722 
00730     UBool containsNone(const UnicodeString& s) const;
00731 
          // The three containsSome overloads are defined inline after the class;
          // each returns the negation of the containsNone overload with the same
          // parameter list.
00740     inline UBool containsSome(UChar32 start, UChar32 end) const;
00741 
00749     inline UBool containsSome(const UnicodeSet& s) const;
00750 
00758     inline UBool containsSome(const UnicodeString& s) const;
00759 
          // Span functions: span/spanBack take a UChar buffer, the *UTF8 variants a
          // char buffer. All take a length and a USetSpanCondition and return an
          // int32_t (NOTE(review): handling of a negative length is not visible
          // here — check the implementation before relying on it).
00778     int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00779 
00797     int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00798 
00817     int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00818 
00836     int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00837 
          // Matching against a Replaceable; `offset` is passed by non-const
          // reference, so the callee may update it.
00842     virtual UMatchDegree matches(const Replaceable& text,
00843                          int32_t& offset,
00844                          int32_t limit,
00845                          UBool incremental);
00846 
00847 private:
          // Private helpers, not part of the public interface.
00869     static int32_t matchRest(const Replaceable& text,
00870                              int32_t start, int32_t limit,
00871                              const UnicodeString& s);
00872 
00882     int32_t findCodePoint(UChar32 c) const;
00883 
00884 public:
00885 
          // Takes the destination set by non-const reference; this object is const.
00893     virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
00894 
          // Index <-> code point accessors.
00903     int32_t indexOf(UChar32 c) const;
00904 
00914     UChar32 charAt(int32_t index) const;
00915 
          // add overloads: pair of code point bounds (virtual), single code point,
          // string.
00930     virtual UnicodeSet& add(UChar32 start, UChar32 end);
00931 
00939     UnicodeSet& add(UChar32 c);
00940 
00952     UnicodeSet& add(const UnicodeString& s);
00953 
00954  private:
          // Private string helpers.
00960     static int32_t getSingleCP(const UnicodeString& s);
00961 
00962     void _add(const UnicodeString& s);
00963 
00964  public:
          // String-argument variants of the bulk operations; UnicodeSet-argument
          // variants are declared further below.
00973     UnicodeSet& addAll(const UnicodeString& s);
00974 
00983     UnicodeSet& retainAll(const UnicodeString& s);
00984 
00993     UnicodeSet& complementAll(const UnicodeString& s);
01003     UnicodeSet& removeAll(const UnicodeString& s);
01004 
          // Static factories returning a raw UnicodeSet* (NOTE(review): presumably
          // heap-allocated and owned by the caller — confirm before use).
01013     static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
01014 
01015 
01023     static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
01024 
          // retain / remove / complement: each has a virtual overload taking two
          // code point bounds and a non-virtual single-code-point overload; remove
          // and complement also accept a string.
01038     virtual UnicodeSet& retain(UChar32 start, UChar32 end);
01039 
01040 
01046     UnicodeSet& retain(UChar32 c);
01047 
01061     virtual UnicodeSet& remove(UChar32 start, UChar32 end);
01062 
01070     UnicodeSet& remove(UChar32 c);
01071 
01081     UnicodeSet& remove(const UnicodeString& s);
01082 
01090     virtual UnicodeSet& complement(void);
01091 
01106     virtual UnicodeSet& complement(UChar32 start, UChar32 end);
01107 
01115     UnicodeSet& complement(UChar32 c);
01116 
01127     UnicodeSet& complement(const UnicodeString& s);
01128 
          // Bulk operations taking another UnicodeSet (all virtual).
01141     virtual UnicodeSet& addAll(const UnicodeSet& c);
01142 
01154     virtual UnicodeSet& retainAll(const UnicodeSet& c);
01155 
01167     virtual UnicodeSet& removeAll(const UnicodeSet& c);
01168 
01179     virtual UnicodeSet& complementAll(const UnicodeSet& c);
01180 
01187     virtual UnicodeSet& clear(void);
01188 
          // `attribute` is a plain int32_t here; its accepted values are not
          // declared in this header.
01214     UnicodeSet& closeOver(int32_t attribute);
01215 
          // Range accessors, addressed by an int32_t index.
01223     virtual int32_t getRangeCount(void) const;
01224 
01232     virtual UChar32 getRangeStart(int32_t index) const;
01233 
01241     virtual UChar32 getRangeEnd(int32_t index) const;
01242 
          // Writes into a caller-supplied uint16_t buffer of destCapacity elements;
          // takes a UErrorCode& `ec` in/out argument and returns an int32_t.
01291     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01292 
01299     virtual UnicodeSet& compact();
01300 
          // Class-ID accessors: one static, one virtual.
01312     static UClassID U_EXPORT2 getStaticClassID(void);
01313 
01322     virtual UClassID getDynamicClassID(void) const;
01323 
01324 private:
01325 
01326     // Private API for the USet API
01327 
          // Friendship gives USetAccess access to the private members of this class,
          // including the two string accessors declared next.
01328     friend class USetAccess;
01329 
01330     int32_t getStringCount() const;
01331 
01332     const UnicodeString* getString(int32_t index) const;
01333 
01334     //----------------------------------------------------------------
01335     // RuleBasedTransliterator support
01336     //----------------------------------------------------------------
01337 
01338 private:
01339 
01345     virtual UBool matchesIndexValue(uint8_t v) const;
01346 
01347 private:
01348 
01349     //----------------------------------------------------------------
01350     // Implementation: Clone as thawed (see ICU4J Freezable)
01351     //----------------------------------------------------------------
01352 
          // Private two-argument constructor. The UBool parameter is unnamed in this
          // declaration and appears to act only as an overload tag distinguishing it
          // from the public copy constructor — confirm in the implementation.
01353     UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
01354 
01355     //----------------------------------------------------------------
01356     // Implementation: Pattern parsing
01357     //----------------------------------------------------------------
01358 
          // Private overload working on a RuleCharacterIterator; `rebuiltPat` is
          // passed by non-const reference, so it can be written by the callee.
01359     void applyPattern(RuleCharacterIterator& chars,
01360                       const SymbolTable* symbols,
01361                       UnicodeString& rebuiltPat,
01362                       uint32_t options,
01363                       UErrorCode& ec);
01364 
01365     //----------------------------------------------------------------
01366     // Implementation: Utility methods
01367     //----------------------------------------------------------------
01368 
          // NOTE(review): the two ensure*Capacity helpers return void and take no
          // UErrorCode, so allocation failure cannot be reported through their
          // signatures.
01369     void ensureCapacity(int32_t newLen);
01370 
01371     void ensureBufferCapacity(int32_t newLen);
01372 
01373     void swapBuffers(void);
01374 
01375     UBool allocateStrings(UErrorCode &status);
01376 
          // Pattern-generation helpers behind toPattern(); unlike the public method,
          // escapeUnprintable has no default value here.
01377     UnicodeString& _toPattern(UnicodeString& result,
01378                               UBool escapeUnprintable) const;
01379 
01380     UnicodeString& _generatePattern(UnicodeString& result,
01381                                     UBool escapeUnprintable) const;
01382 
01383     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01384 
01385     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01386 
01387     //----------------------------------------------------------------
01388     // Implementation: Fundamental operators
01389     //----------------------------------------------------------------
01390 
          // Each takes a UChar32 array, its length and an int8_t `polarity`
          // (the meaning of polarity values is defined in the implementation).
01391     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01392 
01393     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01394 
01395     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01396 
          // Property-pattern helpers, in string-based and iterator-based forms.
01402     static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01403                                           int32_t pos);
01404 
01405     static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01406                                           int32_t iterOpts);
01407 
01446     UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01447                                      ParsePosition& ppos,
01448                                      UErrorCode &ec);
01449 
01450     void applyPropertyPattern(RuleCharacterIterator& chars,
01451                               UnicodeString& rebuiltPat,
01452                               UErrorCode& ec);
01453 
          // Callback type: a function pointer receiving a code point and an opaque
          // context pointer, returning UBool.
01458     typedef UBool (*Filter)(UChar32 codePoint, void* context);
01459 
          // Takes a Filter callback and the context pointer to hand to it, plus an
          // int32_t `src` selector whose values are not declared in this header.
01469     void applyFilter(Filter filter,
01470                      void* context,
01471                      int32_t src,
01472                      UErrorCode &status);
01473 
          // Pattern buffer maintenance (presumably operating on pat/patLen — confirm).
01477     void setPattern(const UnicodeString& newPat);
01481     void releasePattern();
01482 
          // UnicodeSetIterator is granted access to the private members.
01483     friend class UnicodeSetIterator;
01484 };
01485 
// Inequality: true exactly when operator== reports FALSE for the same operand.
// The call goes through the member operator==, which is declared virtual.
01486 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01487     return (UBool)(this->operator==(o) == FALSE);
01488 }
01489 
// A set counts as frozen unless both helper pointers (bmpSet and stringSpan)
// are NULL, i.e. as soon as at least one of them has been set.
01490 inline UBool UnicodeSet::isFrozen() const {
01491     return (UBool)(!(bmpSet==NULL && stringSpan==NULL));
01492 }
01493 
// Code-point-bounds overload: the complement of containsNone(start, end).
01494 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01495     return (UBool)(this->containsNone(start, end) == FALSE);
01496 }
01497 
// UnicodeSet overload: the complement of containsNone(const UnicodeSet&).
01498 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01499     return (UBool)(this->containsNone(s) == FALSE);
01500 }
01501 
// UnicodeString overload: the complement of containsNone(const UnicodeString&).
01502 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01503     return (UBool)(this->containsNone(s) == FALSE);
01504 }
01505 
01506 U_NAMESPACE_END
01507 
01508 #endif

Generated on Fri Sep 14 12:59:21 2007 for ICU 3.8 by doxygen 1.5.1