Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members

fxunicode.h

Go to the documentation of this file.
00001 /********************************************************************************
00002 *                                                                               *
00003 *                   U N I C O D E   C h a r a c t e r   I n f o                 *
00004 *                                                                               *
00005 *********************************************************************************
00006 * Copyright (C) 2005,2009 by Jeroen van der Zijp.   All Rights Reserved.        *
00007 *********************************************************************************
00008 * This library is free software; you can redistribute it and/or modify          *
00009 * it under the terms of the GNU Lesser General Public License as published by   *
00010 * the Free Software Foundation; either version 3 of the License, or             *
00011 * (at your option) any later version.                                           *
00012 *                                                                               *
00013 * This library is distributed in the hope that it will be useful,               *
00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of                *
00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                 *
00016 * GNU Lesser General Public License for more details.                           *
00017 *                                                                               *
00018 * You should have received a copy of the GNU Lesser General Public License      *
00019 * along with this program.  If not, see <http://www.gnu.org/licenses/>          *
00020 *********************************************************************************
00021 * $Id: fxunicode.h,v 1.8 2009/01/06 13:07:29 fox Exp $                          *
00022 ********************************************************************************/
00023 #ifndef FXUNICODE_H
00024 #define FXUNICODE_H
00025 
00026 
00027 namespace FX {
00028 
00029 
00030 /// Unicode General Category values; each comment gives the two-letter abbreviation, then the category name
00031 enum {
00032   CatNotAssigned          = 0,     ///< Cn Other, Not Assigned (no characters in the file have this property)
00033   CatControl              = 1,     ///< Cc Other, Control
00034   CatFormat               = 2,     ///< Cf Other, Format
00035   CatSurrogate            = 3,     ///< Cs Other, Surrogate
00036   CatOther                = 4,     ///< Co Other, Private Use
00037   CatMarkNonSpacing       = 5,     ///< Mn Mark, Nonspacing
00038   CatMarkSpacingCombining = 6,     ///< Mc Mark, Spacing Combining
00039   CatMarkEnclosing        = 7,     ///< Me Mark, Enclosing
00040   CatSeparatorSpace       = 8,     ///< Zs Separator, Space
00041   CatSeparatorLine        = 9,     ///< Zl Separator, Line
00042   CatSeparatorParagraph   = 10,    ///< Zp Separator, Paragraph
00043   CatLetterUpper          = 11,    ///< Lu Letter, Uppercase
00044   CatLetterLower          = 12,    ///< Ll Letter, Lowercase
00045   CatLetterTitle          = 13,    ///< Lt Letter, Titlecase
00046   CatLetterModifier       = 14,    ///< Lm Letter, Modifier
00047   CatLetterOther          = 15,    ///< Lo Letter, Other
00048   CatNumberLetter         = 16,    ///< Nl Number, Letter
00049   CatNumberDecimal        = 17,    ///< Nd Number, Decimal Digit
00050   CatNumberOther          = 18,    ///< No Number, Other
00051   CatPunctConnector       = 19,    ///< Pc Punctuation, Connector
00052   CatPunctDash            = 20,    ///< Pd Punctuation, Dash
00053   CatPunctOpen            = 21,    ///< Ps Punctuation, Open
00054   CatPunctClose           = 22,    ///< Pe Punctuation, Close
00055   CatPunctInitial         = 23,    ///< Pi Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
00056   CatPunctFinal           = 24,    ///< Pf Punctuation, Final quote (may behave like Ps or Pe depending on usage)
00057   CatPunctOther           = 25,    ///< Po Punctuation, Other
00058   CatSymbolMath           = 26,    ///< Sm Symbol, Math
00059   CatSymbolCurrency       = 27,    ///< Sc Symbol, Currency
00060   CatSymbolModifier       = 28,    ///< Sk Symbol, Modifier
00061   CatSymbolOther          = 29     ///< So Symbol, Other
00062   };
00063 
00064 
00065 /// Bidirectional (Bidi) character types
00066 enum {
00067   DirL   = 0,           ///< Left-to-Right
00068   DirLRE = 1,           ///< Left-to-Right Embedding
00069   DirLRO = 2,           ///< Left-to-Right Override
00070   DirR   = 3,           ///< Right-to-Left
00071   DirAL  = 4,           ///< Right-to-Left Arabic
00072   DirRLE = 5,           ///< Right-to-Left Embedding
00073   DirRLO = 6,           ///< Right-to-Left Override
00074   DirPDF = 7,           ///< Pop Directional Format
00075   DirEN  = 8,           ///< European Number
00076   DirES  = 9,           ///< European Number Separator
00077   DirET  = 10,          ///< European Number Terminator
00078   DirAN  = 11,          ///< Arabic Number
00079   DirCS  = 12,          ///< Common Number Separator
00080   DirNSM = 13,          ///< Non-Spacing Mark
00081   DirBN  = 14,          ///< Boundary Neutral
00082   DirB   = 15,          ///< Paragraph Separator
00083   DirS   = 16,          ///< Segment Separator
00084   DirWS  = 17,          ///< Whitespace
00085   DirON  = 18           ///< Other Neutrals
00086   };
00087 
00088 
00089 /// Arabic joining types
00090 enum {
00091   NonJoining   = 0,     ///< Non-joining
00092   RightJoining = 1,     ///< Right-joining
00093   DualJoining  = 2,     ///< Dual-joining
00094   JoinCausing  = 3      ///< Join-causing
00095   };
00096 
00097 
00098 /// Named combining class values
00099 enum {
00100   CombBelowLeftAtt  = 200,     ///< Below left attached
00101   CombBelowAtt      = 202,     ///< Below attached
00102   CombBelowRightAtt = 204,     ///< Below right attached
00103   CombLeftAtt       = 208,     ///< Left attached (reordrant around single base character)
00104   CombRightAtt      = 210,     ///< Right attached
00105   CombAboveLeftAtt  = 212,     ///< Above left attached
00106   CombAboveAtt      = 214,     ///< Above attached
00107   CombAboveRightAtt = 216,     ///< Above right attached
00108   CombBelowLeft     = 218,     ///< Below left
00109   CombBelow         = 220,     ///< Below
00110   CombBelowRight    = 222,     ///< Below right
00111   CombLeft          = 224,     ///< Left (reordrant around single base character)
00112   CombRight         = 226,     ///< Right
00113   CombAboveLeft     = 228,     ///< Above left
00114   CombAbove         = 230,     ///< Above
00115   CombAboveRight    = 232,     ///< Above right
00116   CombDoubleBelow   = 233,     ///< Double below
00117   CombDoubleAbove   = 234,     ///< Double above
00118   CombIotaSub       = 240      ///< Below (iota subscript)
00119   };
00120 
00121 
00122 /// Decomposition types
00123 enum {
00124   DecNone      = 0,     ///< Non-decomposable
00125   DecFont      = 1,     ///< A font variant (e.g. a blackletter form)
00126   DecNoBreak   = 2,     ///< A no-break version of a space or hyphen
00127   DecInitial   = 3,     ///< An initial presentation form (Arabic)
00128   DecMedial    = 4,     ///< A medial presentation form (Arabic)
00129   DecFinal     = 5,     ///< A final presentation form (Arabic)
00130   DecIsolated  = 6,     ///< An isolated presentation form (Arabic)
00131   DecCircle    = 7,     ///< An encircled form
00132   DecSuper     = 8,     ///< A superscript form
00133   DecSub       = 9,     ///< A subscript form
00134   DecVertical  = 10,    ///< A vertical layout presentation form
00135   DecWide      = 11,    ///< A wide (or zenkaku) compatibility character
00136   DecNarrow    = 12,    ///< A narrow (or hankaku) compatibility character
00137   DecSmall     = 13,    ///< A small variant form (CNS compatibility)
00138   DecSquare    = 14,    ///< A CJK squared font variant
00139   DecFraction  = 15,    ///< A vulgar fraction form
00140   DecCompat    = 16,    ///< Compatible
00141   DecCanonical = 17     ///< Canonical (equivalent)
00142   };
00143 
00144 
00145 /// Line break types; each comment gives the two-letter class abbreviation, then its name
00146 enum {
00147   BreakUnknown    = 0,          ///< XX Unknown
00148 
00149   BreakMandarory  = 1,          ///< BK Mandatory Break (NOTE: the identifier is spelled "Mandarory"; it is part of the public interface)
00150   BreakReturn     = 2,          ///< CR Carriage Return
00151   BreakLineFeed   = 3,          ///< LF Line Feed
00152   BreakCombMark   = 4,          ///< CM Attached Characters and Combining Marks
00153   BreakNextLine   = 5,          ///< NL Next Line
00154   BreakSurrogate  = 6,          ///< SG Surrogates
00155   BreakWordJoiner = 7,          ///< WJ Word Joiner
00156   BreakZWSpace    = 8,          ///< ZW Zero Width Space
00157   BreakGlue       = 9,          ///< GL Non-breaking Glue
00158   BreakContingent = 10,         ///< CB Contingent Break Opportunity
00159   BreakSpace      = 11,         ///< SP Space
00160 
00161   BreakBoth       = 12,         ///< B2 Break Opportunity Before and After
00162   BreakAfter      = 13,         ///< BA Break Opportunity After
00163   BreakBefore     = 14,         ///< BB Break Opportunity Before
00164   BreakHyphen     = 15,         ///< HY Hyphen
00165 
00166   BreakOpen       = 16,         ///< OP Opening Punctuation
00167   BreakClose      = 17,         ///< CL Closing Punctuation
00168   BreakQuote      = 18,         ///< QU Ambiguous Quotation
00169   BreakExclaim    = 19,         ///< EX Exclamation/Interrogation
00170   BreakInsep      = 20,         ///< IN Inseparable
00171   BreakNonStart   = 21,         ///< NS Non Starter
00172 
00173   BreakInfix      = 22,         ///< IS Infix Separator (Numeric)
00174   BreakNumeric    = 23,         ///< NU Numeric
00175   BreakPostfix    = 24,         ///< PO Postfix (Numeric)
00176   BreakPrefix     = 25,         ///< PR Prefix (Numeric)
00177   BreakSymbol     = 26,         ///< SY Symbols Allowing Breaks
00178 
00179   BreakOrdinary   = 27,         ///< AL Ordinary Alphabetic and Symbol Characters
00180   BreakIdeograph  = 28,         ///< ID Ideographic
00181   BreakComplex    = 29          ///< SA Complex Context (South East Asian)
00182   };
00183 
00184 
00185 /// Script identifiers; each comment gives the four-letter script code (variant codes in parentheses)
00186 enum {
00187   ScriptCommon             = 0,       ///< Zyyy
00188   ScriptInherited          = 1,       ///< Qaai
00189 
00190   ScriptLatin              = 2,       ///< Latn  [group: European scripts]
00191   ScriptGreek              = 3,       ///< Grek
00192   ScriptCyrillic           = 4,       ///< Cyrl (Cyrs)
00193   ScriptArmenian           = 5,       ///< Armn
00194   ScriptGeorgian           = 6,       ///< Geor (Geon, Geoa)
00195   ScriptRunic              = 7,       ///< Runr
00196   ScriptOgham              = 8,       ///< Ogam
00197 
00198   ScriptHebrew             = 9,       ///< Hebr  [group: Middle eastern]
00199   ScriptArabic             = 10,      ///< Arab
00200   ScriptSyriac             = 11,      ///< Syrc (Syrj, Syrn, Syre)
00201   ScriptThaana             = 12,      ///< Thaa
00202 
00203   ScriptDevanagari         = 13,      ///< Deva  [group: Indic]
00204   ScriptBengali            = 14,      ///< Beng
00205   ScriptGurmukhi           = 15,      ///< Guru
00206   ScriptGujarati           = 16,      ///< Gujr
00207   ScriptOriya              = 17,      ///< Orya
00208   ScriptTamil              = 18,      ///< Taml
00209   ScriptTelugu             = 19,      ///< Telu
00210   ScriptKannada            = 20,      ///< Knda
00211   ScriptMalayalam          = 21,      ///< Mlym
00212   ScriptSinhala            = 22,      ///< Sinh
00213   ScriptThai               = 23,      ///< Thai
00214   ScriptLao                = 24,      ///< Laoo
00215   ScriptTibetan            = 25,      ///< Tibt
00216   ScriptMyanmar            = 26,      ///< Mymr
00217   ScriptKhmer              = 27,      ///< Khmr
00218 
00219   ScriptHan                = 28,      ///< Hani  [group: Asian]
00220   ScriptHiragana           = 29,      ///< Hira
00221   ScriptKatakana           = 30,      ///< Kana
00222   ScriptHangul             = 31,      ///< Hang
00223   ScriptBopomofo           = 32,      ///< Bopo
00224   ScriptYi                 = 33,      ///< Yiii
00225 
00226   ScriptEthiopic           = 34,      ///< Ethi  [group: Misc]
00227   ScriptCherokee           = 35,      ///< Cher
00228   ScriptCanadianAboriginal = 36,      ///< Cans
00229   ScriptMongolian          = 37,      ///< Mong
00230   ScriptGothic             = 38,      ///< Goth
00231 
00232   ScriptTagalog            = 39,      ///< Tglg
00233   ScriptHanunoo            = 40,      ///< Hano
00234   ScriptBuhid              = 41,      ///< Buhd
00235   ScriptTagbanwa           = 42,      ///< Tagb
00236   ScriptLimbu              = 43,      ///< Limb
00237   ScriptTaiLe              = 44,      ///< Tale
00238   ScriptUgaritic           = 45,      ///< Ugar
00239   ScriptOsmanya            = 46,      ///< Osma
00240   ScriptCypriot            = 47,      ///< Cprt
00241   ScriptShavian            = 48,      ///< Shaw
00242   ScriptDeseret            = 49,      ///< Dsrt
00243   ScriptKatakanaHiragana   = 50       ///< Hrkt
00244   };
00245 
00246 
00247 /// Unicode versions of common character functions; each takes a wide character (FXwchar)
00248 namespace Unicode {
00249 
00250 /// Get wide character category
00251 extern FXAPI FXuint charCategory(FXwchar ucs);
00252 
00253 /// Get wide character direction
00254 extern FXAPI FXuint charDirection(FXwchar ucs);
00255 
00256 /// Get wide character decompose type
00257 extern FXAPI FXuint decomposeType(FXwchar ucs);
00258 
00259 /// Return number of wide characters in decomposition
00260 extern FXAPI FXuint charNumDecompose(FXwchar ucs);
00261 
00262 /// Return wide character decomposition
00263 extern FXAPI const FXwchar* charDecompose(FXwchar ucs);
00264 
00265 /// Return wide character composition from ucsa and ucsb
00266 extern FXAPI FXwchar charCompose(FXwchar ucsa,FXwchar ucsb);
00267 
00268 /// Get wide character joining
00269 extern FXAPI FXuint joiningType(FXwchar ucs);
00270 
00271 /// Get wide character symmetry
00272 extern FXAPI FXuint isSymmetric(FXwchar ucs);
00273 
00274 /// Get wide character combining type; zero means starter
00275 extern FXAPI FXuint charCombining(FXwchar ucs);
00276 
00277 /// Get numeric value of wide character (this includes hex value)
00278 extern FXAPI FXint digitValue(FXwchar ucs);
00279 
00280 /// Get linebreak type of wide character
00281 extern FXAPI FXuint lineBreakType(FXwchar ucs);
00282 
00283 
00284 /// Get mirror image of wide character or character itself
00285 extern FXAPI FXwchar mirrorImage(FXwchar ucs);
00286 
00287 /// Get script type of wide character
00288 extern FXAPI FXuint scriptType(FXwchar ucs);
00289 
00290 
00291 /// Unicode flavor of common character tests (the comment applies to the whole run of declarations below)
00292 extern FXAPI FXbool hasCase(FXwchar ucs);
00293 extern FXAPI FXbool isUpper(FXwchar ucs);
00294 extern FXAPI FXbool isLower(FXwchar ucs);
00295 extern FXAPI FXbool isTitle(FXwchar ucs);
00296 extern FXAPI FXbool isAscii(FXwchar ucs);
00297 extern FXAPI FXbool isLetter(FXwchar ucs);
00298 extern FXAPI FXbool isDigit(FXwchar ucs);
00299 extern FXAPI FXbool isAlphaNumeric(FXwchar ucs);
00300 extern FXAPI FXbool isControl(FXwchar ucs);
00301 extern FXAPI FXbool isSpace(FXwchar ucs);
00302 extern FXAPI FXbool isBlank(FXwchar ucs);
00303 extern FXAPI FXbool isPunct(FXwchar ucs);
00304 extern FXAPI FXbool isGraph(FXwchar ucs);
00305 extern FXAPI FXbool isPrint(FXwchar ucs);
00306 extern FXAPI FXbool isHexDigit(FXwchar ucs);
00307 extern FXAPI FXbool isSymbol(FXwchar ucs);
00308 extern FXAPI FXbool isMark(FXwchar ucs);
00309 extern FXAPI FXbool isSep(FXwchar ucs);
00310 
00311 /// Case conversion of a wide character (the comment applies to the three declarations below)
00312 extern FXAPI FXwchar toUpper(FXwchar ucs);
00313 extern FXAPI FXwchar toLower(FXwchar ucs);
00314 extern FXAPI FXwchar toTitle(FXwchar ucs);
00315 
00316 } // namespace Unicode
00317 
00318 
00319 }
00320 
00321 #endif

Copyright © 1997-2009 Jeroen van der Zijp