*** misc/icu/source/common/brkiter.cpp Thu Aug 8 10:39:14 2002 --- misc/build/icu/source/common/brkiter.cpp Tue May 27 15:21:32 2003 *************** *** 52,64 **** // Creates a break iterator for word breaks. BreakIterator* ! BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) { // WARNING: This routine is currently written specifically to handle only the // default rules files and the alternate rules files for Thai. This function // will have to be made fully general at some time in the future! BreakIterator* result = NULL; - const char* filename = "word"; if (U_FAILURE(status)) return NULL; --- 52,63 ---- // Creates a break iterator for word breaks. BreakIterator* ! BreakIterator::createWordInstance(const Locale& key, const char* filename, UErrorCode& status) { // WARNING: This routine is currently written specifically to handle only the // default rules files and the alternate rules files for Thai. This function // will have to be made fully general at some time in the future! BreakIterator* result = NULL; if (U_FAILURE(status)) return NULL; *************** *** 90,95 **** --- 89,100 ---- } return result; + } + + BreakIterator* + BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) + { + return createWordInstance(key, "word", status); } // ------------------------------------- *** misc/icu/source/common/unicode/brkiter.h Thu Aug 8 10:39:14 2002 --- misc/build/icu/source/common/unicode/brkiter.h Tue May 27 15:21:32 2003 *************** *** 369,374 **** --- 369,376 ---- */ static BreakIterator* createWordInstance(const Locale& where, UErrorCode& status); + static BreakIterator* createWordInstance(const Locale& where, const char* filename, + UErrorCode& status); /** * Create BreakIterator for line-breaks using specified locale. 
*** misc/icu/source/config/mh-linux Fri Jan 25 04:35:04 2002 --- misc/build/icu/source/config/mh-linux Tue May 27 15:21:32 2003 *************** *** 22,27 **** --- 22,34 ---- LD_RPATH= LD_RPATH_PRE = -Wl,-rpath, + ## Force RPATH=$ORIGIN to locate own dependencies w/o need for LD_LIBRARY_PATH + ENABLE_RPATH=YES + RPATHLDFLAGS=${LD_RPATH_PRE}'$$ORIGIN' + + #SH# ENABLE_RPATH=YES + #SH# RPATHLDFLAGS="${LD_RPATH_PRE}'$$ORIGIN'" + ## Compiler switch to embed a library name LD_SONAME = -Wl,-soname -Wl,$(MIDDLE_SO_TARGET) *** misc/icu/source/config/mh-solaris Tue Jul 30 12:46:10 2002 --- misc/build/icu/source/config/mh-solaris Tue May 27 15:21:32 2003 *************** *** 23,38 **** ## Commands to link ## For Sun Workshop, use CC to link to bring in C++ runtime ! LINK.c= $(CXX) $(CXXFLAGS) $(LDFLAGS) ! LINK.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS) ## Commands to make a shared library SHLIB.c= $(CC) $(CFLAGS) $(LDFLAGS) -G ! SHLIB.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS) -G ## Compiler switch to embed a runtime search path LD_RPATH= -R LD_RPATH_PRE= ## Compiler switch to embed a library name LD_SONAME = -h $(MIDDLE_SO_TARGET) --- 23,46 ---- ## Commands to link ## For Sun Workshop, use CC to link to bring in C++ runtime ! ## For Sun Workshop, -norunpath stops compiler to record a useless RPATH ! LINK.c= $(CXX) $(CXXFLAGS) $(LDFLAGS) -norunpath ! LINK.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS) -norunpath ## Commands to make a shared library SHLIB.c= $(CC) $(CFLAGS) $(LDFLAGS) -G ! 
SHLIB.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS) -G -norunpath ## Compiler switch to embed a runtime search path LD_RPATH= -R LD_RPATH_PRE= + + ## Force RPATH=$ORIGIN to locate own dependencies w/o need for LD_LIBRARY_PATH + ENABLE_RPATH=YES + RPATHLDFLAGS=${LD_RPATH}'$$ORIGIN' + + #SH# ENABLE_RPATH=YES + #SH# RPATHLDFLAGS="${LD_RPATH}'$$ORIGIN'" ## Compiler switch to embed a library name LD_SONAME = -h $(MIDDLE_SO_TARGET) *** misc/icu/source/data/Makefile.in Fri Aug 16 03:08:02 2002 --- misc/build/icu/source/data/Makefile.in Tue May 27 15:21:32 2003 *************** *** 141,147 **** ## BRK files # ALL of these files can be deleted (the following BRK files) - they are copied ! BRK_FILES_SHORT=char.brk line.brk line_th.brk sent.brk word.brk title.brk word_th.brk BRK_FILES=$(BRK_FILES_SHORT:%=$(BUILDDIR)/$(ICUDT)%) # don't include thaidict.brk - it goes into a resource bundle - plus it isn't deleted --- 141,147 ---- ## BRK files # ALL of these files can be deleted (the following BRK files) - they are copied ! BRK_FILES_SHORT=char.brk line.brk line_th.brk sent.brk word.brk edit_word.brk dict_word.brk count_word.brk title.brk word_th.brk BRK_FILES=$(BRK_FILES_SHORT:%=$(BUILDDIR)/$(ICUDT)%) # don't include thaidict.brk - it goes into a resource bundle - plus it isn't deleted *** misc/icu/source/data/brkitr/count_word.txt Tue May 27 15:41:24 2003 --- misc/build/icu/source/data/brkitr/count_word.txt Tue May 27 15:21:32 2003 *************** *** 1 **** ! dummy --- 1,127 ---- ! # ! # Copyright (C) 2002, International Business Machines Corporation and others. ! # All Rights Reserved. ! # ! # file: word.txt ! # ! # ICU Word Break Rules ! # See Unicode Technical Report #29. ! # These rules are based on the proposed draft dated 2002-08-06 ! # ! ! ! ! #################################################################################### ! # ! # Definitions imported from Line Break Rules. ! # ! #################################################################################### ! 
$Numeric = [ \u0030-\u0039 \u0660-\u0669 \u06F0-\u06F9 \u0966-\u096F \u09E6-\u09EF ! \u0A66-\u0A6F \u0AE6-\u0AEF \u0B66-\u0B6F \u0BE7-\u0BEF \u0C66-\u0C6F ! \u0CE6-\u0CEF \u0D66-\u0D6F \u0E50-\u0E59 \u0ED0-\u0ED9 \u0F20-\u0F29 ! \u1040-\u1049 \u1369-\u1371 \u17E0-\u17E9 \u1810-\u1819 \U0001D7CE-\U0001D7FF]; ! ! ! ! #################################################################################### ! # ! # Definitions imported from Character Break Rules. ! # ! #################################################################################### ! # ! # Character Class Definitions. ! # The names are those from TR29. ! # ! $Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]]; ! ! # Note on $Extend: Earlier versions of TR29 included Mc characters. ! # To avoid test breakage, Mc is still included for the time being. ! # $Extend = [[:Mn:] [:Me:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! $Extend = [[:Mn:] [:Me:] [:Mc:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! ! ! ! ! #################################################################################### ! # ! # Word Break Rules. Definitions and Rules specific to word break begin Here. ! # ! #################################################################################### ! ! $Katakana = [[:Kana:] \u30fc \uff70 \uff9e-\uff9f]; ! $Hiragana = [[:Hira:]]; ! $Letter = [[[:Alphabetic:] \u02b9-\u02ba \u02c2-\u02cf \u02d2-\u02df \u02e5-\u02ed \u05f3] - ! [[:THAI:] [:LAO:] $Hiragana $Katakana ]]; ! $Format = [[:Cf:]]; ! ! $MidLetter = [\u0027 \u00ad \u05f4 \u2019]; ! ! $MidNumLet = [\u002e]; ! ! ! # From Line Break, IS - Numeric Separator (Infix) ! # $IS = [\u002c \u002e \u003a \u003b \u0589]; ! $MidNum = [\u002c \u003b \u0589]; ! ! # ! # "Extended" definitions. Classes of characters including trailing combining chars and, ! # for types of chars that can appear in the interior of a word only, ! # trailing format characters. ! # ! $LetterEx = $Letter $Extend*; ! $NumericEx = $Numeric $Extend*; ! 
$MidNumExF = $MidNum $Extend* $Format*; ! $MidNumLetExF = $MidNumLet $Extend* $Format*; ! $MidLetterExF = $MidLetter $Extend* $Format*; ! ! $word_pad=[[:P:][:S:][:Z:][:C:]]; ! ! # ! # Numbers. Rules 6, 9, 10 form the TR. ! # ! $NumberSequence = $NumericEx ($Format* ($MidNumExF | $MidNumLetExF)? $NumericEx)*; ! $NumberSequence $word_pad* {100}; ! ! # ! # Words. Alpha-numerics. Rule 3 - 10 ! # - must include at least one letter. ! # - may include both letters and numbers. ! # - may include MideLetter, MidNumber punctuation. ! # ! $LetterSequence = $LetterEx ($Format* ($MidLetterExF | $MidNumLetExF)? $LetterEx)*; ! $NumberSequence? $LetterSequence ($NumberSequence | $LetterSequence)* $word_pad* {200}; ! ! # ! # Hiragana and Katakana ! # ! $Hiragana $Extend* {300}; ! $Katakana $Extend* ($Format* $Katakana $Extend*)* {300}; ! ! # ! # Ideographic Characters. Stand by themselves as words. ! # ! # [:IDEOGRAPHIC:] $Extend* $word_pad* {400}; ! ! # ! # Everything Else, with no tag. ! # Non-Control chars combine with $Extend (combining) chars. ! # Controls are returned by themselves. ! # ! [^$Control] $Extend* $word_pad*; ! \r\n; ! .; ! ! # ! # Reverse Rules. Back up over any of the chars that can group together. ! # (Reverse rules do not need to be exact; they can back up too far, ! # but must back up at least enough, and must stop on a boundary.) ! # ! ! # NonStarters are the set of all characters that can appear at the 2nd - nth position of ! # a word. (They may also be the first.) The reverse rule skips over these, until it ! # reaches something that can only be the start (and probably only) char in a "word". ! # A space or punctuation meets the test. ! # ! $NonStarters = [$Numeric $Letter $Katakana $MidLetter $MidNum $MidNumLet $Extend $Format \u000a]; ! ! ! $NonStarters* .; *** misc/icu/source/data/brkitr/dict_word.txt Tue May 27 15:41:24 2003 --- misc/build/icu/source/data/brkitr/dict_word.txt Tue May 27 15:21:32 2003 *************** *** 1 **** ! dummy --- 1,126 ---- ! 
# ! # Copyright (C) 2002, International Business Machines Corporation and others. ! # All Rights Reserved. ! # ! # file: word.txt ! # ! # ICU Word Break Rules ! # See Unicode Technical Report #29. ! # These rules are based on the proposed draft dated 2002-08-06 ! # ! ! ! ! #################################################################################### ! # ! # Definitions imported from Line Break Rules. ! # ! #################################################################################### ! $Numeric = [ \u0030-\u0039 \u0660-\u0669 \u06F0-\u06F9 \u0966-\u096F \u09E6-\u09EF ! \u0A66-\u0A6F \u0AE6-\u0AEF \u0B66-\u0B6F \u0BE7-\u0BEF \u0C66-\u0C6F ! \u0CE6-\u0CEF \u0D66-\u0D6F \u0E50-\u0E59 \u0ED0-\u0ED9 \u0F20-\u0F29 ! \u1040-\u1049 \u1369-\u1371 \u17E0-\u17E9 \u1810-\u1819 \U0001D7CE-\U0001D7FF]; ! ! ! ! #################################################################################### ! # ! # Definitions imported from Character Break Rules. ! # ! #################################################################################### ! # ! # Character Class Definitions. ! # The names are those from TR29. ! # ! $Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]]; ! ! # Note on $Extend: Earlier versions of TR29 included Mc characters. ! # To avoid test breakage, Mc is still included for the time being. ! # $Extend = [[:Mn:] [:Me:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! $Extend = [[:Mn:] [:Me:] [:Mc:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! ! ! ! ! #################################################################################### ! # ! # Word Break Rules. Definitions and Rules specific to word break begin Here. ! # ! #################################################################################### ! ! $Katakana = [[:Kana:] \u30fc \uff70 \uff9e-\uff9f]; ! $Hiragana = [[:Hira:]]; ! $Letter = [[[:Alphabetic:] \u0002 \u002e \u0040 \u02b9-\u02ba \u02c2-\u02cf \u02d2-\u02df \u02e5-\u02ed \u05f3] - ! [[:THAI:] [:LAO:] $Hiragana $Katakana ]]; ! 
$Format = [[:Cf:]]; ! ! $MidLetter = [\u0027 \u0060 \u0084 \u00ad \u0384 \u05f4 \u2016 \u2018 \u2019 \u2032 ]; ! ! ! # From Line Break, IS - Numeric Separator (Infix) ! # $IS = [\u002c \u002e \u003a \u003b \u0589]; ! $MidNum = [\u002c \u002e \u003b \u0040 \u0084 \u0384 \u0589 \u066b \u2018 \u2019 \u201b \u2032]; ! ! # ! # "Extended" definitions. Classes of characters including trailing combining chars and, ! # for types of chars that can appear in the interior of a word only, ! # trailing format characters. ! # ! $LetterEx = $Letter $Extend*; ! $NumericEx = $Numeric $Extend*; ! $MidNumExF = $MidNum $Extend* $Format*; ! $MidLetterExF = $MidLetter $Extend* $Format*; ! ! ! # ! # Numbers. Rules 6, 9, 10 form the TR. ! # ! $NumberSequence = $NumericEx ($Format* $MidNumExF? $NumericEx)*; ! $NumberSequence {100}; ! ! # ! # Words. Alpha-numerics. Rule 3 - 10 ! # - must include at least one letter. ! # - may include both letters and numbers. ! # - may include MideLetter, MidNumber punctuation. ! # ! $LetterSequence = $LetterEx ($Format* $MidLetterExF? $LetterEx)*; ! $NumberSequence? $LetterSequence ($NumberSequence | $LetterSequence)* {200}; ! ! # puctuations by themselves ! [:P:]*; ! ! # ! # Hiragana and Katakana ! # ! $Hiragana $Extend* {300}; ! $Katakana $Extend* ($Format* $Katakana $Extend*)* {300}; ! ! # ! # Ideographic Characters. Stand by themselves as words. ! # ! # [:IDEOGRAPHIC:] $Extend* {400}; ! ! # ! # Everything Else, with no tag. ! # Non-Control chars combine with $Extend (combining) chars. ! # Controls are returned by themselves. ! # ! [^$Control] $Extend*; ! \r\n; ! .; ! ! # ! # Reverse Rules. Back up over any of the chars that can group together. ! # (Reverse rules do not need to be exact; they can back up too far, ! # but must back up at least enough, and must stop on a boundary.) ! # ! ! # NonStarters are the set of all characters that can appear at the 2nd - nth position of ! # a word. (They may also be the first.) 
The reverse rule skips over these, until it ! # reaches something that can only be the start (and probably only) char in a "word". ! # A space or punctuation meets the test. ! # ! $NonStarters = [$Numeric $Letter $Katakana $MidLetter $MidNum $Extend $Format \u000a]; ! ! ! $NonStarters* .; *** misc/icu/source/data/brkitr/edit_word.txt Tue May 27 15:41:24 2003 --- misc/build/icu/source/data/brkitr/edit_word.txt Tue May 27 15:21:32 2003 *************** *** 1 **** ! dummy --- 1,130 ---- ! # ! # Copyright (C) 2002, International Business Machines Corporation and others. ! # All Rights Reserved. ! # ! # file: word.txt ! # ! # ICU Word Break Rules ! # See Unicode Technical Report #29. ! # These rules are based on the proposed draft dated 2002-08-06 ! # ! ! ! ! #################################################################################### ! # ! # Definitions imported from Line Break Rules. ! # ! #################################################################################### ! $Numeric = [ \u0030-\u0039 \u0660-\u0669 \u06F0-\u06F9 \u0966-\u096F \u09E6-\u09EF ! \u0A66-\u0A6F \u0AE6-\u0AEF \u0B66-\u0B6F \u0BE7-\u0BEF \u0C66-\u0C6F ! \u0CE6-\u0CEF \u0D66-\u0D6F \u0E50-\u0E59 \u0ED0-\u0ED9 \u0F20-\u0F29 ! \u1040-\u1049 \u1369-\u1371 \u17E0-\u17E9 \u1810-\u1819 \U0001D7CE-\U0001D7FF]; ! ! ! ! #################################################################################### ! # ! # Definitions imported from Character Break Rules. ! # ! #################################################################################### ! # ! # Character Class Definitions. ! # The names are those from TR29. ! # ! $Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]]; ! ! # Note on $Extend: Earlier versions of TR29 included Mc characters. ! # To avoid test breakage, Mc is still included for the time being. ! # $Extend = [[:Mn:] [:Me:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! $Extend = [[:Mn:] [:Me:] [:Mc:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! ! ! ! ! 
#################################################################################### ! # ! # Word Break Rules. Definitions and Rules specific to word break begin Here. ! # ! #################################################################################### ! ! $Katakana = [[:Kana:] \u30fc \uff70 \uff9e-\uff9f]; ! $Hiragana = [[:Hira:]]; ! $Letter = [[[:Alphabetic:] \u0002 \u00a0 \u02b9-\u02ba \u02c2-\u02cf \u02d2-\u02df \u02e5-\u02ed \u05f3] - ! [[:THAI:] [:LAO:] $Hiragana $Katakana ]]; ! $Format = [[:Cf:]]; ! ! $MidLetter = [\u0027 \u00ad \u05f4 \u2019]; ! ! $MidNumLet = [\u002e]; ! ! ! # From Line Break, IS - Numeric Separator (Infix) ! # $IS = [\u002c \u002e \u003a \u003b \u0589]; ! $MidNum = [\u002c \u003b \u0589]; ! ! # ! # "Extended" definitions. Classes of characters including trailing combining chars and, ! # for types of chars that can appear in the interior of a word only, ! # trailing format characters. ! # ! $LetterEx = $Letter $Extend*; ! $NumericEx = $Numeric $Extend*; ! $MidNumExF = $MidNum $Extend* $Format*; ! $MidNumLetExF = $MidNumLet $Extend* $Format*; ! $MidLetterExF = $MidLetter $Extend* $Format*; ! ! ! # ! # Numbers. Rules 6, 9, 10 form the TR. ! # ! $NumberSequence = $NumericEx ($Format* ($MidNumExF | $MidNumLetExF)? $NumericEx)*; ! $NumberSequence {100}; ! ! # ! # Words. Alpha-numerics. Rule 3 - 10 ! # - must include at least one letter. ! # - may include both letters and numbers. ! # - may include MideLetter, MidNumber punctuation. ! # ! $LetterSequence = $LetterEx ($Format* ($MidLetterExF | $MidNumLetExF)? $LetterEx)*; ! $NumberSequence? $LetterSequence ($NumberSequence | $LetterSequence)* {200}; ! ! # Punctuations by themselves ! [[:P:]-[\u002E]]*; ! [\u002E]*; ! ! # ! # Hiragana and Katakana ! # ! $Hiragana $Extend* {300}; ! $Katakana $Extend* ($Format* $Katakana $Extend*)* {300}; ! ! # ! # Ideographic Characters. Stand by themselves as words. ! # ! # [:IDEOGRAPHIC:] $Extend* {400}; ! ! # ! # Everything Else, with no tag. ! 
# Non-Control chars combine with $Extend (combining) chars. ! # Controls are returned by themselves. ! # ! [^$Control] $Extend*; ! \r\n; ! .; ! ! # ! # Reverse Rules. Back up over any of the chars that can group together. ! # (Reverse rules do not need to be exact; they can back up too far, ! # but must back up at least enough, and must stop on a boundary.) ! # ! ! # NonStarters are the set of all characters that can appear at the 2nd - nth position of ! # a word. (They may also be the first.) The reverse rule skips over these, until it ! # reaches something that can only be the start (and probably only) char in a "word". ! # A space or punctuation meets the test. ! # ! $NonStarters = [$Numeric $Letter $Katakana $MidLetter $MidNum $MidNumLet $Extend $Format \u000a]; ! ! ! $NonStarters* .; *** misc/icu/source/data/brkitr/line.txt Tue Jul 23 08:02:06 2002 --- misc/build/icu/source/data/brkitr/line.txt Tue May 27 15:21:32 2003 *************** *** 102,108 **** # $NumberInterior = $IDcm | ($NUcm | $ALcm | $IS $NUcm)+; $Number = $PR? ($OPcm | $HYcm)? $NumberInterior $CL? $POcm?; # Fancy Number 18 ! $Word = (($IDcm | ($ALcm | $NUcm)+) ($POcm? | $INcm?)) ; # Alpha-numeric. 16, 17 $Dashes = (($B2cm $SP*)*); # Dashes 11a --- 102,108 ---- # $NumberInterior = $IDcm | ($NUcm | $ALcm | $IS $NUcm)+; $Number = $PR? ($OPcm | $HYcm)? $NumberInterior $CL? $POcm?; # Fancy Number 18 ! $Word = (($IDcm | ($ALcm | $NUcm | $EX | $IS | $SY | $PR)+) ($POcm? | $INcm?)) ; # Alpha-numeric. 16, 17 $Dashes = (($B2cm $SP*)*); # Dashes 11a *** misc/icu/source/data/makedata.mak Sat Aug 10 06:55:36 2002 --- misc/build/icu/source/data/makedata.mak Tue May 27 15:21:32 2003 *************** *** 238,244 **** # # Break iterator data files. # ! BRK_FILES = $(ICUDT)sent.brk $(ICUDT)char.brk $(ICUDT)line.brk $(ICUDT)word.brk $(ICUDT)title.brk $(ICUDT)line_th.brk $(ICUDT)word_th.brk #invoke pkgdata for ICU common data # pkgdata will drop all output files (.dat, .dll, .lib) into the target (ICUBLD) directory. 
--- 238,244 ---- # # Break iterator data files. # ! BRK_FILES = "$(ICUDT)sent.brk" "$(ICUDT)char.brk" "$(ICUDT)line.brk" "$(ICUDT)word.brk" "$(ICUDT)edit_word.brk" "$(ICUDT)dict_word.brk" "$(ICUDT)count_word.brk" "$(ICUDT)title.brk" "$(ICUDT)line_th.brk" "$(ICUDT)word_th.brk" #invoke pkgdata for ICU common data # pkgdata will drop all output files (.dat, .dll, .lib) into the target (ICUBLD) directory. *************** *** 283,288 **** --- 283,297 ---- "$(ICUBLD)\$(ICUDT)word.brk" : "$(ICUBRK)\word.txt" $(BRKDEPS) genbrk -r "$(ICUBRK)\word.txt" -o $@ -d"$(ICUBLD)" -i "$(ICUBLD)\\" + + "$(ICUBLD)\$(ICUDT)edit_word.brk" : "$(ICUBRK)\edit_word.txt" $(BRKDEPS) + genbrk -r "$(ICUBRK)\edit_word.txt" -o $@ -d"$(ICUBLD)" -i "$(ICUBLD)\\" + + "$(ICUBLD)\$(ICUDT)dict_word.brk" : "$(ICUBRK)\dict_word.txt" $(BRKDEPS) + genbrk -r "$(ICUBRK)\dict_word.txt" -o $@ -d"$(ICUBLD)" -i "$(ICUBLD)\\" + + "$(ICUBLD)\$(ICUDT)count_word.brk" : "$(ICUBRK)\count_word.txt" $(BRKDEPS) + genbrk -r "$(ICUBRK)\count_word.txt" -o $@ -d"$(ICUBLD)" -i "$(ICUBLD)\\" "$(ICUBLD)\$(ICUDT)line.brk" : "$(ICUBRK)\line.txt" $(BRKDEPS) genbrk -r "$(ICUBRK)\line.txt" -o $@ -d"$(ICUBLD)" -i "$(ICUBLD)\\" *** misc/icu/source/icudefs.mk.in Thu Aug 1 06:28:32 2002 --- misc/build/icu/source/icudefs.mk.in Tue May 27 15:37:19 2003 *************** *** 136,142 **** --- 136,146 ---- SHLIB.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS) -shared # Environment variable to set a runtime search path + ifeq ($(OS), IRIX) + LDLIBRARYPATH_ENVVAR = LD_LIBRARYN32_PATH + else LDLIBRARYPATH_ENVVAR = LD_LIBRARY_PATH + endif # Versioned target for a shared library. 
FINAL_SO_TARGET = $(SO_TARGET).$(SO_TARGET_VERSION) *** misc/icu/source/layout/ArabicLayoutEngine.cpp Wed Jul 17 05:29:18 2002 --- misc/build/icu/source/layout/ArabicLayoutEngine.cpp Tue May 27 15:21:32 2003 *************** *** 128,134 **** --- 128,138 ---- GDEFMarkFilter filter(fGDEFTable); adjustMarkGlyphs(glyphs, glyphCount, false, &filter, positions, success); + #if 1 // HDU: #105697# TODO: improve handling of deleted/mark glyphs + } else if( count == glyphCount ) { + #else } else { + #endif GlyphDefinitionTableHeader *gdefTable = (GlyphDefinitionTableHeader *) ArabicShaping::glyphDefinitionTable; GDEFMarkFilter filter(gdefTable); *************** *** 206,211 **** --- 199,219 ---- charIndices = tempCharIndices; + #if 1 // HDU TODO: improve handling of deleted/mark glyphs + // remove deleted glyphs (0xFFFF) and deleted marks (0xFFFE) + // NOTE: we are removing them inplace + int nGlyphCount = 0; + for( int i = 0; i < tempGlyphCount; ++i ) + if( tempGlyphs[i] < 0xFFFE ) + { + tempGlyphs[ nGlyphCount ] = tempGlyphs[ i ]; + charIndices[ nGlyphCount ] = charIndices[ i ]; + ++nGlyphCount; + } + + tempGlyphCount = nGlyphCount; + #endif + ArabicOpenTypeLayoutEngine::mapCharsToGlyphs(tempChars, 0, tempGlyphCount, false, true, glyphs, charIndices, success); LE_DELETE_ARRAY(tempChars); *************** *** 277,282 **** --- 281,291 ---- success = LE_ILLEGAL_ARGUMENT_ERROR; return; } + + #if 1 // HDU: #105697# TODO: improve handling of deleted/mark glyphs + if( count != glyphCount ) + return; + #endif GDEFMarkFilter filter(fGDEFTable); *** misc/icu/source/layout/GXLayoutEngine.cpp Sat Jun 29 09:58:44 2002 --- misc/build/icu/source/layout/GXLayoutEngine.cpp Tue May 27 15:21:32 2003 *************** *** 39,45 **** return 0; } ! mapCharsToGlyphs(chars, offset, count, false, rightToLeft, glyphs, charIndices, success); if (LE_FAILURE(success)) { return 0; --- 39,45 ---- return 0; } ! 
mapCharsToGlyphs(chars, offset, count, rightToLeft, rightToLeft, glyphs, charIndices, success); if (LE_FAILURE(success)) { return 0; *** misc/icu/source/layout/LESwaps.h Wed Aug 14 10:17:50 2002 --- misc/build/icu/source/layout/LESwaps.h Tue May 27 15:21:32 2003 *************** *** 3,8 **** --- 3,9 ---- * @(#)LESwaps.h 1.3 00/03/15 * * (C) Copyright IBM Corp. 1998-2003 - All Rights Reserved + * with additions by Sun Microsystems 2002 * */ *************** *** 14,35 **** U_NAMESPACE_BEGIN ! /** ! * A convenience macro which invokes the swapWord member function ! * from a concise call. ! * ! * @draft ICU 2.2 ! */ ! #define SWAPW(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapWord(value)) ! /** ! * A convenience macro which invokes the swapLong member function ! * from a concise call. ! * ! * @draft ICU 2.2 ! */ ! #define SWAPL(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapLong(value)) /** * This class is used to access data which stored in big endian order --- 14,50 ---- U_NAMESPACE_BEGIN ! // the ALLOW_UNALIGNED hack prevents crashes with font files ! // containing unaligned tables on platforms that do not tolerate ! // unaligned memory accesses; it works by assuming that every ! // use of the SWAPx macros is intended for accessing a big endian ! // value e.g. for "Watanabe Gothic"'s "mort" table ! #define ALLOW_UNALIGNED_HACK + #ifndef ALLOW_UNALIGNED_HACK + /** + * These are convenience macros which invoke the swap functions + * from a concise call. + */ + #define SWAPW(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapWord(value)) + #define SWAPL(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapLong(value)) + #else + #define SWAPW(rValue) loadBigEndianWord(reinterpret_cast<const le_uint16&>(rValue)) + #define SWAPL(rValue) loadBigEndianLong(reinterpret_cast<const le_uint32&>(rValue)) ! inline le_uint16 loadBigEndianWord( const le_uint16& rValue ) ! { ! const le_uint8* p = reinterpret_cast<const le_uint8*>(&rValue); ! return ((p[0] << 8) + p[1]); ! } ! ! 
inline le_uint32 loadBigEndianLong( const le_uint32& rValue ) ! { ! const le_uint8* p = reinterpret_cast<const le_uint8*>(&rValue); ! return ((static_cast<le_uint32>(p[0])<<24) + (static_cast<le_uint32>(p[1])<<16) + (static_cast<le_uint32>(p[2])<<8) + p[3]); ! } ! ! #endif /** * This class is used to access data which stored in big endian order *** misc/icu/source/layout/OpenTypeLayoutEngine.h Wed Aug 14 10:17:50 2002 --- misc/build/icu/source/layout/OpenTypeLayoutEngine.h Tue May 27 15:21:32 2003 *************** *** 318,323 **** --- 316,334 ---- glyphs = tempGlyphs; charIndices = tempCharIndices; + + #if 1 // HDU: remove deleted glyphs (0xFFFF) and deleted marks (0xFFFE) + int nGlyphCount = 0; + for( int i = 0; i < tempGlyphCount; ++i ) + if( glyphs[i] < 0xFFFE ) + { + glyphs[ nGlyphCount ] = glyphs[ i ]; + charIndices[ nGlyphCount ] = charIndices[ i ]; + ++nGlyphCount; + } + + tempGlyphCount = nGlyphCount; + #endif return tempGlyphCount; }; *** misc/icu/source/configure 2003-06-15 21:51:34.000000000 +1000 --- misc/build/icu/source/configure 2003-06-15 22:01:15.000000000 +1000 *************** *** 1542,1551 **** --- 1542,1555 ---- fi ;; *-*-*bsd*) icu_cv_host_frag=mh-bsd-gcc ;; *-*-aix*) + if test "$ac_cv_prog_gcc" = yes; then + icu_cv_host_frag=mh-aix-gcc + else if test -n "`$CXX --help 2>&1 | grep 'IBM C and C++ Compilers$'`"; then icu_cv_host_frag=mh-aix else icu_cv_host_frag=mh-aix-va + fi fi ;; *-*-hpux*) if test "$ac_cv_prog_gcc" = yes; then