Make script classification context-aware: the ASCII quotation mark and
apostrophe, and Hebrew Geresh/Gershayim, are classified as WEAK when they
directly follow a Hebrew letter (U+05D0..U+05EA).

Review fixes relative to the first version of this patch:
 - getScriptClass(): the one-entry result cache is keyed on its own statics
   (character and preceding character) instead of on the new lastChar
   parameter, so a character equal to its predecessor no longer returns a
   stale result; the dead store to the lastChar parameter is gone.
 - the scan loop seeds prev_ch from the character at nStartPos, not Text[0].
 - changeToWeak(): the Hebrew-context cases return explicitly instead of
   falling through into default.

Note: this patch was recovered from a whitespace-collapsed copy. Indentation
and blank context lines were reconstructed from the hunk line counts; apply
with "patch -l" if the context whitespace does not match exactly.

Index: i18npool/inc/breakiteratorImpl.hxx
===================================================================
RCS file: /cvs/l10n/i18npool/inc/breakiteratorImpl.hxx,v
retrieving revision 1.8
diff -u -r1.8 breakiteratorImpl.hxx
--- i18npool/inc/breakiteratorImpl.hxx	10 Apr 2008 08:37:03 -0000	1.8
+++ i18npool/inc/breakiteratorImpl.hxx	12 Nov 2008 07:34:29 -0000
@@ -120,7 +120,10 @@
     virtual com::sun::star::uno::Sequence< rtl::OUString > SAL_CALL getSupportedServiceNames()
         throw( com::sun::star::uno::RuntimeException );
 
-    static sal_Int16 SAL_CALL getScriptClass(sal_uInt32 currentChar);
+    // Script class of currentChar; lastChar is the character preceding it in the text (0 if none).
+    static sal_Int16 SAL_CALL getScriptClass(sal_uInt32 currentChar, sal_uInt32 lastChar = 0);
+    // True if ch has to be treated as ScriptType::WEAK, given the preceding character prev_ch.
+    static sal_Bool SAL_CALL changeToWeak(sal_uInt32 ch, sal_uInt32 prev_ch);
 protected:
     Boundary result; // for word break iterator
 
Index: i18npool/source/breakiterator/breakiteratorImpl.cxx
===================================================================
RCS file: /cvs/l10n/i18npool/source/breakiterator/breakiteratorImpl.cxx,v
retrieving revision 1.27.8.1
diff -u -r1.27.8.1 breakiteratorImpl.cxx
--- i18npool/source/breakiterator/breakiteratorImpl.cxx	14 Aug 2008 16:27:31 -0000	1.27.8.1
+++ i18npool/source/breakiterator/breakiteratorImpl.cxx	12 Nov 2008 07:34:29 -0000
@@ -300,10 +300,17 @@
 
     sal_Int32 strLen = Text.getLength();
     sal_uInt32 ch=0;
+    // Context for the first character the loop examines.  This assumes the
+    // iterateCodePoints() helper steps nStartPos forward before loading ch, so
+    // the preceding character is the one at nStartPos (0 if out of range).
+    sal_uInt32 prev_ch = 0;
+    if (nStartPos >= 0 && nStartPos < strLen)
+        prev_ch = Text.getStr()[nStartPos];
     while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) {
-        sal_Int16 currentCharScriptType = getScriptClass(ch);
+        sal_Int16 currentCharScriptType = getScriptClass(ch, prev_ch);
         if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)
             break;
+        prev_ch = ch;
     }
     return nStartPos;
 }
@@ -456,19 +463,48 @@
 
 #define scriptListCount sizeof (scriptList) / sizeof (UBlock2Script)
 
-sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
+// Returns true if ch has to be classified as ScriptType::WEAK.  Characters 1, 2,
+// 0x20 and 0xA0 are always weak; the quote-like characters below are weak only
+// when prev_ch, the character preceding ch in the text, is a Hebrew letter.
+sal_Bool BreakIteratorImpl::changeToWeak(sal_uInt32 ch, sal_uInt32 prev_ch)
+{
+    switch (ch) {
+        case 1 :
+        case 2 :
+        case 0x20 :
+        case 0xA0 :
+            return true;
+        // Geresh or Gershayim or quotes following Hebrew
+        case 0x22 : // quotation mark
+        case 0x05F4 : // Gershayim
+        case 0x27 : // apostrophe
+        case 0x05F3 : // Geresh
+            // explicit result, no fall-through into default
+            return prev_ch >= 0x05D0 && prev_ch <= 0x05EA;
+        default :
+            return false;
+    }
+}
+
+// Returns the script class of currentChar.  lastChar is the character that
+// precedes currentChar in the text; it only influences the changeToWeak() test.
+sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar, sal_uInt32 lastChar)
 {
-    static sal_uInt32 lastChar = 0;
+    // One-entry result cache.  The result depends on both arguments, so both are
+    // part of the key; the key must not be the lastChar parameter itself.
+    static sal_uInt32 cachedChar = 0;
+    static sal_uInt32 cachedPrev = 0;
     static sal_Int16 nRet = 0;
 
-    if (currentChar != lastChar) {
-        lastChar = currentChar;
+    if (currentChar != cachedChar || lastChar != cachedPrev) {
+        cachedChar = currentChar;
+        cachedPrev = lastChar;
 
         //JP 21.9.2001: handle specific characters - always as weak
         // definition of 1 - this breaks a word
         // 2 - this can be inside a word
         // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
-        if( 1 == currentChar || 2 == currentChar || 0x20 == currentChar || 0xA0 == currentChar)
+        if ( changeToWeak(currentChar, lastChar))
             nRet = ScriptType::WEAK;
         // workaround for Coptic
         else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar)