Replicate script:
#include <Array.au3> ; _ArrayToString() is a UDF from Array.au3, not a built-in function

; Reproduction script: compares the output of StringToASCIIArray() for the
; encoding argument 0 (labelled UTF-16 below) and 2 (labelled UTF-8 below).
;
;; Correct console display needs a decent programming font with Unicode characters
;; (e.g. DejaVu Sans Mono) and the Unicode code page in SciTE.
;
; In SciTEGlobal.properties change to code.page=65001:
;~ # Internationalisation
;~ # Japanese input code page 932 and ShiftJIS character set 128
;~ #code.page=932
;~ #character.set=128
;~ # Unicode
;~ code.page=65001 <<<<<<<<<<<<<<<<<<<<<<<<<<<
;~ #code.page=0

; $str = e-acute 0x00E9, e-circumflex 0x00EA, e-grave 0x00E8, space 0x0020,
;        A-caron 0x01CD, a-caron 0x01CE, I-caron 0x01CF, fi ligature 0xFB01
; The ligature is appended with ChrW() rather than typed literally: copy/paste
; through forums or editors tends to normalise U+FB01 into the two letters "fi",
; which would silently change the test string (length 9 instead of 8).
Local $str = "éêè ǍǎǏ" & ChrW(0xFB01)

__ConsoleWrite('StringLen("' & $str & '"): ' & StringLen($str) & @LF) ; correct but only for characters < 0x010000 (~UCS-2 charset)

Local $a = StringToASCIIArray($str, Default, Default, 0) ; length correct but values incorrectly masked with 0x00FF
Local $b = StringSplit($str, '', 2) ; one array element per character, no count in element 0

__ConsoleWrite('Glyph ' & @TAB & _ArrayToString($b, @TAB) & @LF)
ConsoleWrite('UTF-16 ' & @TAB & _ArrayToString($a, @TAB) & @LF)

; Same values again, as 4-digit hex
For $i = 0 To UBound($a) - 1
    $a[$i] = Hex($a[$i], 4)
Next
ConsoleWrite(' ' & @TAB & _ArrayToString($a, @TAB) & @LF)

;; UTF-8 is all correct
$a = StringToASCIIArray($str, Default, Default, 2) ; length and contents are correct
ConsoleWrite('UTF-8 ' & @TAB & _ArrayToString($a, @TAB) & @LF)

; Same values again, as 2-digit hex
For $i = 0 To UBound($a) - 1
    $a[$i] = Hex($a[$i], 2)
Next
ConsoleWrite(' ' & @TAB & _ArrayToString($a, @TAB) & @LF)

Exit

; __ConsoleWrite: converts $sText to code page 65001 (UTF-8) with the Win32
; WideCharToMultiByte API and writes the resulting char buffer with ConsoleWrite().
;
; Parameters: $sText - text to write.
; Return:     nothing useful on success (same as before);
;             on failure returns 0 and sets @error:
;               1 - the size query failed (DllCall error or non-positive size)
;               2 - the conversion call failed
Func __ConsoleWrite($sText)
    ; First call: null output buffer with size 0, so the API only reports the
    ; buffer size it needs. The input length is passed as -1.
    Local $aResult = DllCall("kernel32.dll", "int", "WideCharToMultiByte", "uint", 65001, "dword", 0, "wstr", $sText, "int", -1, _
            "ptr", 0, "int", 0, "ptr", 0, "ptr", 0)
    ; Without this guard a failed DllCall leaves $aResult as a non-array and
    ; the subscript below aborts the script.
    If @error Or Not IsArray($aResult) Then Return SetError(1, 0, 0)

    Local $iSize = $aResult[0]
    If $iSize <= 0 Then Return SetError(1, 0, 0)

    Local $tText = DllStructCreate("char[" & $iSize & "]")

    ; Second call: same arguments, now with the real output buffer.
    $aResult = DllCall("kernel32.dll", "int", "WideCharToMultiByte", "uint", 65001, "dword", 0, "wstr", $sText, "int", -1, _
            "ptr", DllStructGetPtr($tText), "int", $iSize, "ptr", 0, "ptr", 0)
    If @error Or Not IsArray($aResult) Then Return SetError(2, 0, 0)
    If $aResult[0] = 0 Then Return SetError(2, 0, 0)

    ConsoleWrite(DllStructGetData($tText, 1))
EndFunc   ;==>__ConsoleWrite
You may have to follow the instructions included in the script to get a correct display.
Here's the result:
;>Running:(3.3.5.4):C:\Program Files\AutoIt3\beta\autoit3.exe "D:\XLequit\AutoMAT\Test\try.au3"
;StringLen("éêè ǍǎǏfi"): 8
;Glyph é ê è Ǎ ǎ Ǐ fi
;UTF-16 233 234 232 32 205 206 207 1
; 00E9 00EA 00E8 0020 00CD 00CE 00CF 0001
;UTF-8 195 169 195 170 195 168 32 199 141 199 142 199 143 239 172 129
; C3 A9 C3 AA C3 A8 20 C7 8D C7 8E C7 8F EF AC 81
XP SP3 x86 (if that matters)
Edit: I couldn't find markup that preserves the space alignment in the result above. Giving up for today.
This post has been edited by jchd: 15 February 2010 - 02:07 AM

Sign In
Register
Help
MultiQuote
