Jump to content

Unicode functions


sulfurious
 Share

Recommended Posts

Here are some functions I built to work with some Unicode files.

Use the following in a script that includes the functions to see how it works. I built them to handle the flags in Unicode hex.

Try this for $a -

320035002C00300030002C00340031002C00300030002C00350030002C00300030002C00350030002C00300030002C003400

34002C00300030002C00340031002C00300030002C00350034002C00300030002C00340031002C00300030002C0032003500

2C00300030002C00320035002C00300030002C00350033002C00300030002C00350039002C00300030002C00350033002C00

300030002C00350034002C00300030002C00340035002C00300030002C00340064002C00300030002C00340034002C003000

30002C00350032002C00300030002C00340039002C00300030002C00350036002C00300030002C00340035002C0030003000

2C00320035002C00300030002C00300030002C0030003000

; Demo driver for the conversion functions defined below.
; NOTE: the next line is a placeholder, not valid AutoIt - assign the sample hex text
; from the post to $a as a quoted string before running.
$a = some hex here
MsgBox(0,"Binary string hex value",$a)

; Try each of these one at a time: every assignment overwrites $b, so as written only
; the result of the last call (comma stripping on, pad value "25") reaches the MsgBox calls.
$b = _BinaryHexToHex16($a)
$b = _BinaryHexToHex16($a,True)
$b = _BinaryHexToHex16($a,True,"25")

; Show the converted hex, then decode it with the standard and the 16-bit decoder.
MsgBox(0,"2byte hex converted to 1byte hex",$b)
MsgBox(0,"Example of standard _HexToString",_HexToString($b))
MsgBox(0,"Example of _Hex16toString",_Hex16toString($b))

; And each of these: encode $b again, without and with comma separators.
MsgBox(0,"Example of _StringToHex16",_StringToHex16($b))
MsgBox(0,"Example of _StringToHex16",_StringToHex16($b,True))

;===============================================================================
;
; Function Name:    _BinaryHexToHex16($HexVal,$StripComma=False,$HexPad="")
; Description:      Binary string values are exported as 2 bytes for each 1 byte value.
;                   Convert this to all 1 byte values. Pad a specific hex value as option.
;                   Strip "comma" from exported value as option.
;                   The input is read in groups of 8 hex digits (two 16-bit units); the
;                   low byte of each unit is turned into one character of the output.
; Parameter(s):     $HexVal - the entire hex value to convert
;                   $StripComma - True or False; True removes every "2C00" (comma) first
;                   $HexPad - the hex value to find one instance of & duplicate (ie \ to \\)
;                       (note: this must be in single hex value - ie. 5C = \ )
;                       A matching pair XX is emitted as XX & "00" & XX.
; Requirement(s):   Binary string as hex.
; Return Value(s):  On Success - Returns the converted 2 byte hex to 1 byte hex.
;                   On Failure - -1  and sets @ERROR = 1
;                       ($HexPad is not exactly two hex digits, or $HexVal contains a
;                        byte that is not two hex digits)
; Author(s):        Sulfurious
; Corrected:        2006/09/05
;
;===============================================================================
Func _BinaryHexToHex16($HexVal,$StripComma=False,$HexPad="")
    Local $hxFIN = "", $PadIt = False, $ihx, $iOff, $hxTMP, $hxByte

    ; An empty $HexPad disables padding; anything else must be exactly two hex digits.
    If StringLen($HexPad) <> 0 Then
        If StringLen($HexPad) <> 2 Or Not StringIsXDigit($HexPad) Then
            SetError(1)
            Return -1
        EndIf
        $PadIt = True
    EndIf

    If $StripComma = True Then $HexVal = StringReplace($HexVal, "2C00", "")

    For $ihx = 1 To StringLen($HexVal) Step 8
        $hxTMP = ""
        ; Low bytes of the two 16-bit units sit at offsets 0 and 4 of the group.
        For $iOff = 0 To 4 Step 4
            $hxByte = StringMid($HexVal, $ihx + $iOff, 2)
            ; A trailing group may hold only one unit; that is not an error.
            If StringLen($hxByte) = 0 Then ExitLoop
            ; Fail explicitly instead of letting a decoder error code leak into the output.
            If StringLen($hxByte) <> 2 Or Not StringIsXDigit($hxByte) Then
                SetError(1)
                Return -1
            EndIf
            $hxTMP &= Chr(Dec($hxByte))
        Next
        If $PadIt And $hxTMP = $HexPad Then $hxTMP = $HexPad & "00" & $HexPad
        $hxFIN &= $hxTMP
    Next
    Return $hxFIN
EndFunc ;==> _BinaryHexToHex16

;===============================================================================
; TAKEN FROM INCLUDES OF AUTOIT v3.2 - WRITTEN BY Jarvis Stubblefield
; Decodes a string of hex digit pairs into the characters they encode.
; Parameter(s):     $strHex - hex digits, two per output character
; Return Value(s):  On Success - the decoded string
;                   On Failure - -1 and sets @ERROR = 1 (odd number of digits,
;                                or Dec() reported an error for a pair)
Func _HexToString($strHex)
    Local $sDecoded, $aDigits, $iPos, $iCode

    ; Split into single characters; element 0 holds the count.
    $aDigits = StringSplit($strHex, "")
    If Mod($aDigits[0], 2) <> 0 Then
        SetError(1)
        Return -1
    EndIf

    ; Walk the digits two at a time, one pair per output character.
    For $iPos = 1 To $aDigits[0] Step 2
        $iCode = Dec($aDigits[$iPos] & $aDigits[$iPos + 1])
        If @error <> 0 Then
            SetError(1)
            Return -1
        EndIf
        $sDecoded &= Chr($iCode)
    Next

    Return $sDecoded
EndFunc   ;==>_HexToString

;===============================================================================
;
; Function Name:    _Hex16toString($hByte)
; Description:      Hexadecimal in Ascii/Ansi represented as Hex (5C).
;                   Hexadecimal in Unicode represented as Hex (5C00).
;                   Convert Unicode Hex to string.
;                   Only the first (low) byte of every 4-digit unit is decoded; the
;                   second byte is ignored, so characters above 255 are not preserved.
; Parameter(s):     $hByte - the entire hex value to convert
; Requirement(s):   String as Unicode hex. No comma's are allowed.
;                   Hex value must be multiple of 4 digits.
; Return Value(s):  On Success - Returns the converted string of characters.
;                   On Failure - -1  and sets @ERROR = 1
;                       (length not a multiple of 4, or non-hex characters present)
; Author(s):        Sulfurious
; Corrected:        2006/09/05
;
;===============================================================================
Func _Hex16toString($hByte)
    Local $i, $sResult = ""
    Local $iLen = StringLen($hByte)

    If Mod($iLen, 4) <> 0 Then
        SetError(1)
        Return -1
    EndIf
    ; Empty input is valid and yields an empty string; anything else must be pure hex.
    If $iLen > 0 And Not StringIsXDigit($hByte) Then
        SetError(1)
        Return -1
    EndIf

    ; One 4-digit unit per character. Stepping by 4 (not 8) avoids reading past the
    ; end and appending a stray Chr(0) when the number of units is odd.
    For $i = 1 To $iLen Step 4
        $sResult &= Chr(Dec(StringMid($hByte, $i, 2)))
    Next
    Return $sResult
EndFunc   ;==> _Hex16toString

;===============================================================================
; TAKEN FROM INCLUDES OF AUTOIT v3.2 - WRITTEN BY Jarvis Stubblefield
; Encodes every character of a string as two hex digits.
; Parameter(s):     $strChar - the string to encode
; Return Value(s):  The concatenated two-digit hex codes (Asc value of each character).
Func _StringToHex($strChar)
    Local $aChars, $iIdx, $sHex

    ; Split into single characters; element 0 holds the count.
    $aChars = StringSplit($strChar, "")

    For $iIdx = 1 To $aChars[0]
        $sHex &= Hex(Asc($aChars[$iIdx]), 2)
    Next

    Return $sHex
EndFunc   ;==>_StringToHex

;===============================================================================
;
; Function Name:    _StringToHex16("string")
; Description:      Convert a string of characters to Unicode hexadecimal.
;                   Each character becomes its two-digit Asc() code followed by "00".
; Parameter(s):     $strChar is the string you want to convert.
;                   Optional: $bComma is boolean to insert a comma between hex values.
; Requirement(s):   String Input.
; Return Value(s):  Returns the converted string in Unicode hexadecimal (ie 5C,00,22,00).
;                   With $bComma the commas only separate values; there is no
;                   trailing comma. An empty input returns an empty string.
; Author(s):        Sulfurious
; Corrected:        2005/09/05
;
;===============================================================================

Func _StringToHex16($strChar,$bComma=False)
    Local $i, $hStr = "", $sSep = ""
    If $bComma = True Then $sSep = ","
    For $i = 1 To StringLen($strChar)
        ; Separator goes between characters only, so the result never ends in a comma.
        If $i > 1 Then $hStr &= $sSep
        $hStr &= Hex(Asc(StringMid($strChar, $i, 1)), 2) & $sSep & "00"
    Next
    Return $hStr
EndFunc   ;==>_StringToHex16
Link to comment
Share on other sites

  • Moderators

Erifash I believe made the _StringToHex and _HexToString a tad faster...

; Round-trip demo: encode a sample string to hex, show it, decode it back, show it again.
$s = 'SmoKe_N'
$shex = _StringToHexEx($s)
MsgBox(0, '1', $shex)
; Decode the hex produced above to confirm it converts back.
$sstring = _HexToStringEx($shex)
MsgBox(0, '2', $sstring)

; Converts $sText to its hex representation via Hex(BinaryString(...)).
; Parameter(s):     $sText - the text to encode
;                   $i0x   - when true, the result is prefixed with '0x'
; Return Value(s):  The hex string, with or without the '0x' prefix.
Func _StringToHexEx($sText, $i0x = False)
    Local $sPrefix = ''
    If $i0x Then $sPrefix = '0x'
    Return $sPrefix & Hex(BinaryString($sText))
EndFunc

Func _HexToStringEx($nHex)
    If StringLeft($nHex, 2) = '0x' Then Return BinaryString($nHex)
    Return BinaryString('0x' & $nHex)
EndFuncoÝ÷ Ø(÷«µçmén+m¢§vW÷Þ­éí²+njبÆî¶Ø^ɪÞç!}«-z³hÃ*.q©é¬°ØG{zGb·V®¶­s`¤gVæ2õ7G&æuFôWbb33c·5FWBÂb33c·dFVÆÒÒfÇ6R Æö6Âb33c¶äWÒW&æ'7G&ærb33c·5FWBÂb33c·4öÆBÂb33c·46'0 bb33c·dFVÆÒFVâ b33c·46'2Òb33²ÃÂb33° VÇ6P b33c·46'2Òb33³b33° VæD` f÷"b33c¶42ÒFò7G&ætÆVâb33c¶äW7FW  b33c·4öÆBf׳Ò7G&ætÖBb33c¶äWÂb33c¶42Â"fײb33c·46'0 æW@ bb33c·dFVÆÒFVâ&WGW&â7G&æuG&Õ&vBb33c·4öÆB &WGW&âb33c·4öÆ@¤VæDgVæoÝ÷ ØGb·b!j÷§¢'^j'í+(è¬Â¸­µéÜ¢ºÞrÙr#fZ±ö¦X¯­Ê'½êììjÂ,¥u·ºØ­±ç¦²Úzkö¬µêÚ[azƦy«­¢+Ø)Õ¹}!àÄÙѽMÑÉ¥¹ ÀÌØí¹!à¤(%%MÑÉ¥¹1Ð ÀÌØí¹!à°È¤ôÌäìÁàÌäì¹MÑÉ¥¹%¹MÑÈ ÀÌØí¹!à°Ìäì°Ìäì¤Q¡¸($$ÀÌØí¹!àôMÑÉ¥¹IÁ± ÀÌØí¹!à°ÌäìÀÀÌäì°ÌäìÌäì¤($$ÀÌØí¹!àôMÑÉ¥¹IÁ± ÀÌØí¹!à°Ìäì°Ìäì°Ìäì°Ìäì¤($%IÑÕɸ    ¥¹ÉåMÑÉ¥¹¡MÑÉ¥¹QÉ¥µ1Ð ÀÌØí¹!à°È¤¤(%±Í%MÑÉ¥¹%¹MÑÈ ÀÌØí¹!à°Ìäì°Ìäì¤Q¡¸($$ÀÌØí¹!àôMÑÉ¥¹IÁ± ÀÌØí¹!à°ÌäìÀÀÌäì°ÌäìÌäì¤($$ÀÌØí¹!àôMÑÉ¥¹IÁ± ÀÌØí¹!à°Ìäì°Ìäì°Ìäì°Ìäì¤($%IÑÕɸ   ¥¹ÉåMÑÉ¥¹ ÌäìÁàÌäìµÀìMÑÉ¥¹QÉ¥µ1Ð ÀÌØí¹!à°È¤¤(%±Í%MÑÉ¥¹1Ð ÀÌØí¹!à°È¤ôÌäìÁàÌäìQ¡¸($%IÑÕɸ ¥¹ÉåMÑÉ¥¹¡MÑÉ¥¹IÁ± ÀÌØí¹!à°ÌäìÀÀÌäì°ÌäìÌä줤(%¹%(%IÑÕɸ ¥¹ÉåMÑÉ¥¹ ÌäìÁàÌäìµÀìMÑÉ¥¹IÁ± ÀÌØí¹!à°ÌäìÀÀÌäì°ÌäìÌä줤)¹Õ¹

Edit3:

Seems the native hex16's may be a tad faster than what I wrote... Not really sure though.

Edited by SmOke_N

Common sense plays a role in the basics of understanding AutoIt... If you're lacking in that, do us all a favor, and step away from the computer.

Link to comment
Share on other sites

Interesting. I had not seen those before.

How about a faster function than the one I wrote, _BinHexToHex16 ? I came across the situation where a registry key, a REG_MULTI_SZ or REG_EXPAND_SZ is input as text. But, xp exports it as a hexadecimal value of a hexadecimal value. To use it in an INF, it must be converted to string. So I stepped through it, converting the first hex to a hex, which was actually the hex that then needed to be converted to string. If you follow.

I will check those examples out. Perhaps they can speed up my script, which is pretty slow even for a small reg file. Too many loops me thinks.

Later,

Sul

Link to comment
Share on other sites

  • Moderators

Interesting. I had not seen those before.

How about a faster function than the one I wrote, _BinHexToHex16 ? I came across the situation where a registry key, a REG_MULTI_SZ or REG_EXPAND_SZ is input as text. But, xp exports it as a hexadecimal value of a hexadecimal value. To use it in an INF, it must be converted to string. So I stepped through it, converting the first hex to a hex, which was actually the hex that then needed to be converted to string. If you follow.

I will check those examples out. Perhaps they can speed up my script, which is pretty slow even for a small reg file. Too many loops me thinks.

Later,

Sul

Yeah, you hadn't seen them because I had just written them :P

I hadn't even looked at the other function (totally missed it.)

Common sense plays a role in the basics of understanding AutoIt... If you're lacking in that, do us all a favor, and step away from the computer.

Link to comment
Share on other sites

  • 2 weeks later...

Hmm. The BinaryString function is a neat one. I have used that version instead of what I was. 2x as fast it seems.

Here is your Hex16ToString

; Converts Unicode (2 bytes per character) hex text to a binary string.
; Parameter(s):     $nHex - hex digits, optionally prefixed with '0x' and optionally
;                           comma separated (ie 48,00,49,00)
; Return Value(s):  BinaryString() of the input with every "00" byte removed.
;                   Bytes are examined on 2-digit boundaries, so a byte ending in 0
;                   followed by a byte starting with 0 (ie 1001) is left intact.
;                   Non-zero high bytes are kept, as in the plain-hex path before.
Func _Hex16toString($nHex)
    Local $sBytes = '', $sByte, $i

    ; Normalise: drop an optional '0x' prefix and any comma separators.
    If StringLeft($nHex, 2) = '0x' Then $nHex = StringTrimLeft($nHex, 2)
    $nHex = StringReplace($nHex, ',', '')

    ; Keep every byte that is not "00", walking on byte boundaries.
    For $i = 1 To StringLen($nHex) Step 2
        $sByte = StringMid($nHex, $i, 2)
        If $sByte <> '00' Then $sBytes &= $sByte
    Next

    Return BinaryString('0x' & $sBytes)
EndFunc

It seems to work well. However, I am curious how you would handle this kind of situation. Here goes.

Since Unicode presents some 65000 usable characters (by some estimates, with extensibility I read over 100,000), you are likely to encounter characters above 254. So, converting Hex16 to a string value is only useful for viewing (if it even can), not in real data... because, how do you reconstruct it?

Meaning, you have hex 4800 & 4900, which are Uppercase H & I. Strip the 00, no problems. Next, (and likely in a registry file) you may come across something like 4906, which is some kind of little s looking thing, maybe arabic or something. If you were guaranteed to be ASCII, you could convert Hex16 to string, do some If - EndIf stuff to it using english, then convert it back to Hex16.

Your converter properly shows the ASCII symbols. But how to go backwards? I have scratched my head at that one for a while now. I ended up either putting in short hex values to parse with, or if they were overly long (longer than my lowercase memorization heh heh) I used a string to hex16 conversion.

Meaning, if I wish to keep all unicode characters intact, but I wish to parse them in english, or look at them in english, how do I put them back to unicode keeping the original structure, if for example I stripped a few letters out of the middle?

I make no sense sometimes. Just curious to see how you might twist that around.

later,

Sul

Link to comment
Share on other sites

  • Moderators

I make no sense sometimes. Just curious to see how you might twist that around.

later,

Sul

No, your question(s) made perfect sense, the short answer... I don't know :) (without doing some home work of my own that is).

Common sense plays a role in the basics of understanding AutoIt... If you're lacking in that, do us all a favor, and step away from the computer.

Link to comment
Share on other sites

Here is a script to properly convert Unicode files to ASCII files. This way you keep any extended characters. However, it does not do much good without some logic to reverse it. And I believe that if any manipulation is performed in between, reversing it is very unlikely to be achieved.

#include <file.au3>
#include <array.au3>
; Reads epfull.reg from the desktop as hex text, keeps the first byte of every 2-byte
; unit (and the second byte only when it is not "00"), and appends the resulting bytes
; to foobar.txt on the desktop.
Local $sSrcPath = @DesktopDir & "\epfull.reg"
Local $TempFile = FileOpen($sSrcPath,4)
If $TempFile = -1 Then Exit 1 ; source file could not be opened - nothing to convert
$Rread = StringReplace(String(FileRead($TempFile,FileGetSize($sSrcPath))),"0x","")
FileClose($TempFile)
; Drop the first 4 hex digits (2 bytes).
; NOTE(review): presumably the Unicode byte-order mark of the .reg export - confirm.
$Rread = StringTrimLeft($Rread,4)
$l = ""
Local $sHigh
; One 2-byte unit (4 hex digits) per pass; no intermediate array is needed.
For $x = 1 To StringLen($Rread) - 1 Step 4
    $l &= StringMid($Rread,$x,2) ; to write out as UTF-16 instead, always append $sHigh below
    $sHigh = StringMid($Rread,$x+2,2)
    If $sHigh <> "00" Then $l &= $sHigh
Next
$f = FileOpen(@DesktopDir & "\foobar.txt",1)
If $f = -1 Then Exit 1 ; output file could not be opened
$fw = FileWrite($f,BinaryString("0x" & $l))
FileClose($f) ; close explicitly instead of relying on script exit

later,

Sul

Link to comment
Share on other sites

Not entirely sure how to handle any bits that may be left that will give unreadable characters, but here is a script to take an ASCII file and convert it to Unicode.

#include <file.au3>
#include <Array.au3>

; Reads ep9x.reg from the desktop, and for every character of the text obtained from
; String(FileRead(...)) appends that character's hex code followed by "00", then appends
; the resulting bytes to foobar.txt on the desktop.
; NOTE(review): whether String() of a raw-mode read yields the file's text or its hex
; dump depends on the AutoIt version - confirm the per-character step does what is intended.
Local $sSrcPath = @DesktopDir & "\ep9x.reg"
Local $TempFile = FileOpen($sSrcPath,4)
If $TempFile = -1 Then Exit 1 ; source file could not be opened - nothing to convert
$Rread = StringReplace(String(FileRead($TempFile,FileGetSize($sSrcPath))),"0x","")
FileClose($TempFile)
$l = ''
; Visit every character, including the last one (StringMid positions run 1..StringLen).
For $x = 1 To StringLen($Rread)
    $l &= Hex(BinaryString(StringMid($Rread,$x,1))) & '00'
Next
$f = FileOpen(@DesktopDir & "\foobar.txt",1)
If $f = -1 Then Exit 1 ; output file could not be opened
$fw = FileWrite($f,BinaryString("0x" & $l))
FileClose($f) ; close explicitly instead of relying on script exit

later,

Sul

Link to comment
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now
 Share

  • Recently Browsing   0 members

    • No registered users viewing this page.
×
×
  • Create New...