Jump to content
Sign in to follow this  
sulfurious

Unicode functions

Recommended Posts

Here are some functions I built to work with some Unicode files.

Use the following in a script that includes the functions to see how they work. I built them to handle the flags in Unicode hex.

Try this for $a -

320035002C00300030002C00340031002C00300030002C00350030002C00300030002C00350030002C00300030002C003400

34002C00300030002C00340031002C00300030002C00350034002C00300030002C00340031002C00300030002C0032003500

2C00300030002C00320035002C00300030002C00350033002C00300030002C00350039002C00300030002C00350033002C00

300030002C00350034002C00300030002C00340035002C00300030002C00340064002C00300030002C00340034002C003000

30002C00350032002C00300030002C00340039002C00300030002C00350036002C00300030002C00340035002C0030003000

2C00320035002C00300030002C00300030002C0030003000

; Usage example for the functions defined below.
; The next line is a placeholder, not runnable code: assign $a the sample hex
; digits shown above as a single quoted string.
$a = some hex here
MsgBox(0,"Binary string hex value",$a)

;try each of these
; Each assignment overwrites $b, so only the last active line takes effect;
; comment out the others to compare the three call forms (default, comma
; stripping, comma stripping plus duplication of hex value "25").
$b = _BinaryHexToHex16($a)
$b = _BinaryHexToHex16($a,True)
$b = _BinaryHexToHex16($a,True,"25")

; Show the converted value, then the same value decoded by each decoder.
MsgBox(0,"2byte hex converted to 1byte hex",$b)
MsgBox(0,"Example of standard _HexToString",_HexToString($b))
MsgBox(0,"Example of _Hex16toString",_Hex16toString($b))

;and each of these
; Encode $b again, first without and then with comma separators.
MsgBox(0,"Example of _StringToHex16",_StringToHex16($b))
MsgBox(0,"Example of _StringToHex16",_StringToHex16($b,True))

;===============================================================================
;
; Function Name:    _BinaryHexToHex16($HexVal,$StripComma,$HexPad)
; Description:      Binary string values are exported as 2 bytes for each 1 byte value.
;                   Convert this to all 1 byte values. Pad a specific hex value as option.
;                   Strip "comma" from exported value as option.
;                   The input is read in 4-digit units; the first two digits of each
;                   unit are decoded to one character with _HexToString and the
;                   characters are joined. Units are handled two at a time so that
;                   each decoded pair can be compared with $HexPad.
; Parameter(s):     $HexVal - the entire hex value to convert
;                   $StripComma - True or False (True removes every "2C00" first)
;                   $HexPad - the hex value to find one instance of & duplicate (ie \ to \\)
;                       (note: this must be in single hex value - ie. 5C = \ )
;                       A decoded pair equal to $HexPad is emitted as
;                       $HexPad & "00" & $HexPad.
; Requirement(s):   Binary string as hex. Needs _HexToString.
; Return Value(s):  On Success - Returns the converted 2 byte hex to 1 byte hex.
;                   On Failure - -1  and sets @ERROR = 1
;                       ($HexPad is not empty or two hex digits, or _HexToString
;                        rejects part of $HexVal)
; Author(s):        Sulfurious
; Corrected:        2006/09/05
;
;===============================================================================
Func _BinaryHexToHex16($HexVal,$StripComma=False,$HexPad="")
    Local $hxFIN = "", $PadIt = False
    Local $ihx, $iOff, $hxTMP, $sUnit, $sChar

    ; $HexPad is optional; when supplied it must be exactly two hex digits.
    If StringLen($HexPad) <> 0 Then
        If StringLen($HexPad) <> 2 Or Not StringIsXDigit($HexPad) Then
            SetError(1)
            Return -1
        EndIf
        $PadIt = True
    EndIf

    If $StripComma = True Then $HexVal = StringReplace($HexVal, "2C00", "")

    For $ihx = 1 To StringLen($HexVal) Step 8
        ; Decode the two 4-digit units of this group (offsets 0 and 4).
        $hxTMP = ""
        For $iOff = 0 To 4 Step 4
            $sUnit = StringMid($HexVal, $ihx + $iOff, 2)
            ; A trailing half group has no second unit: stop instead of
            ; handing an empty string to _HexToString.
            If StringLen($sUnit) = 0 Then ExitLoop
            $sChar = _HexToString($sUnit)
            If @error Then
                ; Do not let the helper's -1 leak into the returned text.
                SetError(1)
                Return -1
            EndIf
            $hxTMP &= $sChar
        Next

        If $PadIt = True Then
            If $hxTMP = $HexPad Then $hxTMP = $HexPad & "00" & $HexPad
        EndIf
        $hxFIN = $hxFIN & $hxTMP
    Next
    Return $hxFIN
EndFunc ;==> _BinaryHexToHex16

;===============================================================================
; TAKEN FROM INCLUDES OF AUTOIT v3.2 - WRITTEN BY Jarvis Stubblefield
; Decodes a string of hex digit pairs into the characters they encode.
; Parameter:  $strHex - hex digits, two per character.
; Returns:    the decoded string, or -1 with @error = 1 when the digit count
;             is odd or Dec() reports an error for a pair.
Func _HexToString($strHex)
    Local $aDigits = StringSplit($strHex, "")
    Local $nDigits = $aDigits[0]
    Local $sText = ""
    Local $iPos, $iCode

    ; Digits are consumed in pairs, so an odd count cannot be decoded.
    If Mod($nDigits, 2) <> 0 Then
        SetError(1)
        Return -1
    EndIf

    For $iPos = 1 To $nDigits - 1 Step 2
        $iCode = Dec($aDigits[$iPos] & $aDigits[$iPos + 1])
        If @error <> 0 Then
            SetError(1)
            Return -1
        EndIf
        $sText &= Chr($iCode)
    Next

    Return $sText
EndFunc   ;==>_HexToString

;===============================================================================
;
; Function Name:    _Hex16toString($hByte)
; Description:      Hexadecimal in Ascii/Ansi represented as Hex (5C).
;                   Hexadecimal in Unicode represented as Hex (5C00).
;                   Convert Unicode Hex to string.
;                   Only the first two digits of every 4-digit unit are decoded;
;                   the last two digits of the unit are ignored.
; Parameter(s):     $hByte - the entire hex value to convert
; Requirement(s):   String as Unicode hex. No comma's are allowed.
;                   Hex value must be multiple of 4 digits.
; Return Value(s):  On Success - Returns the converted string of characters.
;                   On Failure - -1  and sets @ERROR = 1
; Author(s):        Sulfurious
; Corrected:        2006/09/05
;
;===============================================================================
Func _Hex16toString($hByte)
    Local $i, $sResult = ""
    If Mod(StringLen($hByte),4)<>0 Then
        SetError(1)
        Return -1
    EndIf
    ; Step by one 4-digit unit, matching the length check above. Stepping by 8
    ; and reading two units per pass appended a stray Chr(0) whenever the input
    ; held an odd number of units.
    For $i = 1 To StringLen($hByte) Step 4
        $sResult &= Chr(Dec(StringMid($hByte, $i, 2)))
    Next
    Return $sResult
EndFunc   ;==> _Hex16toString

;===============================================================================
; TAKEN FROM INCLUDES OF AUTOIT v3.2 - WRITTEN BY Jarvis Stubblefield
; Encodes a string as hex: each character becomes the two-digit hex form of
; its Asc() code.
; Parameter:  $strChar - the text to encode.
; Returns:    the concatenated two-digit hex values.
Func _StringToHex($strChar)
    Local $aChars = StringSplit($strChar, "")
    Local $sHexOut, $n

    For $n = 1 To $aChars[0]
        $sHexOut = $sHexOut & Hex(Asc($aChars[$n]), 2)
    Next

    Return $sHexOut

EndFunc   ;==>_StringToHex

;===============================================================================
;
; Function Name:    _StringToHex16("string")
; Description:      Convert a string of characters to Unicode hexadecimal.
;                   Each character becomes the two-digit hex of its Asc() code
;                   followed by a literal "00".
; Parameter(s):     $strChar is the string you want to convert.
;                   Optional: $bComma is boolean to insert a comma between hex values.
; Requirement(s):   String Input.
; Return Value(s):  Returns the converted string in Unicode hexadecimal (ie 5C,00,22,00).
;                   With $bComma the commas only separate values; there is no
;                   trailing comma. An empty input returns an empty string.
; Author(s):        Sulfurious
; Corrected:        2005/09/05
;
;===============================================================================

Func _StringToHex16($strChar,$bComma=False)
    Local $i, $hStr = "", $sHigh, $sJoin

    ; Decide the formatting once instead of on every character.
    If $bComma = True Then
        $sHigh = ",00"
        $sJoin = ","
    Else
        $sHigh = "00"
        $sJoin = ""
    EndIf

    For $i = 1 To StringLen($strChar)
        ; Separator goes between characters only, so the result matches the
        ; documented "5C,00,22,00" form without a dangling comma.
        If $i > 1 Then $hStr &= $sJoin
        $hStr &= Hex(Asc(StringMid($strChar, $i, 1)), 2) & $sHigh
    Next
    Return $hStr
EndFunc   ;==>_StringToHex16

Share this post


Link to post
Share on other sites

Erifash I believe made the _StringToHex and _HexToString a tad faster...

; Round-trip demo: encode a sample string to hex, show it, decode it, show it.
$sSample = 'SmoKe_N'
$sAsHex = _StringToHexEx($sSample)
MsgBox(0, '1', $sAsHex)
$sDecoded = _HexToStringEx($sAsHex)
MsgBox(0, '2', $sDecoded)

; Returns the hex digits of $sText as produced by Hex(BinaryString($sText)).
; $i0x - when true, the result is prefixed with '0x'.
Func _StringToHexEx($sText, $i0x = False)
    Local $sDigits = Hex(BinaryString($sText))
    If Not $i0x Then Return $sDigits
    Return '0x' & $sDigits
EndFunc

; Converts hex digits back to a binary string with BinaryString().
; $nHex may be given with or without a leading '0x'; the prefix is added when
; it is missing.
Func _HexToStringEx($nHex)
    If StringLeft($nHex, 2) = '0x' Then Return BinaryString($nHex)
    Return BinaryString('0x' & $nHex)
EndFunc
; NOTE: in the source of this post the EndFunc above was fused with a long run
; of mis-encoded bytes. It appears to have held two further code boxes that
; were destroyed by a character-encoding error and could not be recovered.

Edit3:

Seems the native hex16's may be a tad faster than what I wrote... Not really sure though.

Edited by SmOke_N

Common sense plays a role in the basics of understanding AutoIt... If you're lacking in that, do us all a favor, and step away from the computer.

Share this post


Link to post
Share on other sites

Interesting. I had not seen those before.

How about a faster function than the one I wrote, _BinHexToHex16 ? I came across the situation where a registry key, a REG_MULTI_SZ or REG_EXPAND_SZ is input as text. But, xp exports it as a hexadecimal value of a hexadecimal value. To use it in an INF, it must be converted to string. So I stepped through it, converting the first hex to a hex, which was actually the hex that then needed to be converted to string. If you follow.

I will check those examples out. Perhaps they can speed up my script, which is pretty slow even for a small reg file. Too many loops me thinks.

Later,

Sul

Share this post


Link to post
Share on other sites

Interesting. I had not seen those before.

How about a faster function than the one I wrote, _BinHexToHex16 ? I came across the situation where a registry key, a REG_MULTI_SZ or REG_EXPAND_SZ is input as text. But, xp exports it as a hexadecimal value of a hexadecimal value. To use it in an INF, it must be converted to string. So I stepped through it, converting the first hex to a hex, which was actually the hex that then needed to be converted to string. If you follow.

I will check those examples out. Perhaps they can speed up my script, which is pretty slow even for a small reg file. Too many loops me thinks.

Later,

Sul

Yeah, you hadn't seen them because I had just written them :P

I hadn't even looked at the other function (totally missed it.)


Common sense plays a role in the basics of understanding AutoIt... If you're lacking in that, do us all a favor, and step away from the computer.

Share this post


Link to post
Share on other sites

Hmm. The BinaryString function is a neat one. I have used that version instead of what I was. 2x as fast it seems.

Here is your Hex16ToString

; Converts Unicode hex (4 digits per character, e.g. 4800 for "H") to a binary
; string via BinaryString().
; $nHex - the hex digits; an optional leading '0x' and any commas between
;         values are removed before decoding.
; For every 4-digit unit the first byte is always kept and the second byte is
; kept only when it is not "00", so plain ASCII text collapses to one byte per
; character while other values keep both bytes.
Func _Hex16toString($nHex)
    Local $sBytes = '', $sSecond, $i

    ; Normalise the input first so every branch sees bare, aligned hex digits.
    If StringLeft($nHex, 2) = '0x' Then $nHex = StringTrimLeft($nHex, 2)
    $nHex = StringReplace($nHex, ',', '')

    ; Walk unit by unit. A blanket StringReplace of '00' also matches across
    ; byte boundaries (e.g. 20000A00 would collapse to 2A).
    For $i = 1 To StringLen($nHex) Step 4
        $sBytes &= StringMid($nHex, $i, 2)
        $sSecond = StringMid($nHex, $i + 2, 2)
        If $sSecond <> '00' Then $sBytes &= $sSecond
    Next

    Return BinaryString('0x' & $sBytes)
EndFunc

It seems to work well. However, I am curious how you would handle this kind of situation. Here goes.

Since Unicode presents some 65000 usable characters (by some estimates, with extensibility I read over 100,000), you are likely to encounter characters above 254. So, converting Hex16 to a string value is only useful for viewing (if it even can), not in real data... because, how do you reconstruct it?

Meaning, you have hex 4800 & 4900, which are Uppercase H & I. Strip the 00, no problems. Next, (and likely in a registry file) you may come across something like 4906, which is some kind of little s looking thing, maybe arabic or something. If you were guaranteed to be ASCII, you could convert Hex16 to string, do some If - EndIf stuff to it using english, then convert it back to Hex16.

Your converter properly shows the ASCII symbols. But how to go backwards? I have scratched my head at that one for a while now. I ended up either putting in short hex values to parse with, or if they were overly long (longer than my lowercase memorization heh heh) I used a string to hex16 conversion.

Meaning, if I wish to keep all unicode characters intact, but I wish to parse them in english, or look at them in english, how do I put them back to unicode keeping the original structure, if for example I stripped a few letters out of the middle?

I make no sense sometimes. Just curious to see how you might twist that around.

later,

Sul

Share this post


Link to post
Share on other sites

I make no sense sometimes. Just curious to see how you might twist that around.

later,

Sul

No, your question(s) made perfect sense, the short answer... I don't know :) (without doing some home work of my own that is).

Common sense plays a role in the basics of understanding AutoIt... If you're lacking in that, do us all a favor, and step away from the computer.

Share this post


Link to post
Share on other sites

Here is a script to properly convert Unicode files to ASCII files. This way you keep any extended characters. However, it does not do much good without some logic to reverse it. And I believe that if any manipulation is performed in the meantime, a clean reversal is very unlikely to be achieved.

#include <file.au3>
#include <array.au3>
; Reads epfull.reg from the desktop in raw mode, turns the data into hex text,
; and appends a reduced copy to foobar.txt on the desktop: for every 4 hex
; digits the first byte is always written and the second byte is written only
; when it is not "00".
Local $sSrcPath = @DesktopDir & "\epfull.reg"
Local $TempFile = FileOpen($sSrcPath,4)
If $TempFile = -1 Then Exit 1 ; source could not be opened - nothing to convert
$Rread = StringReplace(String(FileRead($TempFile,FileGetSize($sSrcPath))),"0x","")
FileClose($TempFile) ; release the handle as soon as the data is in memory
; Drop the first 4 hex digits (2 bytes) of the file.
; NOTE(review): presumably the byte-order mark - confirm against the input.
$Rread = StringTrimLeft($Rread,4)
$l = ""
For $x = 1 To StringLen($Rread) - 1 Step 4
    $l = $l & StringMid($Rread,$x,2)
    $sSecond = StringMid($Rread,$x+2,2)
    ; To write the data out as UTF-16 instead, append $sSecond unconditionally.
    If $sSecond <> "00" Then $l = $l & $sSecond
Next
$f = FileOpen(@DesktopDir & "\foobar.txt",1)
If $f = -1 Then Exit 1 ; output could not be opened
$fw = FileWrite($f,BinaryString("0x" & $l))
FileClose($f) ; flush and release the output file

later,

Sul

Share this post


Link to post
Share on other sites

Not entirely sure how to handle any bits that may be left that will give unreadable characters, but here is a script to take an ASCII file and convert it to Unicode.

#include <file.au3>
#include <Array.au3>

; Reads ep9x.reg from the desktop in raw mode and appends a widened copy to
; foobar.txt on the desktop: every character of $Rread is written as its hex
; value followed by a "00" byte.
Local $sSrcPath = @DesktopDir & "\ep9x.reg"
Local $TempFile = FileOpen($sSrcPath,4)
If $TempFile = -1 Then Exit 1 ; source could not be opened - nothing to convert
$Rread = StringReplace(String(FileRead($TempFile,FileGetSize($sSrcPath))),"0x","")
FileClose($TempFile) ; release the handle as soon as the data is in memory
; NOTE(review): $Rread is String() of a raw-mode read with "0x" removed. If that
; is a hex dump of the file rather than its text, this loop encodes the hex
; digits themselves and should read two digits per byte instead - confirm on
; the AutoIt version in use.
$l = ''
; Run through the final character as well; stopping at length - 1 dropped it.
For $x = 1 To StringLen($Rread)
    $l = $l & Hex(BinaryString(StringMid($Rread,$x,1))) & '00'
Next
$f = FileOpen(@DesktopDir & "\foobar.txt",1)
If $f = -1 Then Exit 1 ; output could not be opened
$fw = FileWrite($f,BinaryString("0x" & $l))
FileClose($f) ; flush and release the output file

later,

Sul

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now
Sign in to follow this  

  • Recently Browsing   0 members

    No registered users viewing this page.

×
×
  • Create New...