Jump to content
Raik

[UDF] encode/decode Html reserved Chars

Recommended Posts

Raik

UDF:

#include-Once
; #INDEX# =======================================================================================================================
; Title .........: HtmlEntities
; AutoIt Version : 3.2.10++
; Language ......: English
; Description ...: Functions to escape Html reserved Characters.
; Author ........: Raik
; ===============================================================================================================================
; #CONSTANTS# ===================================================================================================================
; Lookup table of named HTML entities. Each row is [Unicode code point, entity name without '&' and ';'].
; The whole initialiser must stay on one logical line: a line break inside a quoted name (as happened to 'rho') breaks the script.
Global Const $aisEntities[246][2]=[[34,'quot'],[38,'amp'],[39,'apos'],[60,'lt'],[62,'gt'],[160,'nbsp'],[161,'iexcl'],[162,'cent'],[163,'pound'],[164,'curren'],[165,'yen'],[166,'brvbar'],[167,'sect'],[168,'uml'],[169,'copy'],[170,'ordf'],[171,'laquo'],[172,'not'],[173,'shy'],[174,'reg'],[175,'macr'],[176,'deg'],[177,'plusmn'],[180,'acute'],[181,'micro'],[182,'para'],[183,'middot'],[184,'cedil'],[186,'ordm'],[187,'raquo'],[191,'iquest'],[192,'Agrave'],[193,'Aacute'],[194,'Acirc'],[195,'Atilde'],[196,'Auml'],[197,'Aring'],[198,'AElig'],[199,'Ccedil'],[200,'Egrave'],[201,'Eacute'],[202,'Ecirc'],[203,'Euml'],[204,'Igrave'],[205,'Iacute'],[206,'Icirc'],[207,'Iuml'],[208,'ETH'],[209,'Ntilde'],[210,'Ograve'],[211,'Oacute'],[212,'Ocirc'],[213,'Otilde'],[214,'Ouml'],[215,'times'],[216,'Oslash'],[217,'Ugrave'],[218,'Uacute'],[219,'Ucirc'],[220,'Uuml'],[221,'Yacute'],[222,'THORN'],[223,'szlig'],[224,'agrave'],[225,'aacute'],[226,'acirc'],[227,'atilde'],[228,'auml'],[229,'aring'],[230,'aelig'],[231,'ccedil'],[232,'egrave'],[233,'eacute'],[234,'ecirc'],[235,'euml'],[236,'igrave'],[237,'iacute'],[238,'icirc'],[239,'iuml'],[240,'eth'],[241,'ntilde'],[242,'ograve'],[243,'oacute'],[244,'ocirc'],[245,'otilde'],[246,'ouml'],[247,'divide'],[248,'oslash'],[249,'ugrave'],[250,'uacute'],[251,'ucirc'],[252,'uuml'],[253,'yacute'],[254,'thorn'],[255,'yuml'],[338,'OElig'],[339,'oelig'],[352,'Scaron'],[353,'scaron'],[376,'Yuml'],[402,'fnof'],[710,'circ'],[732,'tilde'],[913,'Alpha'],[914,'Beta'],[915,'Gamma'],[916,'Delta'],[917,'Epsilon'],[918,'Zeta'],[919,'Eta'],[920,'Theta'],[921,'Iota'],[922,'Kappa'],[923,'Lambda'],[924,'Mu'],[925,'Nu'],[926,'Xi'],[927,'Omicron'],[928,'Pi'],[929,'Rho'],[931,'Sigma'],[932,'Tau'],[933,'Upsilon'],[934,'Phi'],[935,'Chi'],[936,'Psi'],[937,'Omega'],[945,'alpha'],[946,'beta'],[947,'gamma'],[948,'delta'],[949,'epsilon'],[950,'zeta'],[951,'eta'],[952,'theta'],[953,'iota'],[954,'kappa'],[955,'lambda'],[956,'mu'],[957,'nu'],[958,'xi'],[959,'omicron'],[960,'pi'],[961,'rho'],[962,'sigmaf'],[963,'sigma'],[964,'tau'],[965,'upsilon'],[966,'phi'],[967,'chi'],[968,'psi'],[969,'omega'],[977,'thetasym'],[978,'upsih'],[982,'piv'],[8194,'ensp'],[8195,'emsp'],[8201,'thinsp'],[8204,'zwnj'],[8205,'zwj'],[8206,'lrm'],[8207,'rlm'],[8211,'ndash'],[8212,'mdash'],[8216,'lsquo'],[8217,'rsquo'],[8218,'sbquo'],[8220,'ldquo'],[8221,'rdquo'],[8222,'bdquo'],[8224,'dagger'],[8225,'Dagger'],[8226,'bull'],[8230,'hellip'],[8240,'permil'],[8242,'prime'],[8243,'Prime'],[8249,'lsaquo'],[8250,'rsaquo'],[8254,'oline'],[8260,'frasl'],[8364,'euro'],[8465,'image'],[8472,'weierp'],[8476,'real'],[8482,'trade'],[8501,'alefsym'],[8592,'larr'],[8593,'uarr'],[8594,'rarr'],[8595,'darr'],[8596,'harr'],[8629,'crarr'],[8656,'lArr'],[8657,'uArr'],[8658,'rArr'],[8659,'dArr'],[8660,'hArr'],[8704,'forall'],[8706,'part'],[8707,'exist'],[8709,'empty'],[8711,'nabla'],[8712,'isin'],[8713,'notin'],[8715,'ni'],[8719,'prod'],[8721,'sum'],[8722,'minus'],[8727,'lowast'],[8730,'radic'],[8733,'prop'],[8734,'infin'],[8736,'ang'],[8743,'and'],[8744,'or'],[8745,'cap'],[8746,'cup'],[8747,'int'],[8764,'sim'],[8773,'cong'],[8776,'asymp'],[8800,'ne'],[8801,'equiv'],[8804,'le'],[8805,'ge'],[8834,'sub'],[8835,'sup'],[8836,'nsub'],[8838,'sube'],[8839,'supe'],[8853,'oplus'],[8855,'otimes'],[8869,'perp'],[8901,'sdot'],[8968,'lceil'],[8969,'rceil'],[8970,'lfloor'],[8971,'rfloor'],[9001,'lang'],[9002,'rang'],[9674,'loz'],[9824,'spades'],[9827,'clubs'],[9829,'hearts'],[9830,'diams']]
; ===============================================================================================================================
; #CURRENT# =====================================================================================================================
;_HtmlEntities_Encode
;_HtmlEntities_Decode
; ===============================================================================================================================
; #FUNCTION# ====================================================================================================================
; Name...........: _HtmlEntities_Encode
; Description ...: Replaces reserved/special characters with their named HTML entities (e.g. '<' becomes '&lt;').
; Syntax.........: _HtmlEntities_Encode(ByRef $sTxt)
; Parameters ....: $sTxt - Text to encode; it is modified in place
; Return values .: Always returns 0 (the result is delivered through $sTxt)
; Author ........: Raik
; Modified.......:
; Remarks .......: '&' is escaped before every other character. Otherwise the '&' of an entity that was just
;                  inserted (such as '&quot;') would itself be escaped, producing '&amp;quot;'.
; Related .......: _HtmlEntities_Decode
; Link ..........;
; Example .......; Yes
; ===============================================================================================================================
Func _HtmlEntities_Encode(ByRef $sTxt)
    ; Escape the ampersand first so that the entities inserted below are left intact.
    $sTxt = StringReplace($sTxt, '&', '&amp;', 0, 1)
    For $i = 0 To UBound($aisEntities) - 1
        ; Skip uninitialised rows and the ampersand row, which was handled above.
        If $aisEntities[$i][1] == '' Then ContinueLoop
        If $aisEntities[$i][0] = 38 Then ContinueLoop
        ; Last argument 1 = case-sensitive, so 'Ä' and 'ä' keep their distinct entities.
        $sTxt = StringReplace($sTxt, ChrW($aisEntities[$i][0]), '&' & $aisEntities[$i][1] & ';', 0, 1)
    Next
    Return 0
EndFunc ;==>_HtmlEntities_Encode
; #FUNCTION# ====================================================================================================================
; Name...........: _HtmlEntities_Decode
; Description ...: Replaces named HTML entities with the characters they stand for (e.g. '&lt;' becomes '<').
; Syntax.........: _HtmlEntities_Decode(ByRef $sTxt)
; Parameters ....: $sTxt - Html source to decode; it is modified in place
; Return values .: Always returns 0 (the result is delivered through $sTxt)
; Author ........: Raik
; Modified.......:
; Remarks .......: '&amp;' is decoded after every other entity. Decoding it earlier would turn an escaped
;                  entity such as '&amp;lt;' into '&lt;' and then, wrongly, into '<'.
; Related .......: _HtmlEntities_Encode
; Link ..........;
; Example .......; Yes
; ===============================================================================================================================
Func _HtmlEntities_Decode(ByRef $sTxt)
    For $i = 0 To UBound($aisEntities) - 1
        ; Skip uninitialised rows and the ampersand row, which is handled after the loop.
        If $aisEntities[$i][1] == '' Then ContinueLoop
        If $aisEntities[$i][0] = 38 Then ContinueLoop
        ; Last argument 1 = case-sensitive: '&Auml;' and '&auml;' are different entities.
        $sTxt = StringReplace($sTxt, '&' & $aisEntities[$i][1] & ';', ChrW($aisEntities[$i][0]), 0, 1)
    Next
    ; Decode the ampersand last so it cannot create new entities for the loop to pick up.
    $sTxt = StringReplace($sTxt, '&amp;', '&', 0, 1)
    Return 0
EndFunc ;==>_HtmlEntities_Decode

Example:

#include "EncodeHtmlEntities.au3"

; Round trip: encode the raw text first, then decode the result back.
; Decoding the raw text first would be a no-op and would not demonstrate anything.
Local $txt = "<Äöü>"
_HtmlEntities_Encode($txt)
MsgBox(0, "Encode", $txt) ; shows the entity-encoded text
_HtmlEntities_Decode($txt)
MsgBox(0, "Decode", $txt) ; shows the original text again

EncodeHtmlEntities.au3

EncodeHtmlEntities_Example.au3

  • Like 1

AutoIt-Syntaxsheme for Proton & Phase5 * Firefox Addons by me (resizable Textarea 0.1d) (docked JS-Console 0.1.1)

Share this post


Link to post
Share on other sites
Saruman

Adventurer,  Thanks! :thumbsup:

Useful for my project.

Share this post


Link to post
Share on other sites
Starg

Nice UDF.

Thanks!

Share this post


Link to post
Share on other sites
dmob

Oh thank you, just had a need for this in my IMAP project.

Share this post


Link to post
Share on other sites
GPinzone

Minor bug with the double quote character. It gets translated to &amp;quot; instead of &quot;. Simple fix: just move the entry for "&" to the beginning of the array.

Global Const $aisEntities[246][2]=[[34,'quot'],[38,'amp'],[39,'apos'],

becomes

Global Const $aisEntities[246][2]=[[38,'amp'],[34,'quot'],[39,'apos'],

BTW, as per http://www.w3.org/TR/xhtml1/#C_16 , the entry for the apostrophe should be removed. &apos; is not valid HTML.

 

 

Edited by GPinzone

Gerard J. Pinzone — gpinzone AT yahoo.com

Share this post


Link to post
Share on other sites
NotAviable

I needed these functionalities, so I used them changing the code a little:

  • put change suggested by GPinzone about amp
  • added management of tags composed by "&#" + (utf code number) + ";", in decode function
  • added 2 functions, _Html_GetEncoded and _Html_GetDecoded: they return text encoded/decoded, do not change it using ByRef
; Lookup table of named HTML entities. Each row is [Unicode code point, entity name without '&' and ';'].
; The array is declared with 246 rows but only 245 are initialised (there is no 'apos' row), so the last row is empty.
; The whole initialiser must stay on one logical line: a line break inside a quoted name (as happened to 'sigmaf') breaks the script.
Global Const $aisEntities[246][2]=[[34,'quot'],[38,'amp'],[60,'lt'],[62,'gt'],[160,'nbsp'],[161,'iexcl'],[162,'cent'],[163,'pound'],[164,'curren'],[165,'yen'],[166,'brvbar'],[167,'sect'],[168,'uml'],[169,'copy'],[170,'ordf'],[171,'laquo'],[172,'not'],[173,'shy'],[174,'reg'],[175,'macr'],[176,'deg'],[177,'plusmn'],[180,'acute'],[181,'micro'],[182,'para'],[183,'middot'],[184,'cedil'],[186,'ordm'],[187,'raquo'],[191,'iquest'],[192,'Agrave'],[193,'Aacute'],[194,'Acirc'],[195,'Atilde'],[196,'Auml'],[197,'Aring'],[198,'AElig'],[199,'Ccedil'],[200,'Egrave'],[201,'Eacute'],[202,'Ecirc'],[203,'Euml'],[204,'Igrave'],[205,'Iacute'],[206,'Icirc'],[207,'Iuml'],[208,'ETH'],[209,'Ntilde'],[210,'Ograve'],[211,'Oacute'],[212,'Ocirc'],[213,'Otilde'],[214,'Ouml'],[215,'times'],[216,'Oslash'],[217,'Ugrave'],[218,'Uacute'],[219,'Ucirc'],[220,'Uuml'],[221,'Yacute'],[222,'THORN'],[223,'szlig'],[224,'agrave'],[225,'aacute'],[226,'acirc'],[227,'atilde'],[228,'auml'],[229,'aring'],[230,'aelig'],[231,'ccedil'],[232,'egrave'],[233,'eacute'],[234,'ecirc'],[235,'euml'],[236,'igrave'],[237,'iacute'],[238,'icirc'],[239,'iuml'],[240,'eth'],[241,'ntilde'],[242,'ograve'],[243,'oacute'],[244,'ocirc'],[245,'otilde'],[246,'ouml'],[247,'divide'],[248,'oslash'],[249,'ugrave'],[250,'uacute'],[251,'ucirc'],[252,'uuml'],[253,'yacute'],[254,'thorn'],[255,'yuml'],[338,'OElig'],[339,'oelig'],[352,'Scaron'],[353,'scaron'],[376,'Yuml'],[402,'fnof'],[710,'circ'],[732,'tilde'],[913,'Alpha'],[914,'Beta'],[915,'Gamma'],[916,'Delta'],[917,'Epsilon'],[918,'Zeta'],[919,'Eta'],[920,'Theta'],[921,'Iota'],[922,'Kappa'],[923,'Lambda'],[924,'Mu'],[925,'Nu'],[926,'Xi'],[927,'Omicron'],[928,'Pi'],[929,'Rho'],[931,'Sigma'],[932,'Tau'],[933,'Upsilon'],[934,'Phi'],[935,'Chi'],[936,'Psi'],[937,'Omega'],[945,'alpha'],[946,'beta'],[947,'gamma'],[948,'delta'],[949,'epsilon'],[950,'zeta'],[951,'eta'],[952,'theta'],[953,'iota'],[954,'kappa'],[955,'lambda'],[956,'mu'],[957,'nu'],[958,'xi'],[959,'omicron'],[960,'pi'],[961,'rho'],[962,'sigmaf'],[963,'sigma'],[964,'tau'],[965,'upsilon'],[966,'phi'],[967,'chi'],[968,'psi'],[969,'omega'],[977,'thetasym'],[978,'upsih'],[982,'piv'],[8194,'ensp'],[8195,'emsp'],[8201,'thinsp'],[8204,'zwnj'],[8205,'zwj'],[8206,'lrm'],[8207,'rlm'],[8211,'ndash'],[8212,'mdash'],[8216,'lsquo'],[8217,'rsquo'],[8218,'sbquo'],[8220,'ldquo'],[8221,'rdquo'],[8222,'bdquo'],[8224,'dagger'],[8225,'Dagger'],[8226,'bull'],[8230,'hellip'],[8240,'permil'],[8242,'prime'],[8243,'Prime'],[8249,'lsaquo'],[8250,'rsaquo'],[8254,'oline'],[8260,'frasl'],[8364,'euro'],[8465,'image'],[8472,'weierp'],[8476,'real'],[8482,'trade'],[8501,'alefsym'],[8592,'larr'],[8593,'uarr'],[8594,'rarr'],[8595,'darr'],[8596,'harr'],[8629,'crarr'],[8656,'lArr'],[8657,'uArr'],[8658,'rArr'],[8659,'dArr'],[8660,'hArr'],[8704,'forall'],[8706,'part'],[8707,'exist'],[8709,'empty'],[8711,'nabla'],[8712,'isin'],[8713,'notin'],[8715,'ni'],[8719,'prod'],[8721,'sum'],[8722,'minus'],[8727,'lowast'],[8730,'radic'],[8733,'prop'],[8734,'infin'],[8736,'ang'],[8743,'and'],[8744,'or'],[8745,'cap'],[8746,'cup'],[8747,'int'],[8764,'sim'],[8773,'cong'],[8776,'asymp'],[8800,'ne'],[8801,'equiv'],[8804,'le'],[8805,'ge'],[8834,'sub'],[8835,'sup'],[8836,'nsub'],[8838,'sube'],[8839,'supe'],[8853,'oplus'],[8855,'otimes'],[8869,'perp'],[8901,'sdot'],[8968,'lceil'],[8969,'rceil'],[8970,'lfloor'],[8971,'rfloor'],[9001,'lang'],[9002,'rang'],[9674,'loz'],[9824,'spades'],[9827,'clubs'],[9829,'hearts'],[9830,'diams']]
; Values accepted by the second parameter of _HtmlEntities_Encode / _Html_GetEncoded:
Global Const $HE_DECODETYPE_Html  ="HTML"   ; emit named entities
Global Const $HE_DECODETYPE_Number="Number" ; emit numeric references built from the code point

; #FUNCTION# ====================================================================================================================
; Name...........: _HtmlEntities_Encode
; Description ...: Replaces reserved/special characters with HTML entities, either named ('&lt;') or numeric ('&#60;').
; Syntax.........: _HtmlEntities_Encode(ByRef $sTxt [, $decodeType_IN = $HE_DECODETYPE_Html])
; Parameters ....: $sTxt          - Text to encode; it is modified in place
;                  $decodeType_IN - $HE_DECODETYPE_Html for named entities (default),
;                                   $HE_DECODETYPE_Number for numeric references.
;                                   Any other value leaves $sTxt untouched.
; Return values .: Always returns 0 (the result is delivered through $sTxt)
; Author ........: Raik
; Modified.......:
; Remarks .......: '&' is escaped before every other character. Otherwise the '&' of an entity that was just
;                  inserted would itself be escaped (for example '"' would end up as '&amp;quot;').
; Related .......: _HtmlEntities_Decode
; Link ..........;
; Example .......; Yes
; ===============================================================================================================================
Func _HtmlEntities_Encode(ByRef $sTxt, $decodeType_IN = $HE_DECODETYPE_Html)
    Local $bNumeric
    Switch $decodeType_IN
        Case $HE_DECODETYPE_Html
            $bNumeric = False
        Case $HE_DECODETYPE_Number
            $bNumeric = True
        Case Else
            ; Unknown output type: leave the text unchanged.
            Return 0
    EndSwitch

    ; Escape the ampersand first so that the entities inserted below are left intact.
    If $bNumeric Then
        $sTxt = StringReplace($sTxt, '&', '&#38;', 0, 1)
    Else
        $sTxt = StringReplace($sTxt, '&', '&amp;', 0, 1)
    EndIf

    Local $iCode, $sEntity
    For $i = 0 To UBound($aisEntities) - 1
        ; The table has an uninitialised trailing row; skip it, and skip the ampersand handled above.
        If $aisEntities[$i][1] == '' Then ContinueLoop
        $iCode = $aisEntities[$i][0]
        If $iCode = 38 Then ContinueLoop
        If $bNumeric Then
            ; A numeric reference needs the leading '&' ('&#60;', not '#60;').
            $sEntity = '&#' & $iCode & ';'
        Else
            $sEntity = '&' & $aisEntities[$i][1] & ';'
        EndIf
        ; Last argument 1 = case-sensitive, so 'Ä' and 'ä' keep their distinct entities.
        $sTxt = StringReplace($sTxt, ChrW($iCode), $sEntity, 0, 1)
    Next
    Return 0
EndFunc ;==>_HtmlEntities_Encode

; #FUNCTION# ====================================================================================================================
; Name...........: _HtmlEntities_Decode
; Description ...: Replaces HTML entities with the characters they stand for. Both the named form ('&lt;') and the
;                  decimal numeric form ('&#60;') are handled for every code point listed in $aisEntities.
; Syntax.........: _HtmlEntities_Decode(ByRef $sTxt)
; Parameters ....: $sTxt - Html source to decode; it is modified in place
; Return values .: Always returns 0 (the result is delivered through $sTxt)
; Author ........: Raik
; Modified.......:
; Remarks .......: The ampersand ('&amp;' / '&#38;') is decoded after every other entity, in a single pass.
;                  Decoding it earlier would turn an escaped entity such as '&amp;lt;' into '&lt;' and then into '<'.
; Related .......: _HtmlEntities_Encode
; Link ..........;
; Example .......; Yes
; ===============================================================================================================================
Func _HtmlEntities_Decode(ByRef $sTxt)
    Local $iCode, $sChar
    For $i = 0 To UBound($aisEntities) - 1
        ; The table has an uninitialised trailing row; skip it, and skip the ampersand handled after the loop.
        If $aisEntities[$i][1] == '' Then ContinueLoop
        $iCode = $aisEntities[$i][0]
        If $iCode = 38 Then ContinueLoop
        $sChar = ChrW($iCode)
        ; Last argument 1 = case-sensitive: '&Auml;' and '&auml;' are different entities.
        $sTxt = StringReplace($sTxt, '&'  & $aisEntities[$i][1] & ';', $sChar, 0, 1)
        $sTxt = StringReplace($sTxt, '&#' & $iCode & ';', $sChar, 0, 1)
    Next
    ; Decode both ampersand spellings in one pass, so '&amp;#38;' becomes '&#38;' and is not decoded twice.
    $sTxt = StringRegExpReplace($sTxt, '&(?:amp|#38);', '&')
    Return 0
EndFunc ;==>_HtmlEntities_Decode

; Returns an entity-encoded copy of $txt_IN; the caller's variable is not modified.
;   $txt_IN        - text to encode
;   $decodeType_IN - output type passed through to _HtmlEntities_Encode (defaults to $HE_DECODETYPE_Html)
Func _Html_GetEncoded(ByRef $txt_IN, $decodeType_IN = $HE_DECODETYPE_Html)
    ; Work on a private copy because _HtmlEntities_Encode rewrites its argument in place.
    Local $sEncoded = $txt_IN
    _HtmlEntities_Encode($sEncoded, $decodeType_IN)
    Return $sEncoded
EndFunc ;==>_Html_GetEncoded


; Returns an entity-decoded copy of $txt_IN; the caller's variable is not modified.
;   $txt_IN - Html source to decode
Func _Html_GetDecoded(ByRef $txt_IN)
    ; Work on a private copy because _HtmlEntities_Decode rewrites its argument in place.
    Local $sDecoded = $txt_IN
    _HtmlEntities_Decode($sDecoded)
    Return $sDecoded
EndFunc ;==>_Html_GetDecoded

 

 

Thank you,

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

×