#include "HTMLParser.au3"
#include <Memory.au3> ; _MemMoveMemory lives in the Memory UDF; the original include had lost its argument

; ------------------------------------------------------------------------------
; StringRepeat
;   Returns a string made of $nCount copies of the FIRST character of $sChar.
;   The buffer is filled with msvcrt's memset, so only single-byte (ANSI)
;   characters are supported.
; Parameters:
;   $sChar  - string whose first character is repeated
;   $nCount - number of repetitions; values below 1 yield ""
; Returns:
;   The repeated string. On failure returns "" and sets @error:
;     1 = the buffer could not be allocated, 2 = the DllCall failed.
; ------------------------------------------------------------------------------
Func StringRepeat($sChar, $nCount)
    ; "char[0]" is not a valid struct definition, so answer the trivial case here
    ; instead of forcing every caller to guard against a zero count.
    If $nCount < 1 Then Return ""

    Local $tBuffer = DllStructCreate("char[" & $nCount & "]")
    If @error Then Return SetError(1, 0, "")

    ; memset(void *dest, int c, size_t count): the count is pointer-sized, so pass it as such.
    DllCall("msvcrt.dll", "ptr:cdecl", "memset", "ptr", DllStructGetPtr($tBuffer), "int", Asc($sChar), "ulong_ptr", $nCount)
    If @error Then Return SetError(2, 0, "")

    Return DllStructGetData($tBuffer, 1)
EndFunc

; ------------------------------------------------------------------------------
; Pretty-printer: reads prettyhtml.txt, tokenises it with _HTMLParser2 and writes
; one token per line to prettyhtml_output.txt, indented with one TAB per level
; of open elements.
; ------------------------------------------------------------------------------
$sHTML = FileRead("prettyhtml.txt")
; @error = 1 means the file could not be opened; bail out before truncating the output file.
If @error = 1 Then Exit MsgBox(0, "Ooops!", "Could not read prettyhtml.txt" & @CRLF & "Closing script...")

$hFile = FileOpen("prettyhtml_output.txt", 2) ; 2 = write mode, erase previous contents
If $hFile = -1 Then Exit MsgBox(0, "Ooops!", "Could not open prettyhtml_output.txt for writing" & @CRLF & "Closing script...")

$tTokenList = _HTMLParser2($sHTML)
$iExtended = @extended ; captured immediately after the parser call, before anything resets it

$pItem = $tTokenList.First
$iLevel = 0
; Walk the token list; the loop ends when the current item pointer is 0.
While $pItem <> 0
    ; Copy the current list item into the shared token struct so its fields can be read.
    _MemMoveMemory($pItem, $__g_pTokenListToken, $__g_iTokenListToken)

    ; Source text covered by this token (extracted once, used by every case below).
    $sTokenText = StringMid($sHTML, $__g_tTokenListToken.Start, $__g_tTokenListToken.Length)

    Switch $__g_tTokenListToken.Type
        Case $__HTMLPARSERCONSTANT_TYPE_NONE
            ; Token type the script does not handle: close the output and quit.
            FileClose($hFile)
            Exit MsgBox(0, "Ooops!", "Some unknown element found!"&@CRLF&"Closing script...")

        Case $__HTMLPARSERCONSTANT_TYPE_CDATA, $__HTMLPARSERCONSTANT_TYPE_COMMENT, $__HTMLPARSERCONSTANT_TYPE_DOCTYPE
            FileWrite($hFile, StringRepeat(@TAB, $iLevel) & $sTokenText & @CRLF)

        Case $__HTMLPARSERCONSTANT_TYPE_STARTTAG
            ; The start tag itself is written at the current level...
            FileWrite($hFile, StringRepeat(@TAB, $iLevel) & $sTokenText & @CRLF)
            ; ...and only a tag that is not self-closed ("/>") can open a deeper level.
            If Not StringRegExp($sTokenText, "[/][>]$") Then
                $aRet = StringRegExp($sTokenText, "^[<]([0-9a-zA-Z]+)", 1)
                If @error Then
                    ; Tag name could not be extracted: $aRet is not an array, so do not
                    ; index it. Treat the tag as an ordinary (non-void) element.
                    $iLevel += 1
                Else
                    Switch $aRet[0]
                        Case "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr"
                            ; void element: has no end tag, indentation stays the same
                        Case Else
                            $iLevel += 1
                    EndSwitch
                EndIf
            EndIf

        Case $__HTMLPARSERCONSTANT_TYPE_ENDTAG
            ; Never drop below zero: a stray end tag must not skew all later indentation.
            If $iLevel > 0 Then $iLevel -= 1
            FileWrite($hFile, StringRepeat(@TAB, $iLevel) & $sTokenText & @CRLF)

        Case $__HTMLPARSERCONSTANT_TYPE_TEXT
            ; Skip text nodes that are nothing but whitespace. \R must not be used inside
            ; a character class (PCRE reads it there as a literal "R", which made text
            ; like " R " vanish); \v covers the vertical whitespace it was meant to match.
            If Not StringRegExp($sTokenText, "^[\h\v\s]+$") Then
                $sTabs = StringRepeat(@TAB, $iLevel)
                ; Re-indent embedded line breaks, drop one leading/trailing newline,
                ; then strip leading and trailing whitespace (flags 1+2).
                FileWrite($hFile, $sTabs & StringStripWS(StringRegExpReplace(StringReplace($sTokenText, @CRLF, @CRLF & $sTabs), "(^\R|\R$)", ""), 1+2) & @CRLF)
            EndIf
    EndSwitch

    $pItem = $__g_tTokenListToken.Next
WEnd
FileClose($hFile)

Func _HTMLParser2($sHTML);alternate parser, dealing with the tmp current