Jump to content

Recommended Posts

Hi everyone. I want to format the output of _INetGetSource to look nice and pretty. 

Example google.com source output: 

<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="en"><head><meta content="Search the world's information, including webpages, images, videos and more. Google has many special features to help you find exactly what you're looking for." name="description"><meta content="noodp" name="robots"><meta content="text/html; charset=UTF-8" http-equiv="Content-Type"><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"><title>Google</title><script>(function(){window.google={kEI:'DJtTWvCOI6WGjwSE9JrICg',kEXPI:'18167,1354277,1354916,1355218,1355675,1355793,1356171,1356806,1357219,1357326,3700304,3700519,3700521,4003510,4029815,4031109,4043492,4045841,4048347,4081038,4081164,4095909,4096834,4097153,4097195,4097922,4097929,4098733,4098740,4098752,4102237,4102827,4103475,4103845,4106084,4107914,4109316,4109490,4112770,4113217,4115697,4116349,4116724,4116731,4116926,4116927,4116935,4117980,4118798,4119032,4119034,4119036,4120285,4120286,4120660,4121175,4121518,4122511,4123830,4123850,4124091,4124850,4125837,4126202,4126754,4126869,4127262,4127418,4127473,4127744,4127863,4128586,4128622,4129001,4129520,4129556,4129633,4130362,4130783,4131247,4131834,4132956,4133114,4133509,4135025,4135088,4135249,4135934,4136073,4136092,4136137,4137597,4137646,4140792,4140849,4141281,4141707,4141915,4142071,4142328,4142420,4142443,4142503,4142678,4142729,4142829,4142834,4142847,4143278,4143527,4143902,4144442,4144550,4144704,4145074,4145075,4145082,4145088,4145461,4145485,4145622,4145688,4145713,4145836,4146146,4146183,4146874,4147032,4147043,4147096,4147443,4147800,4147951,4148257,4148304,4148436,4148498,4148573,6512220,10200083,10202524,10202562,15807763,19000288,19000423,19000427,19001999,19002287,19002288,19002366,19002548,19002880,19003321,19003323,19003325,19003326,19003328,19003329,19003330,19003407,19003408,19003409,19004309,19004516,19004517,19004518,19004519,19004520,19004521,19004531,19004656,19004668,19004670,19004692,4131
7155',authuser:0,kscs:'c9c918f0_DJtTWvCOI6WGjwSE9JrICg',u:'c9c918f0',kGL:'US'};google.kHL='en';})();(function(){google.lc=[];google.li=0;google.getEI=function(a){for(var b;a&&(!a.getAttribute||!(b=a.getAttribute("eid")));)a=a.parentNode;return b||google.kEI};google.getLEI=function(a){for(var b=null;a&&(!a.getAttribute||!(b=a.getAttribute("leid")));)a=a.parentNode;return b};google.https=function(){return"https:"==window.location.protocol};google.ml=function(){return null};google.wl=function(a,b){try{google.ml(Error(a),!1,b)}catch(d){}};google.time=function(){return(new Date).getTime()};google.log=function(a,b,d,c,g){if(a=google.logUrl(a,b,d,c,g)){b=new Image;var e=google.lc,f=google.li;e[f]=b;b.onerror=b.onload=b.onabort=function(){delete e[f]};google.vel&&google.vel.lu&&google.vel.lu(a);b.src=a;google.li=f+1}};google.logUrl=function(a,b,d,c,g){var e="",f=google.ls||"";d||-1!=b.search("&ei=")||(e="&ei="+google.getEI(c),-1==b.search("&lei=")&&(c=google.getLEI(c))&&(e+="&lei="+c));c="";!d&&google.cshid&&-1==b.search("&cshid=")&&(c="&cshid="+google.cshid);a=d||"/"+(g||"gen_204")+"?atyp=i&ct="+a+"&cad="+b+e+f+"&zx="+google.time()+c;/^http:/i.test(a)&&google.https()&&(google.ml(Error("a"),!1,{src:a,glmm:1}),a="");return a};}).call(this);(function(){google.y={};google.x=function(a,b){if(a)var c=a.id;else{do c=Math.random();while(google.y[c])}google.y[c]=[a,b];return!1};google.lm=[];google.plm=function(a){google.lm.push.apply(google.lm,a)};google.lq=[];google.load=function(a,b,c){google.lq.push([[a],b,c])};google.loadAll=function(a,b){google.lq.push([a,b])};}).call(this);google.f={};var a=window.location,b=a.href.indexOf("#");if(0<=b){var c=a.href.substring(b+1);/(^|&)q=/.test(c)&&-1==c.indexOf("#")&&a.replace("/search?"+c.replace(/(^|&)fp=[^&]*/g,"")+"&cad=h")};</script><style>#gbar,#guser{font-size:13px;padding-top:1px !important;}#gbar{height:22px}#guser{padding-bottom:7px !important;text-align:right}.gbh,.gbd{border-top:1px solid 
#c9d7f1;font-size:1px}.gbh{height:0;position:absolute;top:24px;width:100%}@media all{.gb1{height:22px;margin-right:.5em;vertical-align:top}#gbar{float:left}}a.gb1,a.gb4{text-decoration:underline !important}a.gb1,a.gb4{color:#00c !important}.gbi .gb4{color:#dd8e27 !important}.gbf .gb4{color:#900 !important}

But I want it outputted like this:

<!doctype html>
<html itemscope="" itemtype="http://schema.org/WebPage" lang="en">

<head>
    <meta content="Search the world's information, including webpages, images, videos and more. Google has many special features to help you find exactly what you're looking for." name="description">
    <meta content="noodp" name="robots">
    <meta content="text/html; charset=UTF-8" http-equiv="Content-Type">
    <meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image">
    <title>Google</title>
    <script>
        (function() {
            window.google = {
                kEI: 'DJtsdfgWGjwSE9JrICg',
                kEXPI: '18167,1354277,1354916,1355218,1355675,1355793,1356171,1356806,1357219,1357326,37sdfg0304,3700519,3700521,4003510,4029815,4031109,4043492,4045841,4048347,4081038,4081164,4095909,4096834,4097153,4097195,4097922,4097929,4098733,4098740,4098752,4102237,4102827,4103475,4103845,4106084,4107914,4109316,4109490,4112770,4113217,4115697,4116349,4116724,4116731,4116926,4116927,4116935,4117980,4118798,4119032,4119034,4119036,4120285,4120286,4120660,4121175,4121518,4122511,4123830,4123850,4124091,4124850,4125837,4126202,4126754,4126869,4127262,4127418,4127473,4127744,4127863,4128586,4128622,4129001,4129520,4129556,4129633,4130362,4130783,4131247,4131834,413sdfg56,4133114,4133509,4135025,4135088,4135249,4135934,4136073,4136092,4136137,4137597,4137646,4140792,4140849,4141281,4141707,4141915,4142071,4142328,4142420,4142443,4142503,4142678,4142729,4142829,4142834,4142847,4143278,4143527,4143902,4144442,4144550,4144704,4145074,4145075,4145082,4145088,4145461,4145485,4145622,4145688,4145713,4145836,4146146,4146183,4146874,4147032,4147043,4147096,4147443,4147800,4147951,4148257,4148304,4148436,4148498,4148573,6512220,10200083,10202524,10202562,15807763,19000288,190sdfg23,19000427,19001999,19002287,19002288,19002366,19002548,19002880,19003321,19003323,19003325,19003326,19003328,19003329,19003330,19003407,19003408,19003409,19004309,19004516,19004517,19004518,19004519,19004520,19004521,19004531,19004656,19004668,19004670,19004692,41317155',
                authuser: 0,
                kscs: 'c9c918f0_DJtTWvCOI6WGjwSE9JrICg',
                u: 'c9c918f0',
                kGL: 'US'
            };
            google.kHL = 'en';
        })();
        
.......

I checked the forums and did not see any UDFs that allow for this. I see the Chilkat UDF but that only supports JSON. Any help would be greatly appreciated.

Link to post
Share on other sites

Hi @natedog102.

So i took a stab at it for 30mins, and got it to work with google html. (I was doing something related anyway, and i got to address a problem in my hTMLParser.au3 lib i can implement when i find a way to make it less messy)

the file you need to run in the same folder as the two other files is prettyhtml.au3

the html you need to parse, currently need to be in a file named: prettyhtml.txt

the output will be in the same folder and be named: prettyhtml_output.txt

Hope you can use it.

Btw, there might still be some strange cases that give you trouble. If you find any, be sure to let me know; I would appreciate it.

prettyhtml.au3

HTMLParser.au3

TokenList.au3

Edit: credit to @Zedna for the StringRepeat Function

Edited by genius257
Link to post
Share on other sites

Thanks for the quick response! If the HTML is already partially formatted, it doubles the whitespaces and returns. If the HTML contains javascript, it sometimes doesn't appear in the formatted text file. Same thing with CSS.

Hope that helps. Let me know if you want me to post any examples.

Link to post
Share on other sites
2 minutes ago, natedog102 said:

If the HTML is already partially formatted, it doubles the whitespaces and returns.

hmmm I imagine it might be an easy fix with StringStripWS(..., 1+2)

3 minutes ago, natedog102 said:

If the HTML contains javascript, it sometimes doesn't appear in the formatted text file. Same thing with CSS.

Hmmm i suspect it might be the cases i have a tough time testing for myself ^^ examples would be greatly appreciated :)

4 minutes ago, natedog102 said:

Hope that helps.

Oh yeah, it helps :) The more bugs i know of, the more i can try to improve it ^^

Link to post
Share on other sites

Hey @natedog102.

So here's the most i'll do on the script for now: prettyhtml.au3

What's missing that i know of without your special case examples, would be start tags without end tags. There's just too many for me to do without some kind of usage of the end product for me ^^, see https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission

The "An ... element's ... tag may be omitted if ..." cases are many and very specific for each case :)

Anyway i hope the updated script may help a little.

 

Link to post
Share on other sites
  • 2 years later...

@genius257

I'm actually seeing a problem with that prettyhtml code you nicely posted.  For some reason the code isn't clearing the memory each time it's run. I'm using it in a way where I keep regenerating the clean formatted html. How can I re-use the _Pretty() function without it appending and appending to the output file? See example here:

 

#include "HTMLParser.au3"
#include <Array.au3>

; First pass: _Pretty() reads prettyhtml.txt and writes the formatted result to prettyhtml_output.txt.
_Pretty()
MsgBox(0,"Notice","Make changes to prettyhml.txt then click OK")
; Second pass over the (possibly edited) input.
; NOTE(review): nothing resets the parser's token list between the two calls; this is the
; reproduction of the "output keeps appending" problem described in the post.
_Pretty()



; #FUNCTION# ====================================================================================================================
; Name ..........: StringRepeat
; Description ...: Builds a string consisting of $nCount copies of the first character of $sChar.
; Parameters ....: $sChar  - String whose first character is repeated (only Asc($sChar) is used).
;                  $nCount - Number of repetitions.
; Return values .: Success - The repeated string ("" when $nCount is less than 1).
;                  Failure - "" and sets @error: 1 = buffer could not be created, 2 = DllCall failed.
; Remarks .......: Credit to Zedna for the original memset based approach.
; ===============================================================================================================================
Func StringRepeat($sChar, $nCount)
    ; A "char[0]" (or negative sized) struct is invalid, so answer the trivial case directly.
    If $nCount < 1 Then Return ""

    Local $tBuffer = DllStructCreate("char[" & $nCount & "]")
    If @error Then Return SetError(1, 0, "")

    ; memset(void *dest, int c, size_t count): the count is pointer sized, hence "ulong_ptr" rather than "int".
    DllCall("msvcrt.dll", "ptr:cdecl", "memset", "ptr", DllStructGetPtr($tBuffer), "int", Asc($sChar), "ulong_ptr", $nCount)
    If @error Then Return SetError(2, 0, "")

    Return DllStructGetData($tBuffer, 1)
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _Pretty
; Description ...: Reads "prettyhtml.txt", tokenizes it with _HTMLParser2 and writes one token per line to
;                  "prettyhtml_output.txt", indented with one tab per nesting level.
; Parameters ....: None
; Return values .: Success - no explicit return value.
;                  Failure - 0 and sets @error: 1 = input could not be read, 2 = parsing failed,
;                            3 = output file could not be opened.
; Remarks .......: Void elements (meta, br, img, ...) and self-closing tags have no end tag, so they do not
;                  increase the indentation level. The level never drops below zero on surplus end tags.
;                  An unknown token type still terminates the whole script, as before.
; ===============================================================================================================================
Func _Pretty()
    ; Elements that never have an end tag; wrapped in "|" so a whole-name lookup is a simple StringInStr.
    Local Const $sVoidTags = "|area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr|"

    Local $sHTML = FileRead("prettyhtml.txt")
    If @error = 1 Then Return SetError(1, 0, 0)

    ; Parse before touching the output file, so a parser failure does not leave a truncated result behind.
    Local $tTokenList = _HTMLParser2($sHTML)
    If @error Or Not IsDllStruct($tTokenList) Then Return SetError(2, 0, 0)

    Local $hFile = FileOpen("prettyhtml_output.txt", 2)
    If $hFile = -1 Then Return SetError(3, 0, 0)

    Local $iLevel = 0, $sTabs, $sToken, $aName, $bVoid
    Local $pItem = $tTokenList.First

    ; An empty token list has First = 0; the loop is then skipped instead of copying from a null pointer.
    While Not ($pItem = 0)
        ; Copy the current node into the shared scratch struct so its fields can be read by name.
        _MemMoveMemory($pItem, $__g_pTokenListToken, $__g_iTokenListToken)
        $sToken = StringMid($sHTML, $__g_tTokenListToken.Start, $__g_tTokenListToken.Length)

        Switch $__g_tTokenListToken.Type
            Case $__HTMLPARSERCONSTANT_TYPE_NONE
                FileClose($hFile)
                Exit MsgBox(0, "Ooops!", "Some unknown element found!"&@CRLF&"Closing script...")
            Case $__HTMLPARSERCONSTANT_TYPE_CDATA, $__HTMLPARSERCONSTANT_TYPE_COMMENT, $__HTMLPARSERCONSTANT_TYPE_DOCTYPE
                $sTabs = ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "")
                FileWrite($hFile, $sTabs & $sToken & @CRLF)
            Case $__HTMLPARSERCONSTANT_TYPE_STARTTAG
                $sTabs = ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "")
                FileWrite($hFile, $sTabs & $sToken & @CRLF)
                ; Only elements that will be closed later open a new nesting level.
                $bVoid = (StringRight($sToken, 2) = "/>")
                $aName = StringRegExp($sToken, "^<([0-9a-zA-Z]+)", 1)
                If IsArray($aName) Then
                    If StringInStr($sVoidTags, "|" & StringLower($aName[0]) & "|") Then $bVoid = True
                EndIf
                If Not $bVoid Then $iLevel += 1
            Case $__HTMLPARSERCONSTANT_TYPE_ENDTAG
                ; Clamp at zero so an unmatched end tag cannot skew the indentation of everything after it.
                If $iLevel > 0 Then $iLevel -= 1
                $sTabs = ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "")
                FileWrite($hFile, $sTabs & $sToken & @CRLF)
            Case $__HTMLPARSERCONSTANT_TYPE_TEXT
                ; Re-indent every line of a multi-line text run (script/style bodies, paragraphs).
                $sTabs = ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "")
                FileWrite($hFile, $sTabs & StringReplace($sToken, @CRLF, @CRLF & $sTabs) & @CRLF)
        EndSwitch

        $pItem = $__g_tTokenListToken.Next
    WEnd
    FileClose($hFile)
EndFunc ;==>_Pretty

; #FUNCTION# ====================================================================================================================
; Name ..........: _HTMLParser2
; Description ...: Alternate tokenizer that deals with the <script> content problem of _HTMLParser. Splits $sHTML into
;                  DOCTYPE, start tag, end tag, CDATA, comment and text tokens and appends them to the global token list
;                  through _TokenList_CreateToken.
; Parameters ....: $sHTML - The HTML source text.
; Return values .: Success - $__g_tTokenListList, @extended = offset reached by the last match.
;                  Failure - 0, @error = 1 and @extended = 1..7 identifying the token kind that could not be created.
; Remarks .......: Tokens are appended to the global list; this function does not reset that list.
;                  Input that matches none of the token patterns (a stray "<", an unterminated <script>) is emitted as
;                  a text token instead of silently ending the parse and dropping the rest of the document.
; ===============================================================================================================================
Func _HTMLParser2($sHTML);alternate parser, dealing with the tmp current <script> content problem in _HTMLParser function
    Local $iExtended = 0, $aRet, $iOffset = 1, $iScript = 0
    Local Const $iLength = StringLen($sHTML)

    While $iOffset <= $iLength
        ; --- DOCTYPE ---
        $aRet=StringRegExp($sHTML, "\G<![dD][oO][cC][tT][yY][pP][eE][\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]+[^>]*?>", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_DOCTYPE, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 1, 0)
            $iOffset = $iExtended
            ContinueLoop
        EndIf

        ; --- start tag (name, optional attributes, optional "/") ---
        $aRet=StringRegExp($sHTML, "\G[<]([0-9a-zA-Z]+)(?:[\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]+[^\x{0000}-\x{001F}\x{007F}-\x{009F}\x{0020}\x{0022}\x{0027}\x{003E}\x{002F}\x{003D}\x{FDD0}\x{FDEF}\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}\x{10FFFE}\x{10FFFF}]+(?:[\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]*[=][\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]*(?:[^\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}\x{0022}\x{0027}\x{003D}\x{003C}\x{003E}\x{0060}]+|['][^']*[']|[""""][^""""]*[""""]))?)*[\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]*[/]?[>]", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_STARTTAG, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 2, 0)
            $iOffset = $iExtended
            ; Remember that raw script text follows, so "<" inside it is not mistaken for markup.
            If StringLower($aRet[0]) = "script" Then $iScript+=1
            ContinueLoop
        EndIf

        ; --- end tag ---
        $aRet=StringRegExp($sHTML, "\G[<][/]([0-9a-zA-Z]+)[\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]*[>]", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_ENDTAG, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 3, 0)
            $iOffset = $iExtended
            ; Never let a surplus </script> push the counter below zero.
            If StringLower($aRet[0]) = "script" And $iScript > 0 Then $iScript-=1
            ContinueLoop
        EndIf

        ; --- CDATA section; (?s) lets it span several lines ---
        $aRet=StringRegExp($sHTML, "(?s)\G<!\[CDATA\[.*?\]\]>", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_CDATA, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 4, 0)
            $iOffset = $iExtended
            ContinueLoop
        EndIf

        ; --- comment; (?s) lets it span several lines ---
        $aRet=StringRegExp($sHTML, "(?s)\G<!--.*?-->", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_COMMENT, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 5, 0)
            $iOffset = $iExtended
            ContinueLoop
        EndIf

        ; --- text: inside <script> everything up to the closing tag (any case, optional whitespace
        ;     before ">"), otherwise everything up to the next "<" ---
        $aRet=($iScript>0)?StringRegExp($sHTML, "(?si)\G.+?([<][/]script[\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]*[>])", 1, $iOffset):StringRegExp($sHTML, "\G[^<]+", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            ; The closing tag was only a delimiter; give it back so the end tag branch tokenizes it.
            If $iScript>0 Then $iExtended-=StringLen($aRet[0])
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_TEXT, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 6, 0)
            $iOffset = $iExtended
            ContinueLoop
        EndIf

        ; --- fallback: nothing matched although input remains. Keep the content as text rather than
        ;     dropping it: the rest of an unterminated script, or a stray "<" plus the text after it. ---
        $aRet=StringRegExp($sHTML, ($iScript>0) ? "(?s)\G.+" : "\G<[^<]*", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_TEXT, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 7, 0)
            $iOffset = $iExtended
            ContinueLoop
        EndIf

        ExitLoop
    WEnd
    Return SetError(0, $iExtended, $__g_tTokenListList)
EndFunc


Thanks!

Link to post
Share on other sites

Hi @NassauSky

Your question seems confusing to me as you mention memory usage and output file as if they are the same?

Anyway the memory thing is pretty easy to explain :)

_HTMLParser2 uses the _TokenList_CreateToken. It in turn allocates memory that AutoIt does not clear automatically (via _MemGlobalAlloc). This is done to allow dllstruct references within dllstructs without holding a variable with all dllstructs to prevent AutoIt from freeing the memory when the  variable gets unset.

Also the tokenlist never get cleared, so i guess you were still talking about memory when you mentioned "appending" to the output, i guess?

Clearing the token list is easy; freeing the memory held by the token list, however, requires a loop and some code:

; Releases every node of the global token list with _MemGlobalFree and then marks the list as empty
; by zeroing its First and Last fields.
Func TokenList_Clear()
    Local $tTokens = $__g_tTokenListList
    Local $pCurrent = $tTokens.First
    Local $tNode, $pFollowing

    While Not ($pCurrent = 0)
        ; Map a struct over the node and fetch its successor before the node's memory is released.
        $tNode = DllStructCreate($tagTokenListToken, $pCurrent)
        $pFollowing = $tNode.Next
        _MemGlobalFree($pCurrent)
        $pCurrent = $pFollowing
    WEnd

    $tTokens.First = 0
    $tTokens.Last = 0
EndFunc

Just call it between _Pretty calls like this:

; Format once, free the tokens produced by that run, then format again from an empty token list.
_Pretty()
TokenList_Clear()
_Pretty()

 

Hope this helps :)

Link to post
Share on other sites

@genius257

Jackpot that is the solution and I am sorry how I had explained it.  I was writing the formatted result of the string into an edit box so I knew it had nothing to do with the writing to the file.  I just provided that as a sample.   I figured it was some sort of memory allocation issue. I was interested in getting a better idea of what it does.  Are you saying that memory is allocated for particular DLL calls which otherwise would require you to load a larger set of DLLs into memory and using memalloc saves memory?

 

;--- Example provided by @genius257
;--- This working example reads an unformatted html file (prettyhtml.txt) and writes the cleaned-up
;--- result back to the same file (prettyhtml.txt), overwriting the input.
; This sample writes to a file but can be easily modified for other purposes

#include "HTMLParser.au3"
#include <Array.au3>

; First formatting pass.
_Pretty()
MsgBox(0,"Notice","Make changes to prettyhml.txt then click OK")
TokenList_Clear() ;The tokens parsed by the previous run stay in the global token list until this is called
; Second formatting pass, starting from an empty token list.
_Pretty()

; #FUNCTION# ====================================================================================================================
; Name ..........: StringRepeat
; Description ...: Builds a string consisting of $nCount copies of the first character of $sChar.
; Parameters ....: $sChar  - String whose first character is repeated (only Asc($sChar) is used).
;                  $nCount - Number of repetitions.
; Return values .: Success - The repeated string ("" when $nCount is less than 1).
;                  Failure - "" and sets @error: 1 = buffer could not be created, 2 = DllCall failed.
; Remarks .......: Credit to Zedna for the original memset based approach.
; ===============================================================================================================================
Func StringRepeat($sChar, $nCount)
    ; A "char[0]" (or negative sized) struct is invalid, so answer the trivial case directly.
    If $nCount < 1 Then Return ""

    Local $tBuffer = DllStructCreate("char[" & $nCount & "]")
    If @error Then Return SetError(1, 0, "")

    ; memset(void *dest, int c, size_t count): the count is pointer sized, hence "ulong_ptr" rather than "int".
    DllCall("msvcrt.dll", "ptr:cdecl", "memset", "ptr", DllStructGetPtr($tBuffer), "int", Asc($sChar), "ulong_ptr", $nCount)
    If @error Then Return SetError(2, 0, "")

    Return DllStructGetData($tBuffer, 1)
EndFunc

; #FUNCTION# ====================================================================================================================
; Name ..........: _Pretty
; Description ...: Reads "prettyhtml.txt", tokenizes it with _HTMLParser2 and writes one token per line back to
;                  "prettyhtml.txt" (the input is overwritten), indented with one tab per nesting level.
; Parameters ....: None
; Return values .: Success - no explicit return value.
;                  Failure - 0 and sets @error: 1 = input could not be read, 2 = parsing failed,
;                            3 = output file could not be opened.
; Remarks .......: Void elements (meta, br, img, ...) and self-closing tags have no end tag, so they do not
;                  increase the indentation level. The level never drops below zero on surplus end tags.
;                  An unknown token type still terminates the whole script, as before.
; ===============================================================================================================================
Func _Pretty()
    ; Elements that never have an end tag; wrapped in "|" so a whole-name lookup is a simple StringInStr.
    Local Const $sVoidTags = "|area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr|"

    ; The whole input is read into memory first, because the same file is reopened for writing below.
    Local $sHTML = FileRead("prettyhtml.txt")
    If @error = 1 Then Return SetError(1, 0, 0)

    ; Parse before truncating the file, so a parser failure does not destroy the input.
    Local $tTokenList = _HTMLParser2($sHTML)
    If @error Or Not IsDllStruct($tTokenList) Then Return SetError(2, 0, 0)

    Local $hFile = FileOpen("prettyhtml.txt", 2)
    If $hFile = -1 Then Return SetError(3, 0, 0)

    Local $iLevel = 0, $sTabs, $sToken, $aName, $bVoid
    Local $pItem = $tTokenList.First

    ; An empty token list has First = 0; the loop is then skipped instead of copying from a null pointer.
    While Not ($pItem = 0)
        ; Copy the current node into the shared scratch struct so its fields can be read by name.
        _MemMoveMemory($pItem, $__g_pTokenListToken, $__g_iTokenListToken)
        $sToken = StringMid($sHTML, $__g_tTokenListToken.Start, $__g_tTokenListToken.Length)

        Switch $__g_tTokenListToken.Type
            Case $__HTMLPARSERCONSTANT_TYPE_NONE
                FileClose($hFile)
                Exit MsgBox(0, "Ooops!", "Some unknown element found!"&@CRLF&"Closing script...")
            Case $__HTMLPARSERCONSTANT_TYPE_CDATA, $__HTMLPARSERCONSTANT_TYPE_COMMENT, $__HTMLPARSERCONSTANT_TYPE_DOCTYPE
                $sTabs = ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "")
                FileWrite($hFile, $sTabs & $sToken & @CRLF)
            Case $__HTMLPARSERCONSTANT_TYPE_STARTTAG
                $sTabs = ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "")
                FileWrite($hFile, $sTabs & $sToken & @CRLF)
                ; Only elements that will be closed later open a new nesting level.
                $bVoid = (StringRight($sToken, 2) = "/>")
                $aName = StringRegExp($sToken, "^<([0-9a-zA-Z]+)", 1)
                If IsArray($aName) Then
                    If StringInStr($sVoidTags, "|" & StringLower($aName[0]) & "|") Then $bVoid = True
                EndIf
                If Not $bVoid Then $iLevel += 1
            Case $__HTMLPARSERCONSTANT_TYPE_ENDTAG
                ; Clamp at zero so an unmatched end tag cannot skew the indentation of everything after it.
                If $iLevel > 0 Then $iLevel -= 1
                $sTabs = ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "")
                FileWrite($hFile, $sTabs & $sToken & @CRLF)
            Case $__HTMLPARSERCONSTANT_TYPE_TEXT
                ; Re-indent every line of a multi-line text run (script/style bodies, paragraphs).
                $sTabs = ($iLevel > 0 ? StringRepeat(@TAB, $iLevel) : "")
                FileWrite($hFile, $sTabs & StringReplace($sToken, @CRLF, @CRLF & $sTabs) & @CRLF)
        EndSwitch

        $pItem = $__g_tTokenListToken.Next
    WEnd
    FileClose($hFile)
EndFunc ;==>_Pretty

; #FUNCTION# ====================================================================================================================
; Name ..........: _HTMLParser2
; Description ...: Alternate tokenizer that deals with the <script> content problem of _HTMLParser. Splits $sHTML into
;                  DOCTYPE, start tag, end tag, CDATA, comment and text tokens and appends them to the global token list
;                  through _TokenList_CreateToken.
; Parameters ....: $sHTML - The HTML source text.
; Return values .: Success - $__g_tTokenListList, @extended = offset reached by the last match.
;                  Failure - 0, @error = 1 and @extended = 1..7 identifying the token kind that could not be created.
; Remarks .......: Tokens are appended to the global list; this function does not reset that list.
;                  Input that matches none of the token patterns (a stray "<", an unterminated <script>) is emitted as
;                  a text token instead of silently ending the parse and dropping the rest of the document.
; ===============================================================================================================================
Func _HTMLParser2($sHTML);alternate parser, dealing with the tmp current <script> content problem in _HTMLParser function
    Local $iExtended = 0, $aRet, $iOffset = 1, $iScript = 0
    Local Const $iLength = StringLen($sHTML)

    While $iOffset <= $iLength
        ; --- DOCTYPE ---
        $aRet=StringRegExp($sHTML, "\G<![dD][oO][cC][tT][yY][pP][eE][\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]+[^>]*?>", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_DOCTYPE, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 1, 0)
            $iOffset = $iExtended
            ContinueLoop
        EndIf

        ; --- start tag (name, optional attributes, optional "/") ---
        $aRet=StringRegExp($sHTML, "\G[<]([0-9a-zA-Z]+)(?:[\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]+[^\x{0000}-\x{001F}\x{007F}-\x{009F}\x{0020}\x{0022}\x{0027}\x{003E}\x{002F}\x{003D}\x{FDD0}\x{FDEF}\x{FFFE}\x{FFFF}\x{1FFFE}\x{1FFFF}\x{2FFFE}\x{2FFFF}\x{3FFFE}\x{3FFFF}\x{4FFFE}\x{4FFFF}\x{5FFFE}\x{5FFFF}\x{6FFFE}\x{6FFFF}\x{7FFFE}\x{7FFFF}\x{8FFFE}\x{8FFFF}\x{9FFFE}\x{9FFFF}\x{AFFFE}\x{AFFFF}\x{BFFFE}\x{BFFFF}\x{CFFFE}\x{CFFFF}\x{DFFFE}\x{DFFFF}\x{EFFFE}\x{EFFFF}\x{FFFFE}\x{FFFFF}\x{10FFFE}\x{10FFFF}]+(?:[\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]*[=][\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]*(?:[^\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}\x{0022}\x{0027}\x{003D}\x{003C}\x{003E}\x{0060}]+|['][^']*[']|[""""][^""""]*[""""]))?)*[\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]*[/]?[>]", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_STARTTAG, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 2, 0)
            $iOffset = $iExtended
            ; Remember that raw script text follows, so "<" inside it is not mistaken for markup.
            If StringLower($aRet[0]) = "script" Then $iScript+=1
            ContinueLoop
        EndIf

        ; --- end tag ---
        $aRet=StringRegExp($sHTML, "\G[<][/]([0-9a-zA-Z]+)[\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]*[>]", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_ENDTAG, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 3, 0)
            $iOffset = $iExtended
            ; Never let a surplus </script> push the counter below zero.
            If StringLower($aRet[0]) = "script" And $iScript > 0 Then $iScript-=1
            ContinueLoop
        EndIf

        ; --- CDATA section; (?s) lets it span several lines ---
        $aRet=StringRegExp($sHTML, "(?s)\G<!\[CDATA\[.*?\]\]>", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_CDATA, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 4, 0)
            $iOffset = $iExtended
            ContinueLoop
        EndIf

        ; --- comment; (?s) lets it span several lines ---
        $aRet=StringRegExp($sHTML, "(?s)\G<!--.*?-->", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_COMMENT, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 5, 0)
            $iOffset = $iExtended
            ContinueLoop
        EndIf

        ; --- text: inside <script> everything up to the closing tag (any case, optional whitespace
        ;     before ">"), otherwise everything up to the next "<" ---
        $aRet=($iScript>0)?StringRegExp($sHTML, "(?si)\G.+?([<][/]script[\x{0009}\x{000A}\x{000C}\x{000D}\x{0020}]*[>])", 1, $iOffset):StringRegExp($sHTML, "\G[^<]+", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            ; The closing tag was only a delimiter; give it back so the end tag branch tokenizes it.
            If $iScript>0 Then $iExtended-=StringLen($aRet[0])
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_TEXT, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 6, 0)
            $iOffset = $iExtended
            ContinueLoop
        EndIf

        ; --- fallback: nothing matched although input remains. Keep the content as text rather than
        ;     dropping it: the rest of an unterminated script, or a stray "<" plus the text after it. ---
        $aRet=StringRegExp($sHTML, ($iScript>0) ? "(?s)\G.+" : "\G<[^<]*", 1, $iOffset)
        If @error=0 Then
            $iExtended = @extended
            If _TokenList_CreateToken($__HTMLPARSERCONSTANT_TYPE_TEXT, $iOffset, $iExtended-($iOffset)) = 0 Then Return SetError(1, 7, 0)
            $iOffset = $iExtended
            ContinueLoop
        EndIf

        ExitLoop
    WEnd
    Return SetError(0, $iExtended, $__g_tTokenListList)
EndFunc

 Func TokenList_Clear()
    Local $tList = $__g_tTokenListList
    Local $tToken
    Local $hToken = $tList.First
    While Not $hToken = 0
        $tToken = DllStructCreate($tagTokenListToken, $hToken)
        $tToken = $tToken.Next
        _MemGlobalFree($hToken)
        $hToken = $tToken
    WEnd
    $tList.First = 0
    $tList.Last = 0
EndFunc

 

Link to post
Share on other sites

Hi @NassauSky :)

4 hours ago, NassauSky said:

Jackpot that is the solution and I am sorry how I had explained it.

No problem :) i was just not sure if i understood you correctly ;)

4 hours ago, NassauSky said:

I figured it was some sort of memory allocation issue. I was interested in getting a better idea of what it does.  Are you saying that memory is allocated for particular DLL calls which otherwise would require you to load a larger set of DLLs into memory and using memalloc saves memory?

Ah :) Well no, DllStructCreate and DllCall are not necessarily always used together. To tokenize the input HTML text I use a simple linked list approach. AutoIt does not have any dynamic array that I am aware of, so I used my own linked list solution (a singly linked list, to be specific).

The DllStruct is used to read and write to the linked list nodes, but if no AutoIt variable contains the DllStruct created, then the address gets freed and will most likely contain garbage data from usage by other parts of the AutoIt process. Two solutions to keep the data:

one: keep all DllStruct's in a array that may need to be expanded multiple times, to keep all the DllStruct's reserved in memory.

two: allocate the memory and manage it yourself. This has its good sides and bad sides. I just like it, because one DllStruct can have a PTR pointing to the next node in the list, without having to store that reference by name anywhere. This CAN be better for performance, if done right :) (I am a flawed person, so my code could be optimized in many ways, but I'm lazy).

 

example:

; Demonstrates that memory behind a DllStruct is only valid while a variable still holds the struct.
$tDllStruct = DllStructCreate("char[5]")

DllStructSetData($tDllStruct, 1, "hello")

; Read the data through the struct variable itself.
MsgBox(0, "directly", DllStructGetData($tDllStruct, 1))

$pDllStruct = DllStructGetPtr($tDllStruct)

; Read the same bytes through a second struct mapped onto the raw pointer.
MsgBox(0, "via pointer", DllStructGetData(DllStructCreate("char[5]", $pDllStruct), 1))

$tDllStruct = 0 ; freeing the dllstruct from memory

; Unrelated work in between; presumably meant to give the interpreter a chance to reuse the freed memory.
ConsoleWrite("1")
ConsoleWrite(1+2-3*4/5)

; $pDllStruct now points at memory that is no longer owned by any struct, so the content read here is unreliable.
MsgBox(0, "via pointer, but no longer the correct data", DllStructGetData(DllStructCreate("char[5]", $pDllStruct), 1))

 

I hope some of my ramblings made sense. If not, you are more than welcome to ask more, if you want :D

Link to post
Share on other sites

@genius257 haha thanks. You've got me thinking a bit deeper now but forgive me I haven't worked with stacks in a while, I'm not sure what the advantage is yet of using that method rather than something maybe easier like:

 

;https://www.autoitscript.com/forum/topic/117057-how-to-define-pointer-variable-in-autoit/

;Would this make it simpler (Dynamic Array Resizing)?
; Each MsgBox shows UBound($aArray) after the preceding resize or reassignment.
Local $aArray
Dim $aArray[1]
MsgBox(0,"Array Length", UBound($aArray))
ReDim $aArray[4]
MsgBox(0,"Array Length", UBound($aArray))
ReDim $aArray[2]
MsgBox(0,"Array Length", UBound($aArray))
; Overwrite the array variable with an empty string.
$aArray=""
MsgBox(0,"Array Length", UBound($aArray))

;Your example not using a pointer
Dim $aArray[5]
$aArray[0] = "hello"
MsgBox(0, "Directly Addressing", $aArray[0])
$aArray=""

;or a stacked list used this way
#include <array.au3>
Local $aList[3] = ["element 1", "element 2", "element 3"]
; Pop and show the three elements one after another.
MsgBox(0,"Current Value", _ArrayPop($aList))
MsgBox(0,"Current Value", _ArrayPop($aList))
MsgBox(0,"Current Value", _ArrayPop($aList))
If UBound($aList)=0 Then ReDim $aList[1] ;If adding over again when the array is gone
_ArrayPush($aList, "new element 1")
MsgBox(0,"Current Value", _ArrayPop($aList))

 

Make sense or is it really deeper than that?

 

Link to post
Share on other sites

@NassauSky Sorry for the delay.

Using it for the HTML pretty print does not play to its strengths.

When using ReDim, new memory is allocated for the entire new size of the new array and the data is then copied over. Finally the old array memory is freed.

This makes sense for changes to the end of the array, but if you need to make changes at the start or middle, you need a loop to iterate over the entire array (AutoIt is not the fastest for this). Linked Lists, however, are very good at changes anywhere in the list, as the next pointer simply needs to be altered.

Then again, Arrays are better for random index access, as each index is easy to find based on the starting ptr + item size * index, compared to the Linked List where you need to run through each element, until the desired index is reached.

I chose a Linked List so as not to waste time re-allocating the same array again and again during generation, and for future features planned for the HTML parser. Performance-wise I did not do much testing. I expect that if only generation and reading are needed, a dynamic array implementation (instead of the default AutoIt static array) would be best. But then again, it's mostly relevant with bigger data sets.

I have a newer (yet still messy) version of the HTML Parser here, if you're ever interested.

Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now
  • Recently Browsing   0 members

    No registered users viewing this page.

  • Similar Content

    • By Hermes
      Hi, I have a site that has the following elements below: 
      <div>More element here</div> <div>More element here</div> <div>More element here</div> When I do this in Auto It:
      Local $oSelectDiv = _WD_FindElement($sSession, $_WD_LOCATOR_ByCSSSelector, "div") _WD_HighlightElement($sSession, $oSelectDiv, 1) I also tried to add [3], but it doesnt seems to work:
      Local $oSelectDiv = _WD_FindElement($sSession, $_WD_LOCATOR_ByCSSSelector, "div[3]") _WD_HighlightElement($sSession, $oSelectDiv, 1) It always highlight the first one, but I am trying to highlight the 3rd in the list. Is there anyway to select the 3rd div without having to add any class/id in the divs, and without using XPATH? The structure of the elements in that site were built that way.
    • By Pured
      I am looking to create a script which refreshes/reads a webpage every few seconds. My goal is to see if the page has changed, then I will send myself a notification that the webpage has been updated.
       
      However, rather than downloading the entire webpage every single time, is there a way to check when the webpage last updated?
       
      If not, is there a way to partially download/read the HTML source until a specific tag is hit?
       
      Goal: I would like to increase my poll rate and not excessively waste data.
    • By Mr_Microphone
      Alright, I may be an idiot.
      Three years ago, I wrote a program that pushed component information to a secure site via their API. I went back to add some attributes and (here's the idiot part) ended up losing the  source code and my modified code does not quite work. I have the compiled version that works minus the new attributes, so I know that their system has not changed. I stripped the larger program down from 3,000 lines to the part that is broken, but I am stumped. This was one of my first scripts, so it heavily leverages examples and isn't as pretty as I'd like it to be.
      Be gentle. 
      The program / script creates a new records as expected, but for some reason, I cannot access information in the response, which I need for a later step.
      I use Charles, a web debugging proxy tool so I can see the request and the response and both are as expected. Also, when I write to log file, the JSON reply is exactly what I expect and need, but when I try to do anything with the http body, it seems to be blank. 
      Here is the script minus  the URL and token:
      #include <Array.au3> #include <Curl.au3> #include <MsgBoxConstants.au3> #include <json.au3>  ; this was added as an alternate way to read the data Global $WM_serial_number = "WM20745001" Global $wm_component_status_id = "10" Global $wm_manufacturer ="Multi-Tech" Global $wm_model = "MTR-LAT1-B07" Global $cellular_carrier_id = "3" Global $iccid_esn = "89010303300012345678" Global $ip_address = "192.168.2.11" Global $NewIDNumber     Local $Curl = Curl_Easy_Init()     Local $Html = $Curl ; any number as identify     Local $Header = $Curl + 1 ; any number as identify     Local $HtmlFile = "cURL_Request.html"     Local $File = FileOpen($HtmlFile, 2 + 16)     Local $Slist = Curl_Slist_Append(0, "content-type: multipart/form-data; boundary=---011000010111000001101001")     $Slist = Curl_Slist_Append($Slist, "authorization: Token token=" & $Token)     Curl_Easy_Setopt($Curl, $CURLOPT_PROXY, "127.0.0.1") ; needed to use Charles web debugging proxy     Curl_Easy_Setopt($Curl, $CURLOPT_PROXYPORT, 8888) ; needed to use Charles     Curl_Easy_Setopt($Curl, $CURLOPT_HTTPHEADER, $Slist) ;     Curl_Easy_Setopt($Curl, $CURLOPT_URL, $Server & "wireless_module" & "s")     Curl_Easy_Setopt($Curl, $CURLOPT_SSL_VERIFYPEER, 0)     Curl_Easy_Setopt($Curl, $CURLOPT_TIMEOUT, 30)     Curl_Easy_Setopt($Curl, $CURLOPT_WRITEDATA, $Html)     Curl_Easy_Setopt($Curl, $CURLOPT_WRITEFUNCTION, Curl_FileWriteCallback())     Curl_Easy_Setopt($Curl, $CURLOPT_WRITEDATA, $File)     Local $HttpPost = ""     Local $LastItem = ""         Curl_FormAdd($HttpPost, $LastItem, $CURLFORM_COPYNAME, "wireless_module" & "[serial_number]", $CURLFORM_COPYCONTENTS, $WM_serial_number, $CURLFORM_END)         Curl_FormAdd($HttpPost, $LastItem, $CURLFORM_COPYNAME, "wireless_module" & "[component_status_id]", $CURLFORM_COPYCONTENTS, $wm_component_status_id, $CURLFORM_END)         Curl_FormAdd($HttpPost, $LastItem, $CURLFORM_COPYNAME, "wireless_module" & "[manufacturer]", $CURLFORM_COPYCONTENTS, $wm_manufacturer, 
$CURLFORM_END)         Curl_FormAdd($HttpPost, $LastItem, $CURLFORM_COPYNAME, "wireless_module" & "[model]", $CURLFORM_COPYCONTENTS, $wm_model, $CURLFORM_END)         Curl_FormAdd($HttpPost, $LastItem, $CURLFORM_COPYNAME, "wireless_module" & "[cellular_carrier_id]", $CURLFORM_COPYCONTENTS, $cellular_carrier_id, $CURLFORM_END)         Curl_FormAdd($HttpPost, $LastItem, $CURLFORM_COPYNAME, "wireless_module" & "[iccid_esn]", $CURLFORM_COPYCONTENTS, $iccid_esn, $CURLFORM_END)         Curl_FormAdd($HttpPost, $LastItem, $CURLFORM_COPYNAME, "wireless_module" & "[ip_address]", $CURLFORM_COPYCONTENTS, $ip_address, $CURLFORM_END)         ; submit         Curl_Easy_Setopt($Curl, $CURLOPT_HTTPPOST, $HttpPost)         Local $Code = Curl_Easy_Perform($Curl)         If $Code = $CURLE_OK Then         ConsoleWrite("Content Type: " & Curl_Easy_GetInfo($Curl, $CURLINFO_CONTENT_TYPE) & @LF)         ConsoleWrite("Download Size: " & Curl_Easy_GetInfo($Curl, $CURLINFO_SIZE_DOWNLOAD) & @LF)         MsgBox(0, 'Html', BinaryToString(Curl_Data_Get($Html))) ; this is something I threw in for debugging, expecting to see SOMETHING. Returns nothing         MsgBox(0, 'Header', BinaryToString(Curl_Data_Get($Header))) ; this is something I threw in for debugging, expecting to see SOMETHING. Returns nothing         Local $response = Curl_Easy_GetInfo($Curl, $CURLINFO_RESPONSE_CODE)             If $response = "409" Then $response = "Failed due to a conflict."             If $response = "200" Then $response = "Was NOT created."             If $response = "201" Then $response = "Was created."             ; read the ID that was assigned and store it         $NewIDNumber = StringRight(StringLeft(BinaryToString(Curl_Data_Get($Html)), 10), 4) ; this DID work, but now it doesn't. An old compiled version still works ;~         Global $JsonObject = json_decode($Html); another debugging attempt. Did not use json functions previously and the program worked without it. 
;~         Global $NewIDNumber = json_get($JsonObject, '.id')         ConsoleWrite(@CRLF &'! id:' & $NewIDNumber & @CRLF & @CRLF)    ; debugging feedback         MsgBox(0, $response, $wm_serial_number & " new ID = " & $NewIDNumber); debugging feedback         If $Code <> $CURLE_OK Then ConsoleWrite(Curl_Easy_StrError($Code) & @LF)             Local $Data = BinaryToString(Curl_Data_Get($Curl))             Curl_Easy_Cleanup($Curl)             Curl_Data_Cleanup($Curl)             Curl_Data_Cleanup($Header)             Curl_Data_Cleanup($Html)             Curl_FormFree($HttpPost)             Curl_slist_free_all($Slist)             curl_easy_reset($Curl)             FileClose($File)             ConsoleWrite(@LF)         EndIf  This is the captured request (minus the host and token)
      POST /api/v2/wireless_modules HTTP/1.1 Host: api. Accept: */* authorization: Token token= Content-Length: 942 Expect: 100-continue content-type: multipart/form-data; boundary=---011000010111000001101001; boundary=------------------------9adb0d87c7ea5061 --------------------------9adb0d87c7ea5061 Content-Disposition: form-data; name="wireless_module[serial_number]" WM20745001 --------------------------9adb0d87c7ea5061 Content-Disposition: form-data; name="wireless_module[component_status_id]" 10 --------------------------9adb0d87c7ea5061 Content-Disposition: form-data; name="wireless_module[manufacturer]" Multi-Tech --------------------------9adb0d87c7ea5061 Content-Disposition: form-data; name="wireless_module[model]" MTR-LAT1-B07 --------------------------9adb0d87c7ea5061 Content-Disposition: form-data; name="wireless_module[cellular_carrier_id]" 3 --------------------------9adb0d87c7ea5061 Content-Disposition: form-data; name="wireless_module[iccid_esn]" 89010303300012345678 --------------------------9adb0d87c7ea5061 Content-Disposition: form-data; name="wireless_module[ip_address]" 192.168.2.11 --------------------------9adb0d87c7ea5061-- and the captured response
      HTTP/1.1 201 Created Date: Sun, 04 Apr 2021 00:12:18 GMT Server: Apache Cache-Control: max-age=0, private, must-revalidate Access-Control-Allow-Origin: not-allowed Vary: Accept-Encoding Access-Control-Max-Age: 1728000 X-XSS-Protection: 1; mode=block X-Request-Id: 71cfcf36-6020-48a6-a822-d2b393a27b69 Access-Control-Allow-Credentials: true Access-Control-Allow-Methods: PUT, OPTIONS, GET, POST ETag: W/"25d97fe8a9387cb4b9029a9e62b0bfa2" X-Frame-Options: SAMEORIGIN, SAMEORIGIN X-Runtime: 0.344005 X-Content-Type-Options: nosniff Access-Control-Request-Method: * X-Powered-By: Phusion Passenger 5.2.1 Strict-Transport-Security: max-age=63072000; includeSubDomains; preload Location: /wireless_modules/3195 Status: 201 Created Connection: close Transfer-Encoding: chunked Content-Type: application/json; charset=utf-8 X-Charles-Received-Continue: HTTP/1.1 100 Continue {"id":3195,"model":"MTR-LAT1-B07","serial_number":"WM20745001","manufacturer":"Multi-Tech","mfg_date":null,"iccid_esn":"89010303300012345678","ip_address":"192.168.2.11","purchase_order":null,"supplier":null,"cellular_carrier_id":3,"component_status_id":10,"component_status":{"id":10,"name":"Hold","description":"Available- Held for specific use"},"custom_attributes":[{"name":"Deactivated","type":"Boolean","value":false},{"name":"Port 3001","type":"Boolean","value":false}],"comments":[]}  
      Also attached is the log file. I need to read the id value. Clearly, it is arriving back to cURL, since it is being written out to the log, but I cannot seem to get to it within the code. 
      It is established that I may be an idiot, but this idiot has wasted days in non-billable hours trying to figure out what should be a simple glitch.
      Help???
       
      cURL_Request.html
    • By Hermes
      I have an HTML table that displays data, along with an Excel spreadsheet that has the same data as the HTML table. I want to match only the Title column in my HTML table with the Title column in my Excel spreadsheet. If the titles match, click on the Edit hyperlink and continue the loop to the next row. The issue I'm experiencing is that it's not matching correctly. So far I've written the code below:
      <table border="1" class="test"> <tr> <th> UniqueID</th> <th> Title</th> <th> UserID</th> <th> Address</th> <th> Gender </th> </tr> <tr> <td> 1 </td> <td> Title1 </td> <td> 12345 </td> <td> Manila </td> <td> <span> Male </span> </td> </tr> <tr> <td align="center" colspan="5"> <a href="#" class="testlink">Edit</a> </td> </tr> <tr> <td> 2 </td> <td> Title2 </td> <td> 67891 </td> <td> Valenzuela </td> <td> <span> Female </span> </td> </tr> <tr> <td align="center" colspan="5" > <a href="#" class="testlink">Edit</a> </td> </tr> <tr> <td> 3 </td> <td> Title3 </td> <td> 88888 </td> <td> Ohio </td> <td> <span> Male </span> </td> </tr> <tr> <td align="center" colspan="5" > <a href="#" class="testlink">Edit</a> </td> </tr> <tr> <td> 4 </td> <td> Title4 </td> <td> 77777 </td> <td> California </td> <td> <span> Female </span> </td> </tr> <tr> <td align="center" colspan="5" > <a href="#" class="testlink">Edit</a> </td> </tr> <tr> <td> 5 </td> <td> Title5 </td> <td> 33333 </td> <td> Arizona </td> <td> <span> Male </span> </td> </tr> <tr> <td align="center" colspan="5" > <a href="#" class="testlink">Edit</a> </td> </tr> </table> #Include "Chrome.au3" #Include "wd_core.au3" #Include "wd_helper.au3" #Include "Excel.au3" #Include "_HtmlTable2Array.au3" #Include "Array.au3" Local $sDesiredCapabilities, $sSession SetupChrome() _WD_Startup() $sSession = _WD_CreateSession($sDesiredCapabilities) _WD_LoadWait($sSession) _WD_Navigate($sSession, "index.html") Sleep(6000) Local $oExcel = _Excel_Open() Local $oWorkbook = _Excel_BookOpen($oExcel, "test.xlsx") ; Get the table element $sElement = _WD_FindElement($sSession, $_WD_LOCATOR_ByXPath, "//table[@class='test']") ; Retrieve HTML $sHTML = _WD_ElementAction($sSession, $sElement, "Property", "outerHTML") ;Local $aTable = _HtmlTableGetWriteToArray($sHTML) Local $aArray1 = _Excel_RangeRead($oWorkbook,1,$oWorkbook.ActiveSheet.Usedrange.Columns("B:B")) Local $aArray2 = _HtmlTableGetWriteToArray($sHTML) ;_ArrayDisplay($aArray1) 
;_ArrayDisplay($aArray2) For $i = UBound($aArray1) - 1 To 0 step - 1 For $j = UBound($aArray2) - 1 to 0 step - 1 If $aArray1[$i][1] == $aArray2[$j][1] Then _WD_WaitElement($sSession, $_WD_LOCATOR_ByXPath, "//a[contains(@class,'testlink') or contains(text(),'Edit')]") $test1 = _WD_FindElement($sSession, $_WD_LOCATOR_ByXPath, "//a[contains(@class,'testlink') or contains(text(),'Edit')]") _WD_ElementAction($sSession, $test1, 'click') ;_ArrayDisplay($aArray1) ;_ArrayDelete($aArray1 , $i) ;exitloop EndIf Next Next _WD_Shutdown() Func SetupChrome() _WD_Option('Driver', 'chromedriver.exe') _WD_Option('Port', 9515) _WD_Option('DriverParams', '--log-path="' & @ScriptDir & '\chrome.log"') $sDesiredCapabilities = '{"capabilities": {"alwaysMatch": {"goog:chromeOptions": {"w3c": true, "args":["start-maximized","disable-infobars"]}}}}' EndFunc ;==>SetupChrome Would appreciate if anyone can provide tips, or point me in the right direction in doing it.
       
      test.xlsx
    • By Zaoka
      I'm trying to create a script that logs in to my SAP WEBI "web page" and replaces some uploaded Excel files, but I'm having problems even with the login.
      These are user fill elements with Firefox Inspector
      <input type="text" id="_id0:logon:CMS" name="_id0:logon:CMS"> <input type="text" id="_id0:logon:USERNAME" name="_id0:logon:USERNAME"> <input type="password" id="_id0:logon:PASSWORD" name="_id0:logon:PASSWORD"> LogOnButton
      <input type="submit" id="_id0:logon:logonButton" value="Log On" class="logonButtonNoHover logon_button_no_hover" onmouseover="this.className = 'logonButtonHover logon_button_hover';" onmouseout="this.className = 'logonButtonNoHover logon_button_no_hover';">  
       
      My example: I tried _IEGetObjByName and _IEGetObjById, but the script only opens the page and does not even fill out the form.
        #include <IE.au3> Call("signIn") Func signIn() Global $oIE = _IECreate("http://192.xxx.xxx.xx:xxx/BOE/BI") Sleep (5000) Local $server = _IEGetObjByName($oIE, "_id0:logon:CMS") Local $username = _IEGetObjByName($oIE, "_id0:logon:USERNAME") Local $password = _IEGetObjByName($oIE, "_id0:logon:PASSWORD") Local $button = _IEGetObjById($oIE, "_id0:logon:logonButton") _IEFormElementSetValue($server, "TestServer:1000") _IEFormElementSetValue($username, "MyUserName") _IEFormElementSetValue($password, "MyPass") ; THIS PART ABOUT BUTTON CLICK I DONT UNDERSTAND AT ALL $sSelector = "body > div:nth-of-type(2) > div > div:nth-of-type(3) > div > table > tbody > tr > td > table > tbody > tr > td > table > tbody > tr:nth-of-type(1) > td > div > div:nth-of-type(1) > form > div:nth-of-type(5) > button" $signInUC = $oIE.document.QuerySelector($sSelector) _IEAction($signInUC, "click") EndFunc ;==>signIn  
      I would be very grateful for any help
×
×
  • Create New...