Champak Posted March 11, 2022 Share Posted March 11, 2022 Just sharing in case anyone else found a need. I was looking for something to generate my image cache and webp images on my site and oddly enough couldn't find anything simple or didn't need server access, then I realized I was overcomplicating things and could just generate a sitemap and make an app to crawl it. I have only tested this on win 10 64bit, but it's simple and should work in any environment. You will need chrome driver and the UDF for it. The "URL Element" just put in whatever it is without the "< >". expandcollapse popup
#include <String.au3>
#include <Array.au3>
#include <GUIConstantsEx.au3>
#include <StaticConstants.au3>
#include <WindowsConstants.au3>
; non standard UDF's
#include "wd_helper.au3"
#include "wd_capabilities.au3"

; WebDriver session id; assigned by _Crawl() for the duration of one crawl.
Global $sSession
; Capabilities string built once at startup and reused by every crawl.
Global $sDesiredCapabilities = SetupChrome()
Global $_WD_DEBUG = $_WD_DEBUG_None
; Last-used sitemap file and URL element name, persisted in the registry by _Crawl().
Global $sFile = RegRead("HKEY_CURRENT_USER\Software\Crawler", "CrawlFile")
Global $sSite = RegRead("HKEY_CURRENT_USER\Software\Crawler", "CrawlSite")

; ---------------------------------------------------------------------------
; GUI
; ---------------------------------------------------------------------------
Opt('GUICloseOnESC', 0)
GUICreate("Crawler", 355, 130, -1, -1, -1, $WS_EX_ACCEPTFILES)
GUISetFont(10)

$gFileL = GUICtrlCreateLabel("File:", 10, 13, 30, 20)
$gFile = GUICtrlCreateInput($sFile, 95, 10, 200, 25)
GUICtrlSetState($gFile, $GUI_DROPACCEPTED) ; allow a file to be dropped onto the input

$gBetweenL = GUICtrlCreateLabel("URL Element:", 10, 43, 95, 20)
$gSite = GUICtrlCreateInput($sSite, 95, 40, 200, 25)
; NOTE(review): this caption was split across two lines and appears mis-encoded in the
; posted source; it is rejoined here as-is - confirm the intended hint text.
$gBetweeni = GUICtrlCreateLabel("< ??? >", 300, 43, 55, 20)

$gSpeed = GUICtrlCreateCheckbox("Speed", 10, 70)
$gExperimentali = GUICtrlCreateLabel("(Experimental: Does not wait on Load)", 75, 74, 230, 20)
;GUICtrlSetBkColor(-1, 0xff0000)

$gCrawl = GUICtrlCreateButton("Crawl", 10, 95)
$gFrame = GUICtrlCreateLabel("dds", 49, 96, 295, 27, $SS_ETCHEDFRAME)
$gProgress = GUICtrlCreateProgress(50, 97, 292, 24)
; Clear the visual theme on the progress bar before applying style 1.
; NOTE(review): presumably done so the classic style takes effect - confirm.
DllCall("UxTheme.dll", "int", "SetWindowTheme", "hwnd", GUICtrlGetHandle(-1), "wstr", " ", "wstr", " ")
GUICtrlSetStyle(-1, 1)
; Transparent status label placed over the same rectangle as the progress bar.
$gPages = GUICtrlCreateLabel("", 49, 96, 295, 26, BitOR($SS_CENTER, $SS_CENTERIMAGE), $WS_EX_TOPMOST)
GUICtrlSetBkColor(-1, $GUI_BKCOLOR_TRANSPARENT)
GUISetState()

; Message loop: start a crawl on button press, exit when the window is closed.
While 1
	$ID = GUIGetMsg()
	Switch $ID
		Case $gCrawl
			_Crawl()
		Case $GUI_EVENT_CLOSE
			Exit
	EndSwitch
	Sleep(20)
WEnd

; #FUNCTION# ====================================================================
; Name ..........: _Crawl
; Description ...: Reads the file named in the "File" input, extracts every string
;                  found between <element> and </element> (element name taken from
;                  the "URL Element" input) and navigates a WebDriver session to
;                  each extracted string in turn.
; Parameters ....: None (reads the GUI controls and $sDesiredCapabilities).
; Return values .: None. Returns early, after informing the user, when the file
;                  cannot be read, no URLs are found, or the WebDriver cannot be
;                  started / a session cannot be created.
; Remarks .......: Stores both input values under HKCU\Software\Crawler.
; ===============================================================================
Func _Crawl()
	Local $sPath = GUICtrlRead($gFile)
	Local $sTag = GUICtrlRead($gSite)

	RegWrite("HKEY_CURRENT_USER\Software\Crawler", "CrawlFile", "REG_SZ", $sPath)
	RegWrite("HKEY_CURRENT_USER\Software\Crawler", "CrawlSite", "REG_SZ", $sTag)

	GUICtrlSetData($gProgress, 0)

	; FileRead() accepts a file name directly, so no handle is opened that would
	; have to be closed afterwards (the previous FileOpen() handle was never closed).
	Local $sContent = FileRead($sPath)
	If @error Then
		MsgBox(16, "Crawler", "Could not read file: " & $sPath)
		Return
	EndIf

	; _StringBetween() does not return an array when nothing matches.
	Local $aPages = _StringBetween($sContent, "<" & $sTag & ">", "</" & $sTag & ">")
	If @error Or Not IsArray($aPages) Then
		ConsoleWrite("! Pages to scan: 0" & @CRLF)
		GUICtrlSetData($gPages, "0 Pages.")
		Return
	EndIf
	Local $iCount = UBound($aPages)
	;_ArrayDisplay($aPages,"")

	_WD_Startup()
	Local $iErr = @error
	If $iErr <> $_WD_ERROR_Success Then
		MsgBox(16, "Crawler", "WebDriver startup failed (error " & $iErr & ").")
		Return
	EndIf

	$sSession = _WD_CreateSession($sDesiredCapabilities)
	$iErr = @error
	If $iErr <> $_WD_ERROR_Success Then
		MsgBox(16, "Crawler", "Could not create WebDriver session (error " & $iErr & ").")
		_WD_Shutdown()
		Return
	EndIf
	;ConsoleWrite("Session: " & $sSession & " - " & @ScriptLineNumber & @CRLF)

	ConsoleWrite("! Pages to scan: " & $iCount & @CRLF)
	GUICtrlSetData($gPages, $iCount & " Pages.")

	For $i = 0 To $iCount - 1
		;$Timer = TimerInit()
		_WD_Navigate($sSession, $aPages[$i])

		; "Speed" is labelled "Does not wait on Load": when it is checked only pause
		; briefly; otherwise wait for the page to finish loading.
		; Experimental, may not completely work in generating everything on the page on the speed side
		If GUICtrlRead($gSpeed) = $GUI_CHECKED Then
			Sleep(10)
		Else
			_WD_LoadWait($sSession, 250, 6000)
		EndIf

		;$Diff = TimerDiff($Timer)
		;ConsoleWrite($i & " -- " & $aPages[$i] & " -- " & $Diff/1000 & @CRLF)

		; ($i + 1) so the bar reaches 100% after the last page.
		GUICtrlSetData($gProgress, ($i + 1) / $iCount * 100)
		; Label text is set again after every progress update, as in the original.
		; NOTE(review): presumably to repaint it over the progress bar - confirm.
		GUICtrlSetData($gPages, $iCount & " Pages.")
	Next

	GUICtrlSetData($gPages, "Complete.")
	_WD_DeleteSession($sSession)
	_WD_Shutdown()
EndFunc   ;==>_Crawl

; #FUNCTION# ====================================================================
; Name ..........: SetupChrome
; Description ...: Configures the WebDriver UDF for chromedriver.exe on port 9515
;                  (verbose log written to chrome.log in the script directory) and
;                  builds the capabilities used to create sessions.
; Parameters ....: None
; Return values .: The value returned by _WD_CapabilitiesGet().
; ===============================================================================
Func SetupChrome()
	_WD_Option('Driver', 'chromedriver.exe')
	_WD_Option('Port', 9515)
	_WD_Option('DriverParams', '--verbose --log-path="' & @ScriptDir & '\chrome.log"')

	_WD_CapabilitiesStartup()
	_WD_CapabilitiesAdd('alwaysMatch', 'chrome')
	_WD_CapabilitiesAdd('w3c', True)
	_WD_CapabilitiesAdd('args', '--headless')
	_WD_CapabilitiesAdd('excludeSwitches', 'enable-automation')
	_WD_CapabilitiesDump(@ScriptLineNumber) ; dump current Capabilities setting to console - only for testing in this demo

	Local $sDesiredCapabilities = _WD_CapabilitiesGet()
	Return $sDesiredCapabilities
EndFunc   ;==>SetupChrome
Link to comment Share on other sites More sharing options...
Recommended Posts
Create an account or sign in to comment
You need to be a member in order to leave a comment
Create an account
Sign up for a new account in our community. It's easy!
Register a new account | Sign in
Already have an account? Sign in here.
Sign In Now