Sign in to follow this  
Followers 0
Falling

Posting 4 homeuse

1 post in this topic

This sort of works, but relative paths still need to be fixed.

; Foo_($url)
; Crawls $url recursively. "temp.txt" holds one already-visited URL per line.
; If $url is not listed yet it is appended to the file, its links are
; extracted (_GetLinks -> _cleanuplinks -> _cleanQuotes -> _AddHttpToLinks)
; and Foo_ is called again for every extracted link.
;
; Parameters: $url - address of the page to process.
; Returns:    nothing meaningful. The script is terminated with Exit when
;             "temp.txt" cannot be opened.
; NOTE(review): every newly discovered link adds one recursion level; a large
; site may hit AutoIt's recursion limit - confirm against the target version.
Func Foo_($url)
    Local $sVisitedFile = "temp.txt"
    Local $urlInTextFile = 0
    Local $fileRead, $file, $line

    ; A missing list just means nothing has been visited yet. Opening a
    ; non-existent file in read mode fails, which used to abort the very
    ; first run, so only scan the list when it is actually there.
    If FileExists($sVisitedFile) Then
        $fileRead = FileOpen($sVisitedFile, 0)
        If $fileRead = -1 Then
            MsgBox(0, "Error", "Unable to open file.")
            Exit
        EndIf

        While 1
            $line = FileReadLine($fileRead)
            ; Stop on end-of-file and on any other read error, so a failing
            ; read cannot spin forever.
            If @error Then ExitLoop
            If $line = $url Then
                $urlInTextFile = 1
                ExitLoop ; one match is enough
            EndIf
        WEnd

        FileClose($fileRead)
    EndIf

    ; Already visited: nothing to record and nothing to crawl.
    If $urlInTextFile <> 0 Then Return

    ; Record the URL before following its links so that pages linking back
    ; to it are recognised as visited by the recursive calls below.
    $file = FileOpen($sVisitedFile, 1)
    ; Check if file opened for appending OK
    If $file = -1 Then
        MsgBox(0, "Error", "Unable to open file.")
        Exit
    EndIf

    FileWrite($file, $url & @CRLF)
    ; Close before recursing: the nested calls reopen the same file.
    FileClose($file)

    Local $linklist = _AddHttpToLinks(_cleanQuotes(_cleanuplinks(_GetLinks($url))))
    For $nZ = 1 To $linklist[0]
        Foo_($linklist[$nZ])
    Next
EndFunc
   
;$file = FileOpen("temp.txt", 2)
; Check if file opened for writing OK
;If $file = -1 Then
;   MsgBox(0, "Error", "Unable to open file.")
;Exit
;EndIf


; Script entry point: ask for the start page and crawl from there.
$sURL = InputBox("What webpage?", "Enter the webpage")
; InputBox reports a cancelled/failed dialog through @error; an empty answer
; is equally useless as a start page, so stop instead of crawling "".
If @error Or StringLen($sURL) = 0 Then Exit
; NOTE(review): the reason for this 10 second pause is not evident from the
; script itself; kept as-is - confirm whether it is still needed.
Sleep(10000)
Foo_($sURL)
;FileClose($file)


Func _GetLinks($psURL)

;Returns an array of the raw href attributes found on a webpage.
;  $psURL  - address of the page to download.
;  Returns - StringSplit-style array: element [0] is the number of entries,
;            elements [1..n] hold the text of each anchor tag from "href="
;            up to (not including) the first ">" that follows it.
;            When the page has no links, or could not be downloaded,
;            element [0] is 0 and there are no further elements.
;------------------------------------------------------------------------------

;Download the HTML to a temporary file
   Local $sTempFile = "$tridsf13.htm"
   URLDownloadToFile($psURL, $sTempFile)
   Local $sHTML = FileRead($sTempFile, FileGetSize($sTempFile))
   FileDelete($sTempFile)

;Cleanup the HTML for better consumption
   $sHTML = StringReplace($sHTML, @CR, "")
   $sHTML = StringReplace($sHTML, @LF, "")
   $sHTML = StringReplace($sHTML, @TAB, " ")

;Break it into chewable bytes: every "href=" starts a new line and every
;"</a>" pushes the trailing text onto a throw-away "scrap" line
   $sHTML = StringReplace($sHTML, "href=", @LF & "href=")
   $sHTML = StringReplace($sHTML, "</a>", @LF & "scrap")

   Local $asHTML = StringSplit($sHTML, @LF)

;Spit out the bones
   Local $sLinks = ""
   Local $asLink
   For $nX = 1 To $asHTML[0]
     ;Process only "href=" lines
       If StringLeft($asHTML[$nX], 5) = "href=" Then
           $asLink = StringSplit($asHTML[$nX], ">")
           $sLinks = $sLinks & @LF & $asLink[1]
       EndIf
   Next

;No links at all: StringSplit on an empty string would hand back ONE empty
;entry, which callers would then treat as a link. Return a zero-count array.
   If StringLen($sLinks) = 0 Then
       Local $asNone[1]
       $asNone[0] = 0
       Return $asNone
   EndIf

;Return the juicy links (drop the leading @LF added by the first append)
   Return StringSplit(StringTrimLeft($sLinks, 1), @LF)

EndFunc

; _cleanuplinks($ArrayHrefs)
; Removes the leading "href=" (5 characters) from every entry that starts
; with it; other entries are left untouched.
;   $ArrayHrefs - StringSplit-style array, element [0] holds the entry count.
;   Returns     - the array with the prefixes removed.
Func _cleanuplinks($ArrayHrefs)
    Local $iCount = $ArrayHrefs[0]
    Local $sEntry
    For $iIdx = 1 To $iCount
        $sEntry = $ArrayHrefs[$iIdx]
        ; Nothing to strip on entries without the prefix.
        If StringLeft($sEntry, 5) <> "href=" Then ContinueLoop
        $ArrayHrefs[$iIdx] = StringTrimLeft($sEntry, 5)
    Next
    Return $ArrayHrefs
EndFunc
; _cleanQuotes($ArrayHrefs)
; Removes one leading and one trailing quote character (" or ') from every
; entry. Entries without a quote at that end are left alone, so unquoted
; attribute values such as  href=page.htm  no longer lose their first and
; last character.
;   $ArrayHrefs - StringSplit-style array, element [0] holds the entry count.
;   Returns     - the array with the surrounding quotes removed.
Func _cleanQuotes($ArrayHrefs)
    Local $sEntry, $sEdge

    For $nX = 1 To $ArrayHrefs[0]
        $sEntry = $ArrayHrefs[$nX]

        ; Each end is checked on its own: the closing quote may be missing
        ; when other attributes follow the href value.
        $sEdge = StringLeft($sEntry, 1)
        If $sEdge = '"' Or $sEdge = "'" Then $sEntry = StringTrimLeft($sEntry, 1)

        $sEdge = StringRight($sEntry, 1)
        If $sEdge = '"' Or $sEdge = "'" Then $sEntry = StringTrimRight($sEntry, 1)

        $ArrayHrefs[$nX] = $sEntry
    Next

    Return $ArrayHrefs
EndFunc

; _AddHttpToLinks($ArrayHrefs)
; Prefixes "http://" to every entry that does not already start with
; "http://" or "https://" (compared case-insensitively). Secure links used
; to be turned into "http://https://...".
;   $ArrayHrefs - StringSplit-style array, element [0] holds the entry count.
;   Returns     - the array with the prefixes added.
; NOTE(review): relative paths are still only prefixed, not resolved against
; the page they came from.
Func _AddHttpToLinks($ArrayHrefs)
    Local $sEntry

    For $nX = 1 To $ArrayHrefs[0]
        $sEntry = $ArrayHrefs[$nX]
        If StringLeft($sEntry, 7) = "http://" Then ContinueLoop
        If StringLeft($sEntry, 8) = "https://" Then ContinueLoop
        $ArrayHrefs[$nX] = "http://" & $sEntry
    Next

    Return $ArrayHrefs
EndFunc

Share this post


Link to post
Share on other sites



Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!


Register a new account

Sign in

Already have an account? Sign in here.


Sign In Now
Sign in to follow this  
Followers 0