#cs ----------------------------------------------------------------------------
TO DO:
InetGetsize for threads
#ce ----------------------------------------------------------------------------

; Required by the calls visible in this file: _ArrayInsert, _FileListToArray, _SQLite_*.
#include <Array.au3>
#include <File.au3>
#include <SQLite.au3>
; NOTE(review): this copy of the file lists six #include directives but their
; targets were lost. Only the three above can be derived from the visible code;
; restore the remaining three from the original script.

_SQLite_Startup()

; Shared state. $us_array / $image_check / $iRows / $iColumns receive query results,
; $thread_contents holds the thread currently being processed.
Global $database = _SQLite_Open("thumbnails.db"), $hQuery, $aRow, $sMsg, $sDummy, $fn_check, $us_array, $iRows, $iColumns, $image_check, $tb_check
Global $board_first = 17, $board_last = 17
Global $ponychan = "http://www.ponychan.net/chan/rp/"
Global $thread_contents, $thread_array[1], $inet[1] = ["header"], $d_ = "', '"

;General cleanup
_SQLite_Exec($database, "DELETE FROM unsorted;")
FileDelete("Temp\*.*")
DirCreate("Pages")
DirCreate("Pages\Thumbnails")
DirCreate("Temp")

;Download the pages on the board
;~ Download("http://guildedage.net/webcomic/chapter-15/chapter-15-cover/", "temp\test.html")
Download_Board()
Strip_Board_Pages()
Download_Threads()

;HTML processing loop
; _FileListToArray stores the file count in element 0, so the file names start at index 1.
$thread_array = _FileListToArray("Temp", "*.html", 1)
For $x = 1 To UBound($thread_array)-1
    TrayTip("Processing thread "&StringTrimRight($thread_array[$x], 5), UBound($thread_array)-$x-1&" threads remain", 5)
    ConsoleWrite("Processing thread "&$thread_array[$x]&@CRLF)
    Html_Process($thread_array[$x])
Next

_SQLite_Close()
_SQLite_Shutdown()

;###############################
;####### Functions #############
;###############################

;Download every board page from $board_first through $board_last into Temp\<page>.html
Func Download_Board()
    TrayTip("Please wait...", "Downloading board pages "&$board_first&" through "&$board_last, 10)
    ConsoleWrite("Downloading board pages"&@CRLF)
    For $page = $board_first To $board_last
        Download($ponychan&$page&".html", "Temp\"&$page&".html", 1, 1)
    Next
    Download_Close()
EndFunc

;Scan the downloaded board pages for thread links and collect them in $thread_array.
;Each board page file is deleted once it has been scanned.
Func Strip_Board_Pages()
    Local $page_array, $thread_loc

    ;Scrape each downloaded page
    For $x = $board_first To $board_last
        TrayTip("Please wait...", "Searching for links on page "&$x, 10)
        ConsoleWrite("Searching for links on page "&$x&@CRLF)

        ;Split the board page into rows
        $page_array = StringSplit(FileRead("Temp\"&$x&".html"), @CRLF, 1)

        ;Check for thread links
        ; NOTE(review): the literal below is 6 characters long but is compared with a
        ; 46-character prefix, so it cannot match a full-length row — the original
        ; literal looks truncated in this copy; confirm against the original script.
        For $x2 = 1 To $page_array[0]-1
            If StringLeft($page_array[$x2], 46) = '[View]' Then
                ; Strip the leading 10 and trailing 11 characters around the URL
                $thread_loc = StringTrimLeft($page_array[$x2], 10)
                $thread_loc = StringTrimRight($thread_loc, 11)
                ;DELETE THIS LATER
                If $thread_loc = "http://www.ponychan.net/chan/rp/res/37813514.html" Then
                    _ArrayInsert($thread_array, 0, $thread_loc)
                EndIf
            EndIf
        Next

        ;Delete the scraped file
        FileDelete("Temp\"&$x&".html")
    Next
EndFunc

;Download every thread URL collected in $thread_array into Temp\<thread file name>
Func Download_Threads()
    Local $thread_url, $thread_file
    ; Length of the URL prefix that precedes the thread's file name
    Local $prefix_len = StringLen($ponychan & "res/")

    For $x = 0 To UBound($thread_array)-1
        $thread_url = $thread_array[$x]
        ; $thread_array is declared with one empty element that stays in the array
        ; after the inserts; skip it instead of requesting an empty URL.
        If $thread_url = "" Then ContinueLoop

        $thread_file = StringTrimLeft($thread_url, $prefix_len)
        Download($thread_url, "Temp\"&$thread_file, 1, 1)
    Next
    Download_Close()
EndFunc

;Process one downloaded thread file and write the result to Pages\<thread_file>.
;  $thread_file - file name of the thread inside Temp\
Func Html_Process($thread_file)
    ;move this later
    ;Check if the thread size has changed since last time
    ; (Check_Thread_Size is defined outside this part of the file; a result of 1 skips the thread.)
    If Check_Thread_Size($thread_file) = 1 Then Return

    ; If the thread has changed or hasn't been archived yet, process it now
    HTML_Cleanup($thread_file)
    Duplicate_Check()
    Download_Images()
    Download_Close()
    Sort_Images()
    Local $thread_title = Index_Thread($thread_file)

    ;Dump the processed file (mode 2 = write, erasing previous contents)
    Local $out_handle = FileOpen("Pages\"&$thread_file, 2)
    If $out_handle = -1 Then
        ; FileOpen returns -1 on failure; report it instead of writing to an invalid handle
        ConsoleWrite("Could not open Pages\"&$thread_file&" for writing"&@CRLF)
        Return
    EndIf

    FileWriteLine($out_handle, ""&$thread_title&"")
    ; Element 0 of $thread_contents is used as the row count; empty rows are dropped
    For $row = 1 To $thread_contents[0]
        If $thread_contents[$row] <> "" Then FileWrite($out_handle, $thread_contents[$row]&@LF)
    Next

    ;Wrap up
    FileClose($out_handle)
    ;~ FileDelete("Temp\"&$thread_file)
EndFunc

;For every queued image in the "unsorted" table, look for an existing entry in "tb".
;When exactly one match exists, point the HTML row at the stored thumbnail and
;remove the image from the download queue.
Func Duplicate_Check()
    Local $image_file, $image_URL, $line_number, $image_template, $sql_name

    ;Load the unsorted array
    _SQLite_GetTable2d($database, "SELECT * FROM unsorted", $us_array, $iRows, $iColumns)

    ;Check for duplicate filenames
    ; Row 0 of a _SQLite_GetTable2d result holds the column names, so data starts at row 1
    ; (same start index as Download_Images).
    For $x = 1 To UBound($us_array)-1
        $image_file = $us_array[$x][0]
        $image_URL = $us_array[$x][1]
        $line_number = $us_array[$x][2]

        ; Double any single quotes so the file name cannot break out of the SQL literal
        $sql_name = StringReplace($image_file, "'", "''")
        _SQLite_GetTable2d($database, "SELECT * FROM tb WHERE filenames LIKE '%"&$sql_name&"%';", $image_check, $iRows, $iColumns)

        ;If there is a pre-existing filename match, reroute the HTML and delete it from the queue
        ; (header row + exactly one data row)
        If UBound($image_check) = 2 Then
            $image_template = $image_check[1][0]
            $thread_contents[$line_number] = StringReplace($thread_contents[$line_number], $image_URL, "Thumbnails\"&$image_template)
            _SQLite_Exec($database, "DELETE FROM unsorted WHERE filename = '"&$sql_name&"';")
        EndIf
    Next
EndFunc

;Download every image still queued in the "unsorted" table into Temp\
Func Download_Images()
    _SQLite_GetTable2d($database, "SELECT * FROM unsorted", $us_array, $iRows, $iColumns)
    ; Row 0 holds the column names; column 0 is the file name, column 1 the URL
    For $x = 1 To UBound($us_array)-1
        Download($us_array[$x][1], "Temp\"&$us_array[$x][0], 0, 1)
    Next
EndFunc

;Tidy up the HTML
Func HTML_Cleanup($thread_file)
    $thread_contents = StringReplace(FileRead("Temp\"&$thread_file), @CRLF, @LF)
    $thread_contents = StringTrimLeft($thread_contents, StringInStr($thread_contents, '')-1)
    $thread_contents = ''&$thread_contents
    $thread_contents = StringLeft($thread_contents, StringInStr($thread_contents, '