Good day, I would appreciate any help refining and speeding up my script. Disclaimer: I am an AutoIt and coding newbie. My script has one main purpose: to OCR many PDF files on a nightly schedule, based on their modified date. There are two stages to implementing the script: Firstly, to OCR several thousand files in one go on a one-time basis (and never touch them again unless they are modified). Secondly, to run on a daily schedule and OCR the documents saved into the file system each day. These are the challenges involved: 1. PDFs must be found recursively in several hundred directories contained in one parent directory, and copied into a single directory for Acrobat to batch-OCR, because Acrobat batches do not recurse. 2. PDFs must be placed back into the file system with the same file name, because the document management system tracks its files through a database. 3. The OCR process changes the modified date, so the script needs to ignore files that it has already OCR'd in the past 24 hours. Number 3 is the only part left, and it is stumping me. My current script logs all of these activities, so I think that I could use that log to ignore OCR'd files for 24 hours, but my brain is fried at this point and I'm not sure how to do it, beyond a vague notion of reading each line of the log into an array and using the array to tell the script which files to ignore. I have no idea how to accomplish this. I expect to hear right away that I should be using arrays for the whole process, but I am new to AutoIt and to scripting in general, so if possible please give examples with at least a couple of comments telling me what is going on. I really, really appreciate any help. #include<Date.au3>
#include<array.au3>
Opt("ExpandVarStrings", 1) ;0=don't expand, 1=do expand
Opt("ExpandEnvStrings", 1) ;0=don't expand, 1=do expand

; Set these! ========================
; Root of the document tree that is scanned recursively for PDFs.
Global $OCRStartFolderName = "C:\Worldox Documents\"
; Flat staging folder the PDFs are copied into before the Acrobat batch runs.
Global $CopyFolder = "C:\OCR Batch In\"
; Folder the processed files are moved into after the batch has finished.
Global $OCRdFolder = "C:\OCR Batch Out\"
; Command line handed to Run(). The executable path contains spaces, so it is
; wrapped in double quotes; otherwise Windows has to guess where the program
; name ends and the arguments begin.
Global $AcrobatPath = '"C:\Program Files\Adobe\Acrobat 9.0\Acrobat\acrobat.exe" /n'
; ===================================

; Counter incremented by the logging functions (once per logged line).
Global $FileCount = 0

; Stage 1: collect the PDFs from the document tree into $CopyFolder.
ScanWorldoxFolder($OCRStartFolderName)
; Initial Worldox directory scan
; Recursively walks $SourceFolder and copies every PDF modified within the last
; $MaxAgeDays days into $CopyFolder, logging each copy through CopiedLogFile().
;
; Parameters:
;   $SourceFolder - folder to scan; sub-folders are scanned recursively.
;   $MaxAgeDays   - optional; a file is copied when _DateDiff('D', ...) between
;                   its modified time and now is <= this value. The unit is
;                   DAYS, not hours. Defaults to 5000, the value that was
;                   previously hard-coded. Pass 1 or 2 for a nightly run.
;
; The copy is stored under a flattened name derived from the full source path:
; "-" becomes "+", then "\" and ":" become "-". ScanCopyFolder() reverses the
; first two steps. NOTE(review): the encoding is lossy - a "+" already present
; in a path and the ":" after the drive letter cannot be restored.
Func ScanWorldoxFolder($SourceFolder, $MaxAgeDays = 5000)
    Local $Search = FileFindFirstFile($SourceFolder & "\*.*")
    If $Search = -1 Then Return ; nothing matched: there is no handle to walk or close

    Local $File, $FullFilePath, $FileDate, $FileStamp, $FileModifiedDiff, $FullPathString

    While 1
        $File = FileFindNextFile($Search)
        If @error Then ExitLoop ; no more entries

        $FullFilePath = $SourceFolder & "\" & $File

        ; Directories are descended into and never treated as documents.
        If StringInStr(FileGetAttrib($FullFilePath), "D") Then
            ScanWorldoxFolder($FullFilePath, $MaxAgeDays)
            ContinueLoop
        EndIf

        ; Test the extension only. Searching the whole path for ".pdf" would also
        ; match names such as "x.pdf.bak" or any file below a folder "scans.pdf".
        ; "<>" compares strings case-insensitively, so ".PDF" is accepted too.
        If StringRight($File, 4) <> ".pdf" Then ContinueLoop

        ; The date work is done for PDF files only; it is wasted on everything else.
        $FileDate = FileGetTime($FullFilePath, 0, 0) ; modified time as an array
        If @error Then ContinueLoop ; no array was returned - indexing it would abort the script

        $FileStamp = $FileDate[0] & "/" & $FileDate[1] & "/" & $FileDate[2] & " " & $FileDate[3] & ":" & $FileDate[4] & ":" & $FileDate[5]
        $FileModifiedDiff = _DateDiff('D', $FileStamp, _NowCalc()) ; 'D' = whole days
        If $FileModifiedDiff > $MaxAgeDays Then ContinueLoop ; too old

        ; Flatten the full path into a single file name (see header comment).
        $FullPathString = StringReplace($FullFilePath, "-", "+")
        $FullPathString = StringReplace($FullPathString, "\", "-")
        $FullPathString = StringReplace($FullPathString, ":", "-")

        If FileCopy($FullFilePath, $CopyFolder & $FullPathString, 1) Then ; 1 = overwrite
            CopiedLogFile($FullFilePath) ; Log its file path
            CopiedLogFile("Last Modified: " & $FileStamp)
            CopiedLogFile("Days since Modification: " & $FileModifiedDiff)
        Else
            ; Record the failure instead of logging the file as if it had been copied.
            CopiedLogFile("Copy FAILED: " & $FullFilePath)
        EndIf
    WEnd

    FileClose($Search)
EndFunc
; Activity log for directory scan
; Appends $FileName as one line to today's "Copied Filelist" log, increments the
; script-level $FileCount counter and shows the text in a tooltip at 0,0.
;
; The log lives in $CopyFolder and is named after the current date as returned
; by _NowDate(), with every "/" replaced by "-" so it is valid in a file name.
; $FileCount goes up once per call, i.e. once per logged line.
Func CopiedLogFile($FileName)
    Local $DateTag = StringReplace(_NowDate(), "/", "-")
    Local $LogPath = $CopyFolder & $DateTag & " Copied Filelist.txt"

    FileWriteLine($LogPath, $FileName)
    $FileCount = $FileCount + 1
    ToolTip($FileName, 0, 0)
EndFunc
; Stage 2: run Acrobat and open Batch Processing to OCR the proper folder
; (configured in Acrobat), then close Acrobat.
Run($AcrobatPath)
If @error Then
    ; Run() failed, so no Acrobat window will ever appear. The WinWait below has
    ; no timeout and would block this scheduled script forever, so stop here.
    ConsoleWriteError("Could not start Acrobat: " & $AcrobatPath & @CRLF)
    Exit 1
EndIf

WinWait("Adobe Acrobat Pro Extended")
WinActivate("Adobe Acrobat Pro Extended")
WinWaitActive("Adobe Acrobat Pro Extended")
Sleep(500)
Send("!a") ; Advanced
Sleep(500)
Send("d") ; Document Processing
Sleep(500)
Send("b") ; Batch Processing
Sleep(500)
WinWait("Batch Sequences") ; A batch sequence called "Batch OCR" should already be configured for this section to work properly
Sleep(1000)
Send("s") ; Run sequence
Send("{ENTER}") ; Start the batch
WinWait("Warnings") ; Batch is complete
Sleep(1000)
Send("{ENTER}") ; Enter "OK" in warnings window, closing it
Sleep(2000)
WinClose("Adobe Acrobat Pro Extended")

; Stage 3: reset the counter (it was declared twice in a row here before) and
; move the processed files out of the staging folder.
$FileCount = 0
ScanCopyFolder($CopyFolder)
; CopyFolder directory scan
; Recursively walks $SourceFolder and moves every file it finds into
; $OCRdFolder, logging each move through MovedLogFile().
;
; Parameters:
;   $SourceFolder - folder to scan; sub-folders are scanned recursively.
;
; For PDF files the flattened name is turned back into a relative path first:
; "-" becomes "\" and then "+" becomes "-". Files with any other extension are
; moved under their unchanged name.
; NOTE(review): the decoding cannot restore the ":" after the drive letter, so
; the result is a sub-path below $OCRdFolder, not the original absolute path.
Func ScanCopyFolder($SourceFolder)
    Local $Search = FileFindFirstFile($SourceFolder & "\*.*")
    If $Search = -1 Then Return ; nothing matched: there is no handle to walk or close

    Local $File, $FullFilePath, $FileString, $FileDate, $Modified, $Destination

    While 1
        $File = FileFindNextFile($Search)
        If @error Then ExitLoop ; no more entries

        $FullFilePath = $SourceFolder & "\" & $File

        ; Directories are descended into, never moved themselves.
        If StringInStr(FileGetAttrib($FullFilePath), "D") Then
            ScanCopyFolder($FullFilePath)
            ContinueLoop
        EndIf

        ; Decode PDFs only. The test looks at the extension of the file name; the
        ; old substring search on the full path also matched ".pdf" inside a
        ; folder name or in the middle of a name. "=" ignores case.
        $FileString = $File
        If StringRight($File, 4) = ".pdf" Then
            $FileString = StringReplace($File, "-", "\") ; Replace C--x-x+x- with C\\x\x+x\
            $FileString = StringReplace($FileString, "+", "-") ; Restore the original dashes
        EndIf

        ; Read the timestamp before the move, while the file is still at its source path.
        $FileDate = FileGetTime($FullFilePath, 0, 0)
        If @error Then
            $Modified = "unknown" ; no array was returned - indexing it would abort the script
        Else
            $Modified = $FileDate[0] & "/" & $FileDate[1] & "/" & $FileDate[2] & " " & $FileDate[3] & ":" & $FileDate[4] & ":" & $FileDate[5]
        EndIf

        ; Build the destination once so the log shows exactly the path that was
        ; used (the old log line inserted an extra "\" that the move did not).
        $Destination = $OCRdFolder & $FileString
        If FileMove($FullFilePath, $Destination, 9) Then ; 9 = overwrite + create missing folders
            MovedLogFile($Destination) ; Log its file path
            MovedLogFile("Last Modified: " & $Modified)
        Else
            ; Record the failure instead of logging the file as if it had been moved.
            MovedLogFile("Move FAILED: " & $FullFilePath)
        EndIf
    WEnd

    FileClose($Search)
EndFunc
; Activity log for directory scan
; Appends $FileName as one line to today's "OCRd Filelist" log, increments the
; script-level $FileCount counter and shows the text in a tooltip at 0,0.
;
; The log lives in $OCRdFolder and is named after the current date as returned
; by _NowDate(), with every "/" replaced by "-" so it is valid in a file name.
; $FileCount goes up once per call, i.e. once per logged line.
Func MovedLogFile($FileName)
    Local $DateTag = StringReplace(_NowDate(), "/", "-")
    Local $LogPath = $OCRdFolder & $DateTag & " OCRd Filelist.txt"

    FileWriteLine($LogPath, $FileName)
    $FileCount = $FileCount + 1
    ToolTip($FileName, 0, 0)
EndFunc