#include-once #Include #Include #include #include #include #include #include #Include #Include #Region Header #cs Title: Tesseract UDF Library for AutoIt3 Filename: Tesseract.au3 Description: A collection of functions for capturing text in applications. Author: seangriffin Version: V0.6 Last Update: 17/03/09 Requirements: AutoIt3 3.2 or higher, Tesseract 2.01. Changelog: ---------15/02/09---------- v0.1 Initial release. ---------15/02/09---------- v0.2 Changed path to tesseract.exe to @ProgramFilesDir. Added scaling as input to _TesseractCapture. Fixed indentation. Changed CaptureHWNDToTIFF to input window and control IDs. ---------16/02/09---------- v0.3 Added the parameter $get_last_capture to _TesseractCapture. Added the parameter $show_capture to _TesseractCapture. ---------16/02/09---------- v0.4 Added the function _TesseractFind. ---------21/02/09---------- v0.5 Updated _TesseractCapture to remove a listbox selection entirely, and return it after the text capture is done. ---------17/03/09---------- v0.6 Split the function "_TesseractCapture" into 3 functions: _TesseractScreenCapture _TesseractWinCapture _TesseractControlCapture Split the function "_TesseractFind" into 3 functions: _TesseractScreenFind _TesseractWinFind _TesseractControlFind Renamed the function "CaptureHWNDToTIFF" to "CaptureToTIFF", and modified it to allow for handling of the screen, windows and controls. Added the function "_TesseractTempPathSet". 
---------16/05/20---------- by autoitscript.com forum user Beonn
	Added some changes from autoitscript.com forum user airday, changes are marked with Start and End in code
	Added some changes from autoitscript.com forum user don134, changes are marked with Start and End in code
	rewrite code to use coordinates like PixelSearch, idea from autoitscript.com user hendrikhe
		$iLeft - Left coordinate of rectangle
		$iTop - Top coordinate of rectangle
		$iRight - Right coordinate of rectangle
		$iBottom - Bottom coordinate of rectangle
	Add _GDIPlus_ImageScale to CaptureToTiff function, I didn't find or understand the original scale function
#ce
#EndRegion Header
#Region Global Variables and Constants
; Cache of earlier captures. Declared without a value here; the capture
; functions replace it with a Scripting.Dictionary on first use.
Global $last_capture
; Start Code by autoitscript.com forum user airday
;Global $tesseract_temp_path = "C:\"
; Directory in which the temporary capture (.tif) and OCR output (.txt) files are created.
Global $tesseract_temp_path = @TempDir & "\"
; Full path of the tesseract executable that the capture functions launch.
Global $tesseract_Program_file = @ProgramFilesDir & "\Tesseract-OCR\tesseract.exe"
; Default recognition language, used as the default of the $Language parameters.
Global $LanguageOption = "eng" ; "eng" = English, "fra" = French...
; Show flag handed to ShellExecuteWait when tesseract is started.
Global $cstTesseractProcessShow = @SW_HIDE ;@SW_MINIMIZE, @SW_MAXIMIZE, @SW_HIDE
; End Code by autoitscript.com forum user airday
#EndRegion Global Variables and Constants
#Region Core functions
; #FUNCTION# ;===============================================================================
;
; Name...........: _TesseractTempPathSet()
; Description ...: Sets the location where Tesseract functions temporarily store their files.
;                  You must have read and write access to this location.
;                  The default location is @TempDir & "\".
; Syntax.........: _TesseractTempPathSet($temp_path)
; Parameters ....: $temp_path - The path to use for temporary file storage.
;                               This path must not contain any spaces (see "Remarks" below).
; Return values .: On Success - Returns 1.
;                  On Failure - Returns 0.
; Author ........: seangriffin
; Modified.......:
; Remarks .......: The current version of Tesseract doesn't support paths with spaces.
; Related .......:
; Link ..........:
; Example .......: No
;
; ;==========================================================================================
Func _TesseractTempPathSet($temp_path)
	; Purpose: change the directory used for the temporary capture and OCR files.
	; $temp_path - an existing directory; a trailing backslash is added when missing.
	; Returns 1 when the path was accepted, 0 (leaving the current setting
	; untouched) when $temp_path is not an existing directory.

	; FileGetAttrib returns "" for a missing path, so this rejects both
	; non-existent paths and plain files.
	If Not StringInStr(FileGetAttrib($temp_path), "D") Then Return 0

	; The stored path is kept in the same "ends with a backslash" form as the default.
	If StringRight($temp_path, 1) <> "\" Then $temp_path &= "\"

	$tesseract_temp_path = $temp_path
	Return 1
EndFunc   ;==>_TesseractTempPathSet

; #FUNCTION# ;===============================================================================
;
; Name...........: _TesseractScreenCapture()
; Description ...: Captures text from the screen.
; Syntax.........: _TesseractScreenCapture($get_last_capture = 0, $delimiter = "", $cleanup = 1, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0, $Language = $LanguageOption)
; Parameters ....: $get_last_capture - Retrieve the text of the last capture, rather than
;                                      performing another capture. Useful if the text in
;                                      the window or control hasn't changed since the last capture.
;                                        0 = do not retrieve the last capture (default)
;                                        1 = retrieve the last capture
;                  $delimiter        - Optional: The string that delimits elements in the text.
;                                      A string of text will be returned if this isn't provided.
;                                      An array of delimited text will be returned if this is provided.
;                                      Eg. Use @CRLF to return the items of a listbox as an array.
;                  $cleanup          - Optional: Remove invalid text recognised
;                                        0 = do not remove invalid text
;                                        1 = remove invalid text (default)
;                  $scale            - Optional: The scaling factor of the screenshot prior to text recognition.
;                                      Increase this number to improve accuracy.
;                                      The default is 2.
;                  $iLeft            - Left coordinate of rectangle
;                  $iTop             - Top coordinate of rectangle
;                  $iRight           - Right coordinate of rectangle
;                  $iBottom          - Bottom coordinate of rectangle
;                  $show_capture     - Display screenshot and text captures
;                                      (for debugging purposes).
;                                        0 = do not display the screenshot taken (default)
;                                        1 = display the screenshot taken and exit
;                  $Language         - The language used for recognition by default "eng".
;                                      Based on Tesseract reference
;                                        "eng" = English (default)
;                                        "fra" = French (need the package)
; Return values .: On Success - Returns the captured text: a string when $delimiter is "",
;                               otherwise an array with one element per recognised line.
;                  On Failure - Returns an empty string ($delimiter = "") or an array
;                               holding no recognised text.
; Author ........: seangriffin
; Modified.......:
; Remarks .......: Use the default values for first time use. If the text recognition accuracy is low,
;                  I suggest setting $show_capture to 1 and rerunning. If the screenshot of the
;                  window or control includes borders or erroneous pixels that may interfere with
;                  the text recognition process, then use $iLeft, $iTop, $iRight and
;                  $iBottom to adjust the portion of the screen being captured, to
;                  exclude these non-textural elements.
;                  If text accuracy is still low, increase the $scale parameter. In general, the higher
;                  the scale the clearer the font and the more accurate the text recognition.
;                  $delimiter is only tested for being empty; the OCR output is always split by line.
;                  Without cleanup the array keeps an empty placeholder in element 0.
;                  Every capture is cached under key 0 so that $get_last_capture = 1 returns the
;                  most recent result.
; Related .......:
; Link ..........:
; Example .......: No
;
; ;==========================================================================================
Func _TesseractScreenCapture($get_last_capture = 0, $delimiter = "", $cleanup = 1, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0, $Language = $LanguageOption)
	Local $aArray, $final_ocr[1]

	; The capture cache is created on first use
	If Not IsObj($last_capture) Then
		$last_capture = ObjCreate("Scripting.Dictionary")
	EndIf

	; If the last capture is requested, and one exists, return it.
	; Exists() is used so that merely looking does not add the key, and
	; IsArray() is tested explicitly instead of comparing an array with "".
	If $get_last_capture = 1 And $last_capture.Exists(0) Then
		Local $cached = $last_capture.item(0)
		If IsArray($cached) Or $cached <> "" Then Return $cached
	EndIf

	; Perform the text recognition
	Local $capture_filename = _TempFile($tesseract_temp_path, "~", ".tif")
	Local $ocr_filename = StringLeft($capture_filename, StringLen($capture_filename) - 4) ; strip ".tif"
	Local $ocr_filename_and_ext = $ocr_filename & ".txt"

	CaptureToTIFF("", "", "", $capture_filename, $scale, $iLeft, $iTop, $iRight, $iBottom)

	; Start Code by autoitscript.com forum user airday
	; Both file names are quoted so a temp path containing spaces reaches
	; tesseract as one argument each; the caller's $Language is honoured.
	ShellExecuteWait($tesseract_Program_file, '"' & $capture_filename & '" "' & $ocr_filename & '" -l ' & $Language, "", "open", $cstTesseractProcessShow)
	; End Code by autoitscript.com forum user airday

	If StringCompare($delimiter, "") = 0 Then
		; If no delimiter specified, then return a string
		$final_ocr = FileRead($ocr_filename_and_ext)
	ElseIf _FileReadToArray($ocr_filename_and_ext, $aArray) Then
		; Element 0 of $aArray is the line count, not text
		_ArrayDelete($aArray, 0)
		; Append the recognised text to a final array
		_ArrayConcatenate($final_ocr, $aArray)
	EndIf

	; If the captures are to be displayed
	If $show_capture = 1 Then
		GUICreate("Tesseract Screen Capture. Note: image displayed is not to scale", 640, 480, 0, 0, $WS_SIZEBOX + $WS_SYSMENU)
		GUISetBkColor(0xE0FFFF)

		; WMPlayer.OCX replaces the "Preview.Preview.1" object of the original UDF
		Local $Obj1 = ObjCreate("WMPlayer.OCX")
		Local $Obj1_ctrl = GUICtrlCreateObj($Obj1, 0, 0, 640, 480)
		With $Obj1
			.URL = $capture_filename
			.fullScreen = True
			.windowlessVideo = True
			.stretchToFit = True
			.enableContextMenu = True
			.enabled = True
			.uiMode = "full" ; none / mini full
			.settings.autostart = True
			.settings.mute = False
			.settings.volume = 100 ; 0 - 100
			.settings.Balance = 0 ; -100 to 100
		EndWith

		GUISetState()

		If IsArray($final_ocr) Then
			_ArrayDisplay($aArray, "Tesseract Text Capture")
		Else
			MsgBox(0, "Tesseract Text Capture", $final_ocr)
		EndIf

		GUIDelete()
	EndIf

	; Removes the capture image and the OCR text file
	FileDelete($ocr_filename & ".*")

	; Cleanup
	If IsArray($final_ocr) And $cleanup = 1 Then
		; Remove erroneous characters (element 0 is the empty placeholder)
		For $final_ocr_num = 1 To (UBound($final_ocr) - 1)
			$final_ocr[$final_ocr_num] = StringReplace($final_ocr[$final_ocr_num], ".", "")
			$final_ocr[$final_ocr_num] = StringReplace($final_ocr[$final_ocr_num], "'", "")
			$final_ocr[$final_ocr_num] = StringReplace($final_ocr[$final_ocr_num], ",", "")
			$final_ocr[$final_ocr_num] = StringStripWS($final_ocr[$final_ocr_num], 3)
		Next

		; Remove blank and duplicate items, keeping the first occurrence of
		; each. The kept items are copied into a new array so that no index
		; is invalidated by an earlier deletion.
		Local $unique[UBound($final_ocr)], $unique_count = 0, $is_duplicate
		For $src = 0 To UBound($final_ocr) - 1
			If StringCompare($final_ocr[$src], "") = 0 Then ContinueLoop

			$is_duplicate = False
			For $dst = 0 To $unique_count - 1
				If StringCompare($unique[$dst], $final_ocr[$src]) = 0 Then
					$is_duplicate = True
					ExitLoop
				EndIf
			Next

			If Not $is_duplicate Then
				$unique[$unique_count] = $final_ocr[$src]
				$unique_count += 1
			EndIf
		Next
		ReDim $unique[$unique_count]
		$final_ocr = $unique
	EndIf

	; Store a copy of the capture (always, so the cache holds the latest one)
	$last_capture.item(0) = $final_ocr

	Return $final_ocr
EndFunc   ;==>_TesseractScreenCapture

; #FUNCTION# ;===============================================================================
;
; Name...........: _TesseractWinCapture()
; Description ...: Captures text from a window.
; Syntax.........: _TesseractWinCapture($win_title, $win_text = "", $get_last_capture = 0, $delimiter = "", $cleanup = 1, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0, $Language = $LanguageOption)
; Parameters ....: $win_title        - The title of the window to capture text from.
;                  $win_text         - Optional: The text of the window to capture text from.
;                  $get_last_capture - Retrieve the text of the last capture, rather than
;                                      performing another capture. Useful if the text in
;                                      the window or control hasn't changed since the last capture.
;                                        0 = do not retrieve the last capture (default)
;                                        1 = retrieve the last capture
;                  $delimiter        - Optional: The string that delimits elements in the text.
;                                      A string of text will be returned if this isn't provided.
;                                      An array of delimited text will be returned if this is provided.
;                                      Eg. Use @CRLF to return the items of a listbox as an array.
;                  $cleanup          - Optional: Remove invalid text recognised
;                                        0 = do not remove invalid text
;                                        1 = remove invalid text (default)
;                  $scale            - Optional: The scaling factor of the screenshot prior to text recognition.
;                                      Increase this number to improve accuracy.
;                                      The default is 2.
;                  $iLeft            - Left coordinate of rectangle
;                  $iTop             - Top coordinate of rectangle
;                  $iRight           - Right coordinate of rectangle
;                  $iBottom          - Bottom coordinate of rectangle
;                  $show_capture     - Display screenshot and text captures
;                                      (for debugging purposes).
;                                        0 = do not display the screenshot taken (default)
;                                        1 = display the screenshot taken and exit
;                  $Language         - The language used for recognition by default "eng".
;                                      Based on Tesseract reference
;                                        "eng" = English (default)
;                                        "fra" = French (need the package)
; Return values .: On Success - Returns the captured text: a string when $delimiter is "",
;                               otherwise an array with one element per recognised line.
;                  On Failure - Returns an empty string ($delimiter = "") or an array
;                               holding no recognised text.
; Author ........: seangriffin
; Modified.......:
; Remarks .......: Use the default values for first time use. If the text recognition accuracy is low,
;                  I suggest setting $show_capture to 1 and rerunning. If the screenshot of the
;                  window or control includes borders or erroneous pixels that may interfere with
;                  the text recognition process, then use $iLeft, $iTop, $iRight and
;                  $iBottom to adjust the portion of the window being captured, to
;                  exclude these non-textural elements.
;                  If text accuracy is still low, increase the $scale parameter. In general, The higher
;                  the scale the clearer the font and the more accurate the text recognition.
;                  $delimiter is only tested for being empty; the OCR output is always split by line.
;                  Without cleanup the array keeps an empty placeholder in element 0.
;                  Every capture is cached under the window handle so that $get_last_capture = 1
;                  returns the most recent result for that window.
; Related .......:
; Link ..........:
; Example .......: No
;
; ;==========================================================================================
Func _TesseractWinCapture($win_title, $win_text = "", $get_last_capture = 0, $delimiter = "", $cleanup = 1, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0, $Language = $LanguageOption)
	Local $aArray, $final_ocr[1]

	; The capture cache is created on first use
	If Not IsObj($last_capture) Then
		$last_capture = ObjCreate("Scripting.Dictionary")
	EndIf

	; The window handle (as a number) is the cache key for this window
	Local $hwnd = WinGetHandle($win_title, $win_text)
	Local $cache_key = Number($hwnd)

	; If the last capture is requested, and one exists, return it.
	; Exists() is used so that merely looking does not add the key, and
	; IsArray() is tested explicitly instead of comparing an array with "".
	If $get_last_capture = 1 And $last_capture.Exists($cache_key) Then
		Local $cached = $last_capture.item($cache_key)
		If IsArray($cached) Or $cached <> "" Then Return $cached
	EndIf

	; Perform the text recognition
	Local $capture_filename = _TempFile($tesseract_temp_path, "~", ".tif")
	Local $ocr_filename = StringLeft($capture_filename, StringLen($capture_filename) - 4) ; strip ".tif"
	Local $ocr_filename_and_ext = $ocr_filename & ".txt"

	CaptureToTIFF($win_title, $win_text, "", $capture_filename, $scale, $iLeft, $iTop, $iRight, $iBottom)

	; Start Code by autoitscript.com forum user airday
	; Both file names are quoted so a temp path containing spaces reaches
	; tesseract as one argument each; the caller's $Language is honoured.
	ShellExecuteWait($tesseract_Program_file, '"' & $capture_filename & '" "' & $ocr_filename & '" -l ' & $Language, "", "open", $cstTesseractProcessShow)
	; End Code by autoitscript.com forum user airday

	If StringCompare($delimiter, "") = 0 Then
		; If no delimiter specified, then return a string
		$final_ocr = FileRead($ocr_filename_and_ext)
	ElseIf _FileReadToArray($ocr_filename_and_ext, $aArray) Then
		; Element 0 of $aArray is the line count, not text
		_ArrayDelete($aArray, 0)
		; Append the recognised text to a final array
		_ArrayConcatenate($final_ocr, $aArray)
	EndIf

	; If the captures are to be displayed
	If $show_capture = 1 Then
		GUICreate("Tesseract Screen Capture. Note: image displayed is not to scale", 640, 480, 0, 0, $WS_SIZEBOX + $WS_SYSMENU)
		GUISetBkColor(0xE0FFFF)

		; WMPlayer.OCX replaces the "Preview.Preview.1" object of the original UDF
		Local $Obj1 = ObjCreate("WMPlayer.OCX")
		Local $Obj1_ctrl = GUICtrlCreateObj($Obj1, 0, 0, 640, 480)
		With $Obj1
			.URL = $capture_filename
			.fullScreen = True
			.windowlessVideo = True
			.stretchToFit = True
			.enableContextMenu = True
			.enabled = True
			.uiMode = "full" ; none / mini full
			.settings.autostart = True
			.settings.mute = False
			.settings.volume = 100 ; 0 - 100
			.settings.Balance = 0 ; -100 to 100
		EndWith

		GUISetState()

		If IsArray($final_ocr) Then
			_ArrayDisplay($aArray, "Tesseract Text Capture")
		Else
			MsgBox(0, "Tesseract Text Capture", $final_ocr)
		EndIf

		GUIDelete()
	EndIf

	; Removes the capture image and the OCR text file
	FileDelete($ocr_filename & ".*")

	; Cleanup
	If IsArray($final_ocr) And $cleanup = 1 Then
		; Remove erroneous characters (element 0 is the empty placeholder)
		For $final_ocr_num = 1 To (UBound($final_ocr) - 1)
			$final_ocr[$final_ocr_num] = StringReplace($final_ocr[$final_ocr_num], ".", "")
			$final_ocr[$final_ocr_num] = StringReplace($final_ocr[$final_ocr_num], "'", "")
			$final_ocr[$final_ocr_num] = StringReplace($final_ocr[$final_ocr_num], ",", "")
			$final_ocr[$final_ocr_num] = StringStripWS($final_ocr[$final_ocr_num], 3)
		Next

		; Remove blank and duplicate items, keeping the first occurrence of
		; each. The kept items are copied into a new array so that no index
		; is invalidated by an earlier deletion.
		Local $unique[UBound($final_ocr)], $unique_count = 0, $is_duplicate
		For $src = 0 To UBound($final_ocr) - 1
			If StringCompare($final_ocr[$src], "") = 0 Then ContinueLoop

			$is_duplicate = False
			For $dst = 0 To $unique_count - 1
				If StringCompare($unique[$dst], $final_ocr[$src]) = 0 Then
					$is_duplicate = True
					ExitLoop
				EndIf
			Next

			If Not $is_duplicate Then
				$unique[$unique_count] = $final_ocr[$src]
				$unique_count += 1
			EndIf
		Next
		ReDim $unique[$unique_count]
		$final_ocr = $unique
	EndIf

	; Store a copy of the capture (always, so the cache holds the latest one)
	$last_capture.item($cache_key) = $final_ocr

	Return $final_ocr
EndFunc   ;==>_TesseractWinCapture

; #FUNCTION# ;===============================================================================
;
; Name...........: _TesseractControlCapture()
; Description ...: Captures text from a control.
; Syntax.........: _TesseractControlCapture($win_title, $win_text = "", $ctrl_id = "", $get_last_capture = 0, $delimiter = "", $expand = 1, $scrolling = 1, $cleanup = 1, $max_scroll_times = 5, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0, $Language = $LanguageOption)
; Parameters ....: $win_title        - The title of the window to capture text from.
;                  $win_text         - Optional: The text of the window to capture text from.
;                  $ctrl_id          - Optional: The ID of the control to capture text from.
;                                      The text of the window will be returned if one isn't provided.
;                  $get_last_capture - Retrieve the text of the last capture, rather than
;                                      performing another capture. Useful if the text in
;                                      the window or control hasn't changed since the last capture.
;                                        0 = do not retrieve the last capture (default)
;                                        1 = retrieve the last capture
;                  $delimiter        - Optional: The string that delimits elements in the text.
;                                      A string of text will be returned if this isn't provided.
;                                      An array of delimited text will be returned if this is provided.
;                                      Eg. Use @CRLF to return the items of a listbox as an array.
;                  $expand           - Optional: Expand the control before capturing text from it?
;                                        0 = do not expand the control
;                                        1 = expand the control (default)
;                  $scrolling        - Optional: Scroll the control to capture all it's text?
;                                        0 = do not scroll the control
;                                        1 = scroll the control (default)
;                  $cleanup          - Optional: Remove invalid text recognised
;                                        0 = do not remove invalid text
;                                        1 = remove invalid text (default)
;                  $max_scroll_times - The maximum number of scrolls to capture in a control
;                                      If a control has a very long scroll bar, the text recognition
;                                      process will take too long.
;                                      Use this value to restrict
;                                      the amount of text to recognise in a long control.
;                  $scale            - Optional: The scaling factor of the screenshot prior to text recognition.
;                                      Increase this number to improve accuracy.
;                                      The default is 2.
;                  $iLeft            - Left coordinate of rectangle
;                  $iTop             - Top coordinate of rectangle
;                  $iRight           - Right coordinate of rectangle
;                  $iBottom          - Bottom coordinate of rectangle
;                  $show_capture     - Display screenshot and text captures
;                                      (for debugging purposes).
;                                        0 = do not display the screenshot taken (default)
;                                        1 = display the screenshot taken and exit
;                  $Language         - The language used for recognition by default "eng". Based on Tesseract reference
;                                        "eng" = English (default)
;                                        "fra" = French (need the package)
; Return values .: On Success - Returns the captured text: a string when $delimiter is "",
;                               otherwise an array with one element per recognised line.
;                  On Failure - Returns an empty string ($delimiter = "") or an array
;                               holding no recognised text.
; Author ........: seangriffin
; Modified.......:
; Remarks .......: Use the default values for first time use. If the text recognition accuracy is low,
;                  I suggest setting $show_capture to 1 and rerunning. If the screenshot of the
;                  window or control includes borders or erroneous pixels that may interfere with
;                  the text recognition process, then use $iLeft, $iTop, $iRight and
;                  $iBottom to adjust the portion of the control being captured, to
;                  exclude these non-textural elements.
;                  If text accuracy is still low, increase the $scale parameter. In general, The higher
;                  the scale the clearer the font and the more accurate the text recognition.
;                  When $ctrl_id is "" the call is forwarded to _TesseractWinCapture.
;                  With $scrolling = 0 exactly one capture is made at the current scroll position.
;                  $delimiter is only tested for being empty; the OCR output is always split by line.
;                  Every capture is cached under the control handle so that $get_last_capture = 1
;                  returns the most recent result for that control.
; Related .......:
; Link ..........:
; Example .......: Yes
;
; ;==========================================================================================
Func _TesseractControlCapture($win_title, $win_text = "", $ctrl_id = "", $get_last_capture = 0, $delimiter = "", $expand = 1, $scrolling = 1, $cleanup = 1, $max_scroll_times = 5, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0, $Language = $LanguageOption)
	; Without a control ID there is no control handle to work on, so the
	; text of the window is captured instead, as the header documents.
	If StringCompare($ctrl_id, "") = 0 Then
		Return _TesseractWinCapture($win_title, $win_text, $get_last_capture, $delimiter, $cleanup, $scale, $iLeft, $iTop, $iRight, $iBottom, $show_capture, $Language)
	EndIf

	Local $tInfo, $aArray, $final_ocr[1], $xyPos, $xyPos_old = -1
	Local $capture_filename, $ocr_filename, $ocr_filename_and_ext, $Obj1, $Obj1_ctrl

	Local $tSCROLLINFO = DllStructCreate($tagSCROLLINFO)
	DllStructSetData($tSCROLLINFO, "cbSize", DllStructGetSize($tSCROLLINFO))
	DllStructSetData($tSCROLLINFO, "fMask", $SIF_ALL)

	; The capture cache is created on first use
	If Not IsObj($last_capture) Then
		$last_capture = ObjCreate("Scripting.Dictionary")
	EndIf

	; Get the HWND of the control
	Local $hwnd = ControlGetHandle($win_title, $win_text, $ctrl_id)

	; If expansion of the control is required.
	If $expand = 1 And StringCompare($delimiter, "") <> 0 Then
		Local $hwnd2 = $hwnd

		; For a combo box, work on its drop-down list from here on
		If _GUICtrlComboBox_GetComboBoxInfo($hwnd, $tInfo) Then
			$hwnd = DllStructGetData($tInfo, "hList")
		EndIf

		; Expand the control.
		_GUICtrlComboBox_ShowDropDown($hwnd2, True)
	EndIf

	Local $cache_key = Number($hwnd)

	; If the last capture is requested, and one exists, return it.
	; Exists() is used so that merely looking does not add the key, and
	; IsArray() is tested explicitly instead of comparing an array with "".
	If $get_last_capture = 1 And $last_capture.Exists($cache_key) Then
		Local $cached = $last_capture.item($cache_key)
		If IsArray($cached) Or $cached <> "" Then Return $cached
	EndIf

	; Text recognition improves alot if the current selection and focus rectangle is removed.
	; The following code will remove the selection.
	; After text recognition the selection is returned.
	Local $sel_index = _GUICtrlListBox_GetCurSel($hwnd)

	; The following two lines should remove the current selection and focus rectangle
	; in all cases.
	_GUICtrlListBox_SetCurSel($hwnd, -1)
	_GUICtrlListBox_SetCaretIndex($hwnd, -1)

	; Number of pages to capture: a single one when scrolling is disabled
	Local $passes = $max_scroll_times
	If $scrolling = 0 Then
		$passes = 1
	Else
		; Scroll to the top
		DllCall("user32.dll", "int", "SendMessage", "hwnd", $hwnd, "int", $WM_VSCROLL, "int", $SB_TOP, "int", 0)
	EndIf

	For $i = 1 To $passes
		If $i > 1 Then
			; Scroll the list down one page
			DllCall("user32.dll", "int", "SendMessage", "hwnd", $hwnd, "int", $WM_VSCROLL, "int", $SB_PAGEDOWN, "int", 0)
		EndIf

		; Get the position of the scroll bar
		DllCall("user32.dll", "int", "GetScrollInfo", "hwnd", $hwnd, "int", $SB_VERT, "ptr", DllStructGetPtr($tSCROLLINFO))
		$xyPos = DllStructGetData($tSCROLLINFO, "nPos")

		; If the scroll bar hasn't moved, we have finished scrolling
		If $xyPos_old = $xyPos Then ExitLoop
		$xyPos_old = $xyPos

		; Perform the text recognition
		WinActivate($win_title)

		$capture_filename = _TempFile($tesseract_temp_path, "~", ".tif")
		$ocr_filename = StringLeft($capture_filename, StringLen($capture_filename) - 4) ; strip ".tif"
		$ocr_filename_and_ext = $ocr_filename & ".txt"

		CaptureToTIFF($win_title, $win_text, $hwnd, $capture_filename, $scale, $iLeft, $iTop, $iRight, $iBottom)

		; Start Code by autoitscript.com forum user airday
		; Both file names are quoted so a temp path containing spaces reaches
		; tesseract as one argument each; the caller's $Language is honoured.
		ShellExecuteWait($tesseract_Program_file, '"' & $capture_filename & '" "' & $ocr_filename & '" -l ' & $Language, "", "open", $cstTesseractProcessShow)
		; End Code by autoitscript.com forum user airday

		If StringCompare($delimiter, "") = 0 Then
			; If no delimiter specified, then return a string
			$final_ocr = FileRead($ocr_filename_and_ext)
		ElseIf _FileReadToArray($ocr_filename_and_ext, $aArray) Then
			; Element 0 of $aArray is the line count, not text
			_ArrayDelete($aArray, 0)
			; Append the recognised text to a final array
			_ArrayConcatenate($final_ocr, $aArray)
		EndIf

		; If the captures are to be displayed
		If $show_capture = 1 Then
			GUICreate("Tesseract Screen Capture. Note: image displayed is not to scale", 640, 480, 0, 0, $WS_SIZEBOX + $WS_SYSMENU)
			GUISetBkColor(0xE0FFFF)

			; WMPlayer.OCX replaces the "Preview.Preview.1" object of the original UDF
			$Obj1 = ObjCreate("WMPlayer.OCX")
			$Obj1_ctrl = GUICtrlCreateObj($Obj1, 0, 0, 640, 480)
			With $Obj1
				.URL = $capture_filename
				.fullScreen = True
				.windowlessVideo = True
				.stretchToFit = True
				.enableContextMenu = True
				.enabled = True
				.uiMode = "full" ; none / mini full
				.settings.autostart = True
				.settings.mute = False
				.settings.volume = 100 ; 0 - 100
				.settings.Balance = 0 ; -100 to 100
			EndWith

			GUISetState()

			If IsArray($final_ocr) Then
				_ArrayDisplay($aArray, "Tesseract Text Capture")
			Else
				MsgBox(0, "Tesseract Text Capture", $final_ocr)
			EndIf

			GUIDelete()
		EndIf

		; Removes the capture image and the OCR text file
		FileDelete($ocr_filename & ".*")

		; A plain-string capture only ever uses the first page
		If StringCompare($delimiter, "") = 0 Then ExitLoop
	Next

	; Return the current selection (if one existed). This happens only after
	; every page has been captured, so no page is recognised with the
	; selection highlight back in place.
	If $sel_index > -1 Then
		_GUICtrlListBox_SetCurSel($hwnd, $sel_index)
	EndIf

	; Cleanup
	If IsArray($final_ocr) And $cleanup = 1 Then
		; Remove erroneous characters (element 0 is the empty placeholder)
		For $final_ocr_num = 1 To (UBound($final_ocr) - 1)
			$final_ocr[$final_ocr_num] = StringReplace($final_ocr[$final_ocr_num], ".", "")
			$final_ocr[$final_ocr_num] = StringReplace($final_ocr[$final_ocr_num], "'", "")
			$final_ocr[$final_ocr_num] = StringReplace($final_ocr[$final_ocr_num], ",", "")
			$final_ocr[$final_ocr_num] = StringStripWS($final_ocr[$final_ocr_num], 3)
		Next

		; Remove blank and duplicate items, keeping the first occurrence of
		; each. The kept items are copied into a new array so that no index
		; is invalidated by an earlier deletion.
		Local $unique[UBound($final_ocr)], $unique_count = 0, $is_duplicate
		For $src = 0 To UBound($final_ocr) - 1
			If StringCompare($final_ocr[$src], "") = 0 Then ContinueLoop

			$is_duplicate = False
			For $dst = 0 To $unique_count - 1
				If StringCompare($unique[$dst], $final_ocr[$src]) = 0 Then
					$is_duplicate = True
					ExitLoop
				EndIf
			Next

			If Not $is_duplicate Then
				$unique[$unique_count] = $final_ocr[$src]
				$unique_count += 1
			EndIf
		Next
		ReDim $unique[$unique_count]
		$final_ocr = $unique
	EndIf

	; Store a copy of the capture (always, so the cache holds the latest one)
	$last_capture.item($cache_key) = $final_ocr

	; Start Code by autoitscript.com forum user don134
	$tSCROLLINFO = 0 ;Add me just in case
	; End Code by autoitscript.com forum user don134

	Return $final_ocr
EndFunc   ;==>_TesseractControlCapture

; #FUNCTION# ;===============================================================================
;
; Name...........: _TesseractScreenFind()
; Description ...: Finds the location of a string within text captured from the screen.
; Syntax.........: _TesseractScreenFind($find_str = "", $partial = 1, $get_last_capture = 0, $delimiter = "", $cleanup = 1, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0)
; Parameters ....: $find_str         - The text (string) to find.
;                  $partial          - Optional: Find the text using a partial match?
;                                        0 = use a full text match
;                                        1 = use a partial text match (default)
;                  $get_last_capture - Search within the text of the last capture, rather than
;                                      performing another capture. Useful if the text in
;                                      the window or control hasn't changed since the last capture.
;                                        0 = do not use the last capture (default)
;                                        1 = use the last capture
;                  $delimiter        - Optional: The string that delimits elements in the text.
;                                      A string of text will be searched if this isn't provided.
;                                      The index of the item found will be returned if this is provided.
;                                      Eg. Use @CRLF to find an item in a listbox.
;                  $cleanup          - Optional: Remove invalid text recognised
;                                        0 = do not remove invalid text
;                                        1 = remove invalid text (default)
;                  $scale            - Optional: The scaling factor of the screenshot prior to text recognition.
;                                      Increase this number to improve accuracy.
;                                      The default is 2.
;                  $iLeft            - Left coordinate of rectangle
;                  $iTop             - Top coordinate of rectangle
;                  $iRight           - Right coordinate of rectangle
;                  $iBottom          - Bottom coordinate of rectangle
;                  $show_capture     - Display screenshot and text captures
;                                      (for debugging purposes).
;                                        0 = do not display the screenshot taken (default)
; Return values .: On Success - Returns the location of the text that was found.
;                               If $delimiter is "", then the character position of the text found
;                               is returned.
;                               If $delimiter is not "", then the element of the array where the
;                               text was found is returned.
;                  On Failure - Returns 0 when a string was searched, or the not-found
;                               result of _ArraySearch (-1) when an array was searched.
; Author ........: seangriffin
; Modified.......:
; Remarks .......: An optional trailing $Language parameter (default $LanguageOption) is
;                  passed through to _TesseractScreenCapture.
;                  A full text match ($partial = 0) on a string returns 1 when the whole
;                  captured text equals $find_str.
; Related .......:
; Link ..........:
; Example .......: No
;
; ;==========================================================================================
Func _TesseractScreenFind($find_str = "", $partial = 1, $get_last_capture = 0, $delimiter = "", $cleanup = 1, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0, $Language = $LanguageOption)
	; Get all the text from the screen
	Local $recognised_text = _TesseractScreenCapture($get_last_capture, $delimiter, $cleanup, $scale, $iLeft, $iTop, $iRight, $iBottom, $show_capture, $Language)

	; Delimited capture: $partial doubles as the compare mode of _ArraySearch
	If IsArray($recognised_text) Then
		Return _ArraySearch($recognised_text, $find_str, 0, 0, 0, $partial)
	EndIf

	; Plain text, partial match: character position of the first hit (0 = none)
	If $partial = 1 Then
		Return StringInStr($recognised_text, $find_str)
	EndIf

	; Plain text, full match: the whole capture must equal $find_str
	If StringCompare($recognised_text, $find_str) = 0 Then
		Return 1
	EndIf

	Return 0
EndFunc   ;==>_TesseractScreenFind

; #FUNCTION# ;===============================================================================
;
; Name...........: _TesseractWinFind()
; Description ...: Finds the location of a string within text captured from a window.
; Syntax.........: _TesseractWinFind($win_title, $win_text = "", $find_str = "", $partial = 1, $get_last_capture = 0, $delimiter = "", $cleanup = 1, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0)
; Parameters ....: $win_title        - The title of the window to find text in.
;                  $win_text         - Optional: The text of the window to find text in.
;                  $find_str         - The text (string) to find.
;                  $partial          - Optional: Find the text using a partial match?
;                                        0 = use a full text match
;                                        1 = use a partial text match (default)
;                  $get_last_capture - Search within the text of the last capture, rather than
;                                      performing another capture. Useful if the text in
;                                      the window or control hasn't changed since the last capture.
;                                        0 = do not use the last capture (default)
;                                        1 = use the last capture
;                  $delimiter        - Optional: The string that delimits elements in the text.
;                                      A string of text will be searched if this isn't provided.
;                                      The index of the item found will be returned if this is provided.
;                                      Eg. Use @CRLF to find an item in a listbox.
;                  $cleanup          - Optional: Remove invalid text recognised
;                                        0 = do not remove invalid text
;                                        1 = remove invalid text (default)
;                  $scale            - Optional: The scaling factor of the screenshot prior to text recognition.
;                                      Increase this number to improve accuracy.
;                                      The default is 2.
;                  $iLeft            - Left coordinate of rectangle
;                  $iTop             - Top coordinate of rectangle
;                  $iRight           - Right coordinate of rectangle
;                  $iBottom          - Bottom coordinate of rectangle
;                  $show_capture     - Display screenshot and text captures
;                                      (for debugging purposes).
;                                        0 = do not display the screenshot taken (default)
;                                        1 = display the screenshot taken and exit
;                  $Language         - Optional: The recognition language passed through to
;                                      _TesseractWinCapture. The default is $LanguageOption.
; Return values .: On Success - Returns the location of the text that was found.
;                               If $delimiter is "", then the character position of the text found
;                               is returned.
;                               If $delimiter is not "", then the element of the array where the
;                               text was found is returned.
;                  On Failure - Returns 0 when a string was searched, or the not-found
;                               result of _ArraySearch (-1) when an array was searched.
; Author ........: seangriffin
; Modified.......:
; Remarks .......: A full text match ($partial = 0) on a string returns 1 when the whole
;                  captured text equals $find_str.
; Related .......:
; Link ..........:
; Example .......: No
;
; ;==========================================================================================
Func _TesseractWinFind($win_title, $win_text = "", $find_str = "", $partial = 1, $get_last_capture = 0, $delimiter = "", $cleanup = 1, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0, $Language = $LanguageOption)
	; Get all the text from the window
	Local $recognised_text = _TesseractWinCapture($win_title, $win_text, $get_last_capture, $delimiter, $cleanup, $scale, $iLeft, $iTop, $iRight, $iBottom, $show_capture, $Language)

	; Delimited capture: $partial doubles as the compare mode of _ArraySearch
	If IsArray($recognised_text) Then
		Return _ArraySearch($recognised_text, $find_str, 0, 0, 0, $partial)
	EndIf

	; Plain text, partial match: character position of the first hit (0 = none)
	If $partial = 1 Then
		Return StringInStr($recognised_text, $find_str)
	EndIf

	; Plain text, full match: the whole capture must equal $find_str
	If StringCompare($recognised_text, $find_str) = 0 Then
		Return 1
	EndIf

	Return 0
EndFunc   ;==>_TesseractWinFind

; #FUNCTION# ;===============================================================================
;
; Name...........: _TesseractControlFind()
; Description ...: Finds the location of a string within text captured from a control.
; Syntax.........: _TesseractControlFind($win_title, $win_text = "", $ctrl_id = "", $find_str = "", $partial = 1, $get_last_capture = 0, $delimiter = "", $expand = 1, $scrolling = 1, $cleanup = 1, $max_scroll_times = 5, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0)
; Parameters ....: $win_title        - The title of the window to find text in.
;                  $win_text         - Optional: The text of the window to find text in.
;                  $ctrl_id          - Optional: The ID of the control to find text in.
;                                      The text of the window will be used if one isn't provided.
;                  $find_str         - The text (string) to find.
;                  $partial          - Optional: Find the text using a partial match?
; 0 = use a full text match ; 1 = use a partial text match (default) ; $get_last_capture - Search within the text of the last capture, rather than ; performing another capture. Useful if the text in ; the window or control hasn't changed since the last capture. ; 0 = do not use the last capture (default) ; 1 = use the last capture ; $delimiter - Optional: The string that delimits elements in the text. ; A string of text will be searched if this isn't provided. ; The index of the item found will be returned if this is provided. ; Eg. Use @CRLF to find an item in a listbox. ; $expand - Optional: Expand the control before searching it? ; 0 = do not expand the control ; 1 = expand the control (default) ; $scrolling - Optional: Scroll the control to search all its text? ; 0 = do not scroll the control ; 1 = scroll the control (default) ; $cleanup - Optional: Remove invalid text recognised ; 0 = do not remove invalid text ; 1 = remove invalid text (default) ; $max_scroll_times - The maximum number of scrolls to capture in a control ; If a control has a very long scroll bar, the text recognition ; process will take too long. Use this value to restrict ; the amount of text to recognise in a long control. ; $scale - Optional: The scaling factor of the screenshot prior to text recognition. ; Increase this number to improve accuracy. ; The default is 2. ; $iLeft - Left coordinate of rectangle ; $iTop - Top coordinate of rectangle ; $iRight - Right coordinate of rectangle ; $iBottom - Bottom coordinate of rectangle ; $show_capture - Display screenshot and text captures ; (for debugging purposes). ; 0 = do not display the screenshot taken (default) ; 1 = display the screenshot taken and exit ; Return values .: On Success - Returns the location of the text that was found. ; If $delimiter is "", then the character position of the text found ; is returned. ; If $delimiter is not "", then the element of the array where the ; text was found is returned. 
;                  On Failure         - Returns 0 (string search) or -1 (array search) if the text was not found.
; Author ........: seangriffin
; Modified.......:
; Remarks .......:
; Related .......:
; Link ..........:
; Example .......: Yes
;
; ;==========================================================================================
Func _TesseractControlFind($win_title, $win_text = "", $ctrl_id = "", $find_str = "", $partial = 1, $get_last_capture = 0, $delimiter = "", $expand = 1, $scrolling = 1, $cleanup = 1, $max_scroll_times = 5, $scale = 2, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0, $show_capture = 0)

	; Declare the working variables explicitly so they can never clobber a
	; same-named Global in the calling script and so the UDF also runs under
	; Opt("MustDeclareVars", 1).
	Local $recognised_text, $index_found = 0

	; Get all the text from the control
	$recognised_text = _TesseractControlCapture($win_title, $win_text, $ctrl_id, $get_last_capture, $delimiter, $expand, $scrolling, $cleanup, $max_scroll_times, $scale, $iLeft, $iTop, $iRight, $iBottom, $show_capture)

	If IsArray($recognised_text) Then
		; The capture came back as an array (per the header, this is the
		; delimited case): return the index of the matching element.
		; _ArraySearch yields -1 when nothing matches.
		$index_found = _ArraySearch($recognised_text, $find_str, 0, 0, 0, $partial)
	ElseIf $partial = 1 Then
		; Plain string, partial match: 1-based character position, 0 if absent.
		$index_found = StringInStr($recognised_text, $find_str)
	ElseIf StringCompare($recognised_text, $find_str) = 0 Then
		; Plain string, full match (case-insensitive comparison): report position 1.
		$index_found = 1
	Else
		$index_found = 0
	EndIf

	Return $index_found
EndFunc

; #FUNCTION# ;===============================================================================
;
; Name...........: CaptureToTIFF()
; Description ...: Captures an image of the screen, a window or a control, and saves it to a TIFF file.
; Syntax.........: CaptureToTIFF($win_title = "", $win_text = "", $ctrl_id = "", $sOutImage = "", $scale = 1, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0)
; Parameters ....: $win_title         - The title of the window to capture an image of.
;                  $win_text          - Optional: The text of the window to capture an image of.
;                  $ctrl_id           - Optional: The ID of the control to capture an image of.
;                                       An image of the window will be returned if one isn't provided.
;                  $sOutImage         - The filename to store the image in.
;                  $scale             - Optional: The scaling factor of the capture.
;                  $iLeft             - Left coordinate of rectangle
;                  $iTop              - Top coordinate of rectangle
;                  $iRight            - Right coordinate of rectangle
;                  $iBottom           - Bottom coordinate of rectangle
; Return values .: On Success - Returns 1.
;                  On Failure - Returns 0 and sets @error:
;                               1 = the capture area could not be determined
;                                   (window / control not found, or empty rectangle)
;                               2 = the image could not be saved
; Author ........: seangriffin
; Modified.......:
; Remarks .......: If $iRight is not greater than $iLeft (e.g. both left at the default 0),
;                  the capture extends to the right edge of the screen / window / control.
;                  The same applies to $iBottom and $iTop for the height.
; Related .......:
; Link ..........:
; Example .......: No
;
; ;==========================================================================================
Func CaptureToTIFF($win_title = "", $win_text = "", $ctrl_id = "", $sOutImage = "", $scale = 1, $iLeft = 0, $iTop = 0, $iRight = 0, $iBottom = 0)

	Local $hWnd, $hwnd2, $hDC, $hBMP, $hBitmap2, $hImage1, $hImage2, $hScaled, $hGraphic
	Local $CLSID, $tParams, $pParams = 0, $tData, $pos[4]
	Local $iWidth, $iHeight, $bSaved
	; The encoder is selected from the output file's extension (text after the last ".").
	Local $Ext = StringUpper(StringMid($sOutImage, StringInStr($sOutImage, ".", 0, -1) + 1))
	Local $giTIFColorDepth = 24
	Local $giTIFCompression = $GDIP_EVTCOMPRESSIONNONE

	If StringCompare($ctrl_id, "") <> 0 Then
		; If capturing a control
		$hwnd2 = ControlGetHandle($win_title, $win_text, $ctrl_id)
		$pos = ControlGetPos($win_title, $win_text, $ctrl_id)
	ElseIf StringCompare($win_title, "") <> 0 Then
		; If capturing a window
		$hwnd2 = WinGetHandle($win_title, $win_text)
		$pos = WinGetPos($win_title, $win_text)
	Else
		; If capturing the desktop
		$hwnd2 = ""
		$pos[0] = 0
		$pos[1] = 0
		$pos[2] = @DesktopWidth
		$pos[3] = @DesktopHeight
	EndIf

	; WinGetPos / ControlGetPos do not return an array when the target is
	; missing; indexing $pos below would then abort the whole script.
	If Not IsArray($pos) Then Return SetError(1, 0, 0)

	; Size of the output rectangle. An empty or inverted rectangle (such as the
	; all-zero defaults) falls back to "everything right of / below the origin",
	; because a 0 x 0 compatible bitmap would otherwise produce a useless image.
	$iWidth = $iRight - $iLeft
	If $iWidth <= 0 Then $iWidth = $pos[2] - $iLeft
	$iHeight = $iBottom - $iTop
	If $iHeight <= 0 Then $iHeight = $pos[3] - $iTop
	If $iWidth <= 0 Or $iHeight <= 0 Then Return SetError(1, 0, 0)

	; Capture an image of the window / control
	If IsHWnd($hwnd2) Then
		WinActivate($win_title, $win_text)
		$hBitmap2 = _ScreenCapture_CaptureWnd("", $hwnd2, 0, 0, -1, -1, False)
	Else
		$hBitmap2 = _ScreenCapture_Capture("", 0, 0, -1, -1, False)
	EndIf

	_GDIPlus_Startup()

	; Convert the image to a bitmap
	$hImage2 = _GDIPlus_BitmapCreateFromHBITMAP($hBitmap2)

	; Start Code by autoitscript.com forum user Beonn
	If $scale > 1 Then
		; _GDIPlus_ImageScale returns a NEW image; dispose the unscaled one so it
		; is not leaked. If scaling fails, keep the unscaled image - the
		; DrawImageRect call below stretches it to the target size anyway.
		$hScaled = _GDIPlus_ImageScale($hImage2, $scale, $scale)
		If $hScaled Then
			_GDIPlus_ImageDispose($hImage2)
			$hImage2 = $hScaled
		EndIf
	EndIf
	; End Code by autoitscript.com forum user Beonn

	$hWnd = _WinAPI_GetDesktopWindow()
	$hDC = _WinAPI_GetDC($hWnd)

	; Start Code by autoitscript.com forum user Beonn
	;$hBMP = _WinAPI_CreateCompatibleBitmap($hDC, ($pos[2] * $scale) - ($right_indent * $scale), ($pos[3] * $scale) - ($bottom_indent * $scale))
	$hBMP = _WinAPI_CreateCompatibleBitmap($hDC, $iWidth * $scale, $iHeight * $scale)
	; End Code by autoitscript.com forum user Beonn

	_WinAPI_ReleaseDC($hWnd, $hDC)
	$hImage1 = _GDIPlus_BitmapCreateFromHBITMAP($hBMP)
	$hGraphic = _GDIPlus_ImageGetGraphicsContext($hImage1)

	; Start Code by autoitscript.com forum user Beonn
	;_GDIPLus_GraphicsDrawImageRect($hGraphic, $hImage2, 0 - ($left_indent * $scale), 0 - ($top_indent * $scale), ($pos[2] * $scale) + $left_indent, ($pos[3] * $scale) + $top_indent)
	; Draw the whole capture shifted up/left by the rectangle origin, so that
	; only the requested rectangle lands inside the (smaller) target bitmap.
	_GDIPlus_GraphicsDrawImageRect($hGraphic, $hImage2, (0 - $iLeft) * $scale, (0 - $iTop) * $scale, $pos[2] * $scale, $pos[3] * $scale)
	; End Code by autoitscript.com forum user Beonn

	$CLSID = _GDIPlus_EncodersGetCLSID($Ext)

	; Set TIFF parameters
	$tParams = _GDIPlus_ParamInit(2)
	$tData = DllStructCreate("int ColorDepth;int Compression")
	DllStructSetData($tData, "ColorDepth", $giTIFColorDepth)
	DllStructSetData($tData, "Compression", $giTIFCompression)
	_GDIPlus_ParamAdd($tParams, $GDIP_EPGCOLORDEPTH, 1, $GDIP_EPTLONG, DllStructGetPtr($tData, "ColorDepth"))
	_GDIPlus_ParamAdd($tParams, $GDIP_EPGCOMPRESSION, 1, $GDIP_EPTLONG, DllStructGetPtr($tData, "Compression"))
	If IsDllStruct($tParams) Then $pParams = DllStructGetPtr($tParams)

	; Save TIFF and cleanup
	$bSaved = _GDIPlus_ImageSaveToFileEx($hImage1, $sOutImage, $CLSID, $pParams)
	; Release the graphics context before the image it was created from.
	_GDIPlus_GraphicsDispose($hGraphic)
	_GDIPlus_ImageDispose($hImage1)
	_GDIPlus_ImageDispose($hImage2)

	; Start Code by autoitscript.com forum user don134
	_WinAPI_DeleteObject($hBitmap2) ;Add this line to delete bitmap from cache
	; End Code by autoitscript.com forum user don134

	_WinAPI_DeleteObject($hBMP)
	_GDIPlus_Shutdown()

	If Not $bSaved Then Return SetError(2, 0, 0)
	Return 1
EndFunc