Jump to content

Problem with Tesseract.exe


Recommended Posts

Hey guys! How are you? Let's go to one more problem...

I'm creating a script that captures a certain area of the screen, runs Tesseract on it to recognize a number, and writes the result to a text file. The problem is that some numbers are recognized normally and others are not... I'm using Tesseract 3.02. Tested on Windows 7 and 10, at resolutions of 1366x768 and 1920x1080.

When running the command manually from the command prompt, the following error appears:

print.jpg.f70c5f4251f8789229e8d562aa9ec233.jpg

And this is what it looks like when the command detects the number normally:
image.png.e4ebf95352a6eb2c079ea8844953a7d1.png

An example of captured image:

image.png.08cd91d0692883fe14df77dd2741ae51.png

image.png.029fc7e3824ecf8297304b8b5538b820.png

Let's go to the code...

;~    screenshot of last number...
   $captado = _ScreenCapture_Capture("", $left, $top, $right, $bottom)
;~    make .tif file...
   $img = _ScreenCapture_SaveImage(@ScriptDir & "\Captures\img.tif", $captado)
;~    remove the result of any previous run, so that a failed OCR pass
;~    cannot leave a stale number behind for the FileRead below...
   FileDelete(@ScriptDir & "\Captures\result.txt")
;~    run tesseract and save txt file (tesseract appends ".txt" to the output base name)...
   ShellExecuteWait(@ScriptDir & "\Bin\tesseract.exe", '"' & @ScriptDir & "\Captures\img.tif" & '" "' & @ScriptDir & "\Captures\result" & '"')
;~    if an error occurs while launching tesseract, show msgbox...
   If @error Then
      MsgBox(0, "", @error, 3)
   EndIf
;~    get number from txt file (empty string when tesseract produced no output)...
   $busca = FileRead(@ScriptDir & "\Captures\result.txt")

 

Link to comment
Share on other sites

Example gui version:

; Drag-and-drop front end for tesseract.exe: every file dropped on the window is
; handed to Tesseract, and the text read back from the generated .txt file is
; shown in the window and written to the console.
Opt("TrayAutoPause", 0)
#include <WinAPISys.au3>
#include <WindowsConstants.au3>
#include <GUIConstantsEx.au3>
#include <StaticConstants.au3>
#include <ButtonConstants.au3>
#include <File.au3>
Global $TesseractEXE = 'tesseract.exe'
; Mode 5 of _SplitPath returns drive + directory without the trailing backslash.
Global $TesseractDIR = _SplitPath('C:\Program Files (x86)\Tesseract-OCR\tesseract.exe', 5)
If Not FileExists($TesseractDIR & "\" & $TesseractEXE) Then Exit MsgBox(64 + 262144, "Tesseract ORC /!\", $TesseractEXE & " file does not exist, Please set the path!")
Global $AppWindows = GUICreate("Dao Van Trong  - Trong.LIVE", 420, 100, -1, -1, BitOR($WS_BORDER, $WS_POPUP), BitOR($WS_EX_ACCEPTFILES, $WS_EX_TOPMOST, $WS_EX_WINDOWEDGE))
Global $AppTitle = GUICtrlCreateLabel("===[ Tesseract ORC ]===", 10, 0, 400, 25, $SS_CENTERIMAGE, $GUI_WS_EX_PARENTDRAG)
GUICtrlSetState(-1, $GUI_DROPACCEPTED)
GUICtrlSetBkColor(-1, $GUI_BKCOLOR_TRANSPARENT)
GUICtrlSetFont(-1, 9, 800)
; NOTE(review): 0x0200 appears to be the value of $SS_CENTERIMAGE itself, so this sum
; would yield 0x0400 rather than a combination of two styles - confirm the intended style.
Global $AppTask = GUICtrlCreateLabel("=== Drag and drop files here ===", 10, 22, 420, 78, $SS_CENTERIMAGE + 0x0200, $GUI_WS_EX_PARENTDRAG)
GUICtrlSetState(-1, $GUI_DROPACCEPTED)
GUICtrlSetBkColor(-1, $GUI_BKCOLOR_TRANSPARENT)
GUICtrlSetFont(-1, 9, 500)
; Style flags have to be combined with BitOR; BitAND of distinct flags is 0 and drops them all.
Global $xCLOSE = GUICtrlCreateButton("X", 405, 0, 12, 12, BitOR($BS_MULTILINE, $BS_VCENTER, $BS_FLAT))
GUICtrlSetBkColor(-1, $GUI_BKCOLOR_TRANSPARENT)
GUICtrlSetState(-1, $GUI_DROPACCEPTED)
GUISetState(@SW_SHOW)
_WinAPI_ChangeWindowMessageFilterEx($AppWindows, $WM_DROPFILES, $MSGFLT_ALLOW)
_WinAPI_ChangeWindowMessageFilterEx($AppWindows, $WM_COPYDATA, $MSGFLT_ALLOW)
_WinAPI_ChangeWindowMessageFilterEx($AppWindows, $WM_COPYGLOBALDATA, $MSGFLT_ALLOW)
; Filled by the WM_DROPFILES handler: element 0 is the file count, 1..n are the paths.
; Start with an empty list so the message loop can always index element 0.
Global $__aDropFiles[1] = [0]
GUIRegisterMsg($WM_DROPFILES, "WM_DROPFILES")
Local $nMsg
While 1
    $nMsg = GUIGetMsg()
    Switch $nMsg
        Case $GUI_EVENT_DROPPED
            If $__aDropFiles[0] > 0 Then
                For $i = 1 To $__aDropFiles[0]
                    ConsoleWrite($__aDropFiles[$i] & @CRLF)
                    GUICtrlSetData($AppTask, 'Working on: ' & $__aDropFiles[$i])
                    ; Output base name: the dropped path without its extension (mode 7).
                    Local $sInputFileDirName = _SplitPath($__aDropFiles[$i], 7)
                    ; Remove an older result so only the output of this run can be read back.
                    FileDelete($sInputFileDirName & '.txt')
                    Local $command = $TesseractEXE & ' "' & $__aDropFiles[$i] & '" "' & $sInputFileDirName & '"'
                    Local $sToolOutput = _ReadDOS($command)
                    Local $Return = "-> Input File: " & $__aDropFiles[$i] & @CRLF

                    If FileExists($sInputFileDirName & '.txt') Then
                        $Return &= "-> Output: " & FileRead($sInputFileDirName & '.txt')
                    Else
                        ; No result file was produced: show what the tool printed instead of failing silently.
                        $Return &= "-> Error: " & $sToolOutput
                    EndIf
                    GUICtrlSetData($AppTask, $Return)
                    ConsoleWrite($Return & @CRLF)
                Next
            EndIf
        Case $GUI_EVENT_CLOSE, $xCLOSE
            Exit
    EndSwitch
WEnd
; Runs $command through the command interpreter (@ComSpec /c) in a hidden window,
; using the global $TesseractDIR as working directory, echoes the command to the
; console and returns everything read from the child's output streams.
Func _ReadDOS($command)
    Local $sCaptured = ''
    ; The last argument (8) is the stdio option passed to Run().
    Local $iChild = Run('"' & @ComSpec & '" /c ' & $command, $TesseractDIR, @SW_HIDE, 8)
    ConsoleWrite($command & @CRLF)

    Local $iReadErr
    ; Poll stdout until the read reports an error (@error <> 0).
    Do
        $sCaptured &= StdoutRead($iChild, False, False)
        $iReadErr = @error
        If Not $iReadErr Then Sleep(10)
    Until $iReadErr

    ; Same polling loop for stderr.
    Do
        $sCaptured &= StderrRead($iChild, False, False)
        $iReadErr = @error
        If Not $iReadErr Then Sleep(10)
    Until $iReadErr

    Return $sCaptured
EndFunc   ;==>_ReadDOS
; Message handler registered for $WM_DROPFILES. Stores the result of
; _WinAPI_DragQueryFileEx() in the global $__aDropFiles; when that call does not
; return an array, a one-element array holding 0 is stored instead.
; Always returns $GUI_RUNDEFMSG so AutoIt's default processing continues.
Func WM_DROPFILES($hWnd, $iMsg, $iwParam, $ilParam)
    #forceref $hWnd, $ilParam
    If $iMsg = $WM_DROPFILES Then
        Local $aDropped = _WinAPI_DragQueryFileEx($iwParam)
        If Not IsArray($aDropped) Then
            ; Fall back to an "empty list" marker: count 0, no paths.
            Local $aNone[1] = [0]
            $aDropped = $aNone
        EndIf
        $__aDropFiles = $aDropped
    EndIf
    Return $GUI_RUNDEFMSG
EndFunc   ;==>WM_DROPFILES
; Splits $sFilePath with _PathSplit() and returns the part selected by $rType:
;   1       drive
;   2       directory (forward slashes replaced by backslashes)
;   3       file name without extension
;   4       extension
;   5, 12   drive + directory, without a trailing backslash
;   6, 34   file name + extension
;   7, 123  drive + directory + file name (no extension)
;   other   the array returned by _PathSplit()
Func _SplitPath($sFilePath, $rType = -1)
    Local $sDrv = "", $sFolder = "", $sName = "", $sExt = ""
    Local $aParts = _PathSplit($sFilePath, $sDrv, $sFolder, $sName, $sExt)
    $sFolder = StringReplace($sFolder, '/', '\')

    ; Default result: the raw _PathSplit() array, replaced below for known modes.
    Local $vResult = $aParts
    Switch $rType
        Case 1
            $vResult = $sDrv
        Case 2
            $vResult = $sFolder
        Case 3
            $vResult = $sName
        Case 4
            $vResult = $sExt
        Case 5, 12
            If StringRight($sFolder, 1) == '\' Then $sFolder = StringTrimRight($sFolder, 1)
            $vResult = $sDrv & $sFolder
        Case 6, 34
            $vResult = $sName & $sExt
        Case 7, 123
            $vResult = $sDrv & $sFolder & $sName
    EndSwitch
    Return $vResult
EndFunc   ;==>_SplitPath

image.png.473e223a5d35b959b48b90f34d6dd2bc.png

Regards,
 

Link to comment
Share on other sites

It's also normal for an image recognition program to fail to recognize the text in an image, so check the quality of the image.

AutoIt only acts as the glue here; processing the image and returning the result is up to Tesseract.

Regards,
 

Link to comment
Share on other sites

Link to comment
Share on other sites

9 hours ago, Nine said:

You may want to try to use Modi.OCR com object.  Was reading your image with no problem.  Tested a text with quite a few numbers in it, and all were read properly...

Cool.. Do you have any example of use? Works only with Office 2010? Thanks!

Link to comment
Share on other sites

#include <Constants.au3>
#include <GUIConstants.au3>
#include <Array.au3>

; Every variable has to be declared before use.
Opt("MustDeclareVars", True)

; miLANG_* language identifiers, listed in ascending numeric order. In this script
; they are only mentioned in the commented-out argument list of the Ocr() call.
Const $miLANG_CZECH               = 5
Const $miLANG_DANISH              = 6
Const $miLANG_GERMAN              = 7
Const $miLANG_GREEK               = 8
Const $miLANG_ENGLISH             = 9
Const $miLANG_SPANISH             = 10
Const $miLANG_FINNISH             = 11
Const $miLANG_FRENCH              = 12
Const $miLANG_HUNGARIAN           = 14
Const $miLANG_ITALIAN             = 16
Const $miLANG_JAPANESE            = 17
Const $miLANG_KOREAN              = 18
Const $miLANG_DUTCH               = 19
Const $miLANG_NORWEGIAN           = 20
Const $miLANG_POLISH              = 21
Const $miLANG_PORTUGUESE          = 22
Const $miLANG_RUSSIAN             = 25
Const $miLANG_SWEDISH             = 29
Const $miLANG_TURKISH             = 31
Const $miLANG_CHINESE_TRADITIONAL = 1028
Const $miLANG_SYSDEFAULT          = 2048
Const $miLANG_CHINESE_SIMPLIFIED  = 2052

; Embedded MODI viewer demo: runs OCR on "capture.jpg", shows the document in an
; ActiveX viewer and, once the window is closed, lists the recognised words and
; the bounding rectangle(s) of the first word.

; Initialize error handler
Local $oMyError = ObjEvent("AutoIt.Error", "MyErrFunc")

Local $miDoc = ObjCreate("MODI.Document")
Local $miDocView = ObjCreate("MiDocViewer.MiDocView")
; ObjCreate does not return an object when the COM class is not registered.
If Not IsObj($miDoc) Or Not IsObj($miDocView) Then
  MsgBox($MB_SYSTEMMODAL, "", "Unable to create the MODI objects (MODI.Document / MiDocViewer.MiDocView).")
  Exit 1
EndIf

Local $Viewer = GUICreate("Embedded MODI Viewer", 750, 400)

;Creates an ActiveX Control in the GUI.
Local $GUIActiveX = GUICtrlCreateObj($miDocView, 0, 0,750, 400)
; GUICtrlSetResizing expects the control ID, not the window handle.
GUICtrlSetResizing($GUIActiveX, $GUI_DOCKAUTO)

$miDoc.Create("capture.jpg")
$miDoc.Images(0).Ocr() ;$miLANG_ENGLISH, True, False)

GUISetState()

$miDocView.Document = $miDoc
$miDocView.SetScale(0.75, 0.75)

; Keep the viewer open until the user closes the window.
While GUIGetMsg() <> $GUI_EVENT_CLOSE
WEnd

; Collect the recognised words; the array grows on demand instead of being
; capped at its initial 500 slots.
Local $aArray[500], $i = 0
For $oWord In $miDoc.Images(0).Layout.Words
  If $i >= UBound($aArray) Then ReDim $aArray[UBound($aArray) * 2]
  $aArray[$i] = $oWord.text
  $i += 1
Next

If $i = 0 Then
  ; Nothing recognised: there is no first word whose rectangles could be queried.
  MsgBox($MB_SYSTEMMODAL, "", "No words were recognized in the image.")
Else
  ReDim $aArray[$i]
  _ArrayDisplay($aArray, "OCR Result")

  Local $miRects = $miDoc.Images(0).Layout.Words(0).Rects
  Local $strRectInfo = "Word falls within " & $miRects.Count & _
      " bounding rectangle(s)." & @CRLF
  For $miRect In $miRects
    $strRectInfo &= _
        " Rectangle coordinates: " & @CRLF & _
        "  - Left: " & $miRect.Left & @CRLF & _
        "  - Right: " & $miRect.Right & @CRLF & _
        "  - Top: " & $miRect.Top & @CRLF & _
        "  - Bottom: " & $miRect.Bottom
  Next
  MsgBox($MB_SYSTEMMODAL, "", $strRectInfo)
EndIf

;------------------------------ This is a COM Error handler --------------------------------
; COM error callback registered through ObjEvent("AutoIt.Error", "MyErrFunc").
; Shows the details carried by the script-level $oMyError object in a message box
; and then sets @error to 1 so the code that triggered the COM call can detect it.
Func MyErrFunc()
  ; Declared explicitly: the script enables Opt("MustDeclareVars", True), under
  ; which using an undeclared variable is an error.
  Local $HexNumber = Hex($oMyError.number, 8)
  MsgBox(0, "COM Error Test", "We intercepted a COM Error !" & @CRLF & @CRLF & _
      "err.description is: " & @TAB & $oMyError.description & @CRLF & _
      "err.windescription:" & @TAB & $oMyError.windescription & @CRLF & _
      "err.number is: " & @TAB & $HexNumber & @CRLF & _
      "err.lastdllerror is: " & @TAB & $oMyError.lastdllerror & @CRLF & _
      "err.scriptline is: " & @TAB & $oMyError.scriptline & @CRLF & _
      "err.source is: " & @TAB & $oMyError.source & @CRLF & _
      "err.helpfile is: " & @TAB & $oMyError.helpfile & @CRLF & _
      "err.helpcontext is: " & @TAB & $oMyError.helpcontext _
      )
  SetError(1) ; to check for after this function returns
EndFunc   ;==>MyErrFunc

 

Link to comment
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now
 Share

  • Recently Browsing   0 members

    • No registered users viewing this page.
×
×
  • Create New...