Jump to content

Parts of Speech Database generator


JRowe
 Share

Recommended Posts

It's not incredibly fast, but it's effective, and shows how you can parse a text file and turn it into a database.

#include <ButtonConstants.au3>
#include <EditConstants.au3>
#include <GUIConstantsEx.au3>
#include <ProgressConstants.au3>
#include <StaticConstants.au3>
#include <WindowsConstants.au3>
#include <SQLite.au3>
#include <SQLite.dll.au3>
#Include <Date.au3>

; Builds the main window and every control the import loop reports into.
; The variables assigned here are read later by the main loop, ProcessLine() and Pause().
#Region ### START Koda GUI section ### Form=
$Form1 = GUICreate("Parts of Speech Database Generator", 455, 197, 192, 124)
; Overall progress bar; the main loop sets it from the current line index.
$Progress1 = GUICtrlCreateProgress(8, 168, 438, 17)
$Label1 = GUICtrlCreateLabel("Lines Processed:", 168, 144, 85, 17)
; "12345" is only placeholder text; the main loop overwrites it with "<current>/<total>".
$Label2 = GUICtrlCreateLabel("12345", 264, 144, 84, 17)
; Shows the raw text of the line most recently handled by ProcessLine().
$Input1 = GUICtrlCreateInput("Input1", 64, 8, 361, 21)
$Label3 = GUICtrlCreateLabel("Line:", 32, 8, 27, 17)
$Label4 = GUICtrlCreateLabel("Phrase Size:", 32, 56, 63, 17)
$Label5 = GUICtrlCreateLabel("Homonyms:", 36, 72, 59, 17)
$Label6 = GUICtrlCreateLabel("Word or Phrase:", 14, 40, 81, 17)
; Button1 triggers Pause() from the main loop; Button2 is polled inside Pause() to resume.
$Button1 = GUICtrlCreateButton("Pause", 152, 104, 75, 25, 0)
$Button2 = GUICtrlCreateButton("Resume", 232, 104, 75, 25, 0)
; Value labels filled in by ProcessLine(): Label7 = word/phrase, Label8 = phrase size,
; Label9 = number of meanings.
$Label7 = GUICtrlCreateLabel("Label7", 99, 40, 36, 17)
$Label8 = GUICtrlCreateLabel("Label8", 99, 56, 36, 17)
$Label9 = GUICtrlCreateLabel("Label9", 99, 72, 36, 17)
$Label10 = GUICtrlCreateLabel("HomonymID:", 199, 40, 65, 17)
$Label11 = GUICtrlCreateLabel("Part Of Speech:", 184, 71, 80, 17)
; Value labels filled in by ProcessLine(): Label12 = index of the meaning being inserted,
; Label13 = the part-of-speech code being inserted.
$Label12 = GUICtrlCreateLabel("Label12", 272, 40, 42, 17)
$Label13 = GUICtrlCreateLabel("Label13", 272, 71, 42, 17)
; Starts empty; the main loop writes the estimated remaining time here every 1000 lines.
$Label_timeRemaining = GuiCtrlCreateLabel("", 10, 134, 105, 27)
GUISetState(@SW_SHOW)
#EndRegion ### END Koda GUI section ###

; ---------------------------------------------------------------------------
; Main script: reads part-of-speech.txt into memory, then feeds it line by
; line to ProcessLine(), committing the SQLite transaction every 1000 lines
; and refreshing the progress / time-remaining display.
; ---------------------------------------------------------------------------

; Output parameters filled by _TicksToTime() for the time-remaining label.
Local $Secs, $Mins, $Hour

;Load Text File
$file = FileOpen("part-of-speech.txt", 0)
If $file = -1 Then
    MsgBox(0, "Error", "Unable to open file.")
    Exit
EndIf

; Read the whole file in one go and release the handle straight away;
; everything after this point works on the in-memory copy.
$Str = FileRead($file)
FileClose($file)
MsgBox(0,"File Loaded", "Loading Complete")

;LoadDataBase
_SQLite_Startup()
If @error Then
    MsgBox(0, "Error", "Unable to load the SQLite library.")
    Exit
EndIf
$database = _SQLite_Open("POSDatabase.db")
If @error Then
    MsgBox(0, "Error", "Unable to open POSDatabase.db.")
    _SQLite_Shutdown()
    Exit
EndIf

; Drop carriage returns and split on line feeds so that both CRLF and
; LF-only files are broken into lines. $aFile[0] holds the element count.
$aFile = StringSplit(StringStripCR($Str), @LF)
$Str = "" ; release the raw text, only the array is needed from here on

_SQLite_Exec($database,"Begin")
$qty = $aFile[0]
$iTimer = TimerInit()
For $x = 1 To $qty
    $nMsg = GUIGetMsg()
    Switch $nMsg
        Case $GUI_EVENT_CLOSE
            ; Commit what has been imported so far before shutting down.
            _SQLite_Exec($database,"End")
            _SQLite_Close()
            _SQLite_Shutdown()
            Exit
        Case $Button1
            Pause()
    EndSwitch

    ; Skip empty lines (typically the element after the final line break);
    ; they contain nothing to import.
    If StringStripWS($aFile[$x], 8) <> "" Then ProcessLine($aFile[$x])

    GUICtrlSetData($Progress1, Round(($x/$qty)*100)+1)
    GuiCtrlSetData($Label2, $x & "/" & $qty)

    If Mod($x, 1000) = 0 Then ;Every 1000 entries make it save the journal file
        ; Estimate the remaining time from how long the last 1000 lines took.
        $Diff = Int(TimerDiff($iTimer))
        $TicksRemaining = (($qty - $x) / 1000 ) * $Diff
        _TicksToTime($TicksRemaining, $Hour, $Mins, $Secs)
        GuiCtrlSetData($Label_timeRemaining,"Estimated remaining " & StringFormat("%02i:%02i:%02i", $Hour, $Mins, $Secs))
        ; Commit this batch and open a new transaction for the next one.
        _SQLite_Exec($database,"End")
        _SQLite_Exec($database,"Begin")
        $iTimer = TimerInit()
    EndIf

Next

_SQLite_Exec($database,"End")
MsgBox(0, "Wow", "All Done!")
_SQLite_Close()
_SQLite_Shutdown()
Exit

; Parses one line of the source text and inserts one row into PartsOfSpeech
; for every part-of-speech code found on it.
;
; The line is tokenised with a regular expression. The LAST token is taken as
; the string of part-of-speech codes (one character per code); all preceding
; tokens are joined with single spaces to form the word or phrase.
;
; Parameters:
;   $line - the text of one line (not a line number).
; Returns:
;   Nothing. Lines that yield fewer than two tokens are skipped silently.
; Uses the file-level $database handle and the GUI control variables.
Func ProcessLine($line)
    ; Note: inside the character class, '-. is a range (apostrophe through
    ; period), so characters such as ( ) * + , are accepted as token characters too.
    Local $a = StringRegExp($line, "[\w!'-.]+", 3)

    ; With no match StringRegExp does not return an array (blank line), and a
    ; single token leaves nothing to use as the part-of-speech field.
    If Not IsArray($a) Or UBound($a) < 2 Then Return

    Local $b = UBound($a) - 1 ; index of the last token; also shown as "Phrase Size"
    Local $word = $a[0]
    For $i = 1 To $b - 1
        $word &= " " & $a[$i]
    Next

    ;Get an array of all parts of speech, even if only 1
    ; An empty delimiter makes StringSplit return one element per character.
    Local $MeaningsArray = StringSplit($a[$b], "")
    Local $NumMeanings = UBound($MeaningsArray) - 1
    Local $currentMeaning

    For $loops = 1 To $NumMeanings
        $currentMeaning = $MeaningsArray[$loops]
        GUICtrlSetData($Label12, $loops)
        GUICtrlSetData($Label13, $currentMeaning)
        ; Every value is sent as a properly escaped single-quoted SQL string
        ; literal (the original sent them as double-quoted tokens, which SQL
        ; treats as identifiers first).
        _SQLite_Exec($database, "INSERT INTO PartsOfSpeech (HomonymID,NumMeanings,PartOfSpeech,PhraseSize,Word) VALUES (" & _
                __POS_SqlText($loops) & "," & _
                __POS_SqlText($NumMeanings) & "," & _
                __POS_SqlText($currentMeaning) & "," & _
                __POS_SqlText($b) & "," & _
                __POS_SqlText($word) & ");")
    Next

    GUICtrlSetData($Input1, $line)
    GUICtrlSetData($Label7, $word)
    GUICtrlSetData($Label8, $b)
    GUICtrlSetData($Label9, $NumMeanings)
EndFunc

; Internal helper: returns $value as a single-quoted SQL string literal, with
; embedded single quotes doubled.
Func __POS_SqlText($value)
    Return "'" & StringReplace($value, "'", "''") & "'"
EndFunc

; Suspends the import until the Resume button ($Button2) is pressed.
;
; The open transaction is committed on entry, so everything imported so far is
; saved while paused. A new transaction is started when the import resumes.
; Closing the window while paused closes the database and exits the script.
; Uses the file-level $database handle; takes no parameters, returns nothing.
Func Pause()
    _SQLite_Exec($database,"End")
    While 1
        Switch GUIGetMsg()
            Case $Button2
                ExitLoop
            Case $GUI_EVENT_CLOSE
                ; The transaction was already committed on entry, so there is
                ; nothing left to end here - just release the database and quit.
                _SQLite_Close()
                _SQLite_Shutdown()
                Exit
        EndSwitch
    WEnd
    _SQLite_Exec($database,"Begin")
EndFunc

You can find part-of-speech.txt (the input file the script opens) here. Then rename POSDatabase.txt to POSDatabase.db, place both files next to the script, and you're set.

POSDatabase.txt

Updated with the faster, time estimate version by ChrisL. Thank you!

Edited by JRowe
Link to comment
Share on other sites

Which version of AutoIt does this work with, and does it require any additional include files? I got a few errors:

C:\PROGRA~1\AutoIt3\Include\ProgressConstants.au3(16,39) : WARNING: $WM_USER: possibly used before declaration.
Global Const $PBM_SETRANGE = $WM_USER +
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
C:\PROGRA~1\AutoIt3\Include\ProgressConstants.au3(26,47) : WARNING: $CCM_SETBKCOLOR: possibly used before declaration.
Global Const $PBM_SETBKCOLOR = $CCM_SETBKCOLOR
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
C:\Junk\PartsofSpeech\PartsofSpeech.au3 - 0 error(s), 2 warning(s)
->12:55:29 AU3Check ended.rc:1
>Running:(3.2.10.0):C:\Program Files\AutoIt3\autoit3.exe "C:\Junk\PartsofSpeech\PartsofSpeech.au3"  
C:\PROGRA~1\AutoIt3\Include\ProgressConstants.au3 (16) : ==> Variable used without being declared.: 
Global Const $PBM_SETRANGE = $WM_USER + 1 
Global Const $PBM_SETRANGE = ^ ERROR
Edited by Yorn
Link to comment
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now
 Share

  • Recently Browsing   0 members

    • No registered users viewing this page.
×
×
  • Create New...