#include <ButtonConstants.au3>
#include <EditConstants.au3>
#include <GUIConstantsEx.au3>
#include <ProgressConstants.au3>
#include <StaticConstants.au3>
#include <WindowsConstants.au3>
#include <SQLite.au3>
#include <SQLite.dll.au3>
#Include <Date.au3>

; Parts of Speech Database Generator
;
; Reads "part-of-speech.txt" line by line and inserts one row per
; (word, part-of-speech letter) pair into the PartsOfSpeech table of
; "POSDatabase.db". Inserts are batched in transactions of 1000 lines, and
; the time taken by the last batch drives the "Estimated remaining" label.

#Region ### START Koda GUI section ### Form=
$Form1 = GUICreate("Parts of Speech Database Generator", 455, 197, 192, 124)
$Progress1 = GUICtrlCreateProgress(8, 168, 438, 17)
$Label1 = GUICtrlCreateLabel("Lines Processed:", 168, 144, 85, 17)
$Label2 = GUICtrlCreateLabel("12345", 264, 144, 84, 17)
$Input1 = GUICtrlCreateInput("Input1", 64, 8, 361, 21)
$Label3 = GUICtrlCreateLabel("Line:", 32, 8, 27, 17)
$Label4 = GUICtrlCreateLabel("Phrase Size:", 32, 56, 63, 17)
$Label5 = GUICtrlCreateLabel("Homonyms:", 36, 72, 59, 17)
$Label6 = GUICtrlCreateLabel("Word or Phrase:", 14, 40, 81, 17)
$Button1 = GUICtrlCreateButton("Pause", 152, 104, 75, 25, 0)
$Button2 = GUICtrlCreateButton("Resume", 232, 104, 75, 25, 0)
$Label7 = GUICtrlCreateLabel("Label7", 99, 40, 36, 17)
$Label8 = GUICtrlCreateLabel("Label8", 99, 56, 36, 17)
$Label9 = GUICtrlCreateLabel("Label9", 99, 72, 36, 17)
$Label10 = GUICtrlCreateLabel("HomonymID:", 199, 40, 65, 17)
$Label11 = GUICtrlCreateLabel("Part Of Speech:", 184, 71, 80, 17)
$Label12 = GUICtrlCreateLabel("Label12", 272, 40, 42, 17)
$Label13 = GUICtrlCreateLabel("Label13", 272, 71, 42, 17)
$Label_timeRemaining = GuiCtrlCreateLabel("", 10, 134, 105, 27)
GUISetState(@SW_SHOW)
#EndRegion ### END Koda GUI section ###

; Filled in by _TicksToTime() for the remaining-time display.
Local $Secs, $Mins, $Hour

;Load Text File
$file = FileOpen("part-of-speech.txt", 0)
If $file = -1 Then
    MsgBox(0, "Error", "Unable to open file.")
    Exit
EndIf
MsgBox(0,"File Loaded", "Loading Complete")

;LoadDataBase
_SQLite_Startup()
If @error Then
    FileClose($file)
    MsgBox(0, "Error", "Unable to load the SQLite DLL.")
    Exit
EndIf
$database = _SQLite_Open("POSDatabase.db")
If @error Then
    FileClose($file)
    _SQLite_Shutdown()
    MsgBox(0, "Error", "Unable to open POSDatabase.db.")
    Exit
EndIf

; Read the whole file into memory, then release the handle right away.
$Str = FileRead($file)
FileClose($file)

; Dropping every CR and splitting on LF accepts both CRLF and LF-only files.
$aFile = StringSplit(StringStripCR($Str), @LF)
$Str = "" ; free the raw text; only the line array is needed from here on

_SQLite_Exec($database,"Begin")
$qty = $aFile[0] ; StringSplit stores the element count in [0]
$iTimer = TimerInit()

For $x = 1 To $qty
    Switch GUIGetMsg()
        Case $GUI_EVENT_CLOSE
            ; Commit what has been inserted so far before leaving.
            _SQLite_Exec($database,"End")
            _SQLite_Close()
            _SQLite_Shutdown()
            Exit
        Case $Button1
            Pause()
    EndSwitch

    ProcessLine($aFile[$x])

    GUICtrlSetData($Progress1, Round(($x/$qty)*100)+1)
    GuiCtrlSetData($Label2, $x & "/" & $qty)

    ; Every 1000 lines: refresh the estimate and commit the batch.
    If Mod($x, 1000) = 0 Then
        $Diff = Int(TimerDiff($iTimer)) ; ms spent on the last 1000 lines
        $TicksRemaining = (($qty - $x) / 1000 ) * $Diff
        _TicksToTime($TicksRemaining, $Hour, $Mins, $Secs)
        GuiCtrlSetData($Label_timeRemaining,"Estimated remaining " & StringFormat("%02i:%02i:%02i", $Hour, $Mins, $Secs))
        _SQLite_Exec($database,"End")
        _SQLite_Exec($database,"Begin")
        $iTimer = TimerInit()
    EndIf
Next

_SQLite_Exec($database,"End")
MsgBox(0, "Wow", "All Done!")
_SQLite_Close()
_SQLite_Shutdown()
Exit

; ProcessLine($line)
;   Tokenises one line of the input file and inserts one row into
;   PartsOfSpeech for every character of the last token.
;
;   $line - text of a single line (already read from the file).
;
;   The tokens before the last one are joined with single spaces to form the
;   word or phrase; the last token is treated as a string of one-letter
;   part-of-speech codes. Lines yielding fewer than two tokens (blank lines,
;   or a word with no codes) are skipped and nothing is inserted.
;   Uses the global $database handle and updates the GUI labels.
Func ProcessLine($line)
    Local $a = StringRegExp($line, "[\w!'-.]+", 3)
    ; On no match StringRegExp does not return an array, so UBound() is 0;
    ; indexing $a in that case would abort the script.
    If UBound($a) < 2 Then Return

    Local $b = UBound($a) - 1 ; index of the last token = number of words in the phrase
    Local $word = $a[0]
    For $i = 1 To $b - 1
        $word &= " " & $a[$i]
    Next

    ;Get an array of all parts of speech, even if only 1
    Local $MeaningsArray = StringSplit($a[$b], "") ; empty delimiter splits into single characters
    Local $NumMeanings = $MeaningsArray[0]
    Local $currentMeaning

    For $loops = 1 To $NumMeanings
        $currentMeaning = $MeaningsArray[$loops]
        GUICtrlSetData($Label12, $loops)
        GUICtrlSetData($Label13, $currentMeaning)
        ; _SQLite_FastEscape() returns a single-quoted, escaped SQL string literal,
        ; so the statement no longer depends on double-quoted string literals.
        _SQLite_Exec($database, "INSERT INTO PartsOfSpeech (HomonymID,NumMeanings,PartOfSpeech,PhraseSize,Word) VALUES (" & _
                _SQLite_FastEscape($loops) & "," & _
                _SQLite_FastEscape($NumMeanings) & "," & _
                _SQLite_FastEscape($currentMeaning) & "," & _
                _SQLite_FastEscape($b) & "," & _
                _SQLite_FastEscape($word) & ");")
    Next

    GUICtrlSetData($Input1, $line)
    GUICtrlSetData($Label7, $word)
    GUICtrlSetData($Label8, $b)
    GUICtrlSetData($Label9, $NumMeanings)
EndFunc

; Pause()
;   Commits the open transaction, then blocks until the Resume button is
;   pressed, at which point a new transaction is started and control returns
;   to the caller. Closing the window while paused shuts SQLite down and
;   exits the script.
Func Pause()
    _SQLite_Exec($database,"End")
    While 1
        Switch GUIGetMsg()
            Case $Button2
                ExitLoop
            Case $GUI_EVENT_CLOSE
                ; The transaction was already committed above; nothing left to end.
                _SQLite_Close()
                _SQLite_Shutdown()
                Exit
        EndSwitch
    WEnd
    _SQLite_Exec($database,"Begin")
EndFunc
You can find part-of-speech.txt here; then just rename POSDatabase.txt to POSDatabase.db and you're set.
POSDatabase.txt 35K
273 downloads
Updated with the faster, time-estimate version by ChrisL. Thank you!
Edited by JRowe, 04 October 2009 - 07:04 AM.






