logo78 Posted March 20, 2010 Posted March 20, 2010 Hi,i get frequently several really huge csv files (logs), where i need an specific range of lines (related to the timestamp)Just an example with a few lines'17.03.2010 16:03:59'; '1'; '5'; '7'; '128'; '0'; '207900'; '526'; '0'; '0'; '0'; '48'; '2'; '2'; '2'; '5'; '0'; '0'; '17'; ; '225'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '536'; '0'; '536'; '0'; '536'; '0'; '27'; '27'; '64'; '0'; '16'; '0'; '0'; '0'; '0'; '0'; '1'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '1'; '0'; '0'; '0'; '0'; '0'; '255'; '1'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '7'; '0'; '1'; '0'; '7'; '0'; '1'; '0'; '0'; '0'; '1'; ; ; '507'; '508'; '509'; '510'; '511'; '512'; '513'; '514'; '515'; '516'; '517'; '518'; '519'; '520'; '521'; '522'; '523'; '524'; '525'; '526'; '527'; '528'; '529'; '530'; '531'; '532'; '533'; '534'; '535'; '536'; '537'; '0'; '0'; '0'; ; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '8'; '0'; '0'; '0'; '0'; '0'; '128'; '506'; '536'; '536'; '0'; '0'; '0'; '37'; '2'; '1'; '1'; '1'; '0'; '2'; '1'; '1'; '521'; '0'; '0'; '0'; '410'; '1'; '506'; '256'; ; '98'; '0'; '410'; '512'; '506'; '512'; '98' '17.03.2010 16:03:59'; '1'; '5'; '7'; '128'; '0'; '208987'; '526'; '0'; '0'; '0'; '48'; '2'; '2'; '2'; '5'; '0'; '0'; '17'; ; '225'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '536'; '0'; '536'; '0'; '536'; '0'; '27'; '27'; '65'; '0'; '16'; '0'; '0'; '0'; '0'; '0'; '1'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; 
'0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '1'; '0'; '0'; '0'; '0'; '0'; '255'; '1'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '7'; '0'; '1'; '0'; '7'; '0'; '1'; '0'; '0'; '0'; '1'; ; ; '507'; '508'; '509'; '510'; '511'; '512'; '513'; '514'; '515'; '516'; '517'; '518'; '519'; '520'; '521'; '522'; '523'; '524'; '525'; '526'; '527'; '528'; '529'; '530'; '531'; '532'; '533'; '534'; '535'; '536'; '537'; '0'; '0'; '0'; ; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '8'; '0'; '0'; '0'; '0'; '0'; '128'; '506'; '536'; '536'; '0'; '0'; '0'; '37'; '2'; '1'; '1'; '1'; '0'; '2'; '1'; '1'; '521'; '0'; '0'; '0'; '410'; '1'; '506'; '256'; ; '98'; '0'; '410'; '512'; '506'; '512'; '98' '17.03.2010 16:04:00'; '1'; '5'; '6'; '128'; '0'; '210541'; '527'; '0'; '0'; '0'; '48'; '2'; '2'; '2'; '5'; '0'; '0'; '17'; ; '225'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '536'; '0'; '536'; '0'; '536'; '0'; '27'; '27'; '66'; '1'; '16'; '0'; '0'; '0'; '0'; '0'; '1'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '1'; '0'; '0'; '0'; '0'; '0'; '255'; '1'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '7'; '0'; '1'; '0'; '7'; '0'; '1'; '0'; '0'; '0'; '1'; ; ; '507'; '508'; '509'; '510'; '511'; '512'; '513'; '514'; '515'; '516'; '517'; '518'; '519'; '520'; '521'; '522'; '523'; '524'; '525'; '526'; '527'; '528'; '529'; '530'; '531'; '532'; '533'; '534'; '535'; '536'; '537'; '0'; '0'; '0'; ; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '8'; '0'; '0'; '0'; '0'; '0'; '128'; 
'506'; '536'; '536'; '0'; '0'; '0'; '37'; '2'; '1'; '1'; '1'; '0'; '2'; '1'; '1'; '521'; '0'; '0'; '0'; '410'; '1'; '506'; '256'; ; '98'; '0'; '410'; '512'; '506'; '512'; '98' '17.03.2010 16:04:01'; '1'; '5'; '6'; '128'; '0'; '212080'; '527'; '0'; '0'; '0'; '48'; '2'; '2'; '2'; '5'; '0'; '0'; '17'; ; '225'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '536'; '0'; '536'; '0'; '536'; '0'; '27'; '27'; '66'; '0'; '16'; '0'; '0'; '0'; '0'; '0'; '1'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '1'; '0'; '0'; '0'; '0'; '0'; '255'; '1'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '7'; '0'; '1'; '0'; '7'; '0'; '1'; '0'; '0'; '0'; '1'; ; ; '507'; '508'; '509'; '510'; '511'; '512'; '513'; '514'; '515'; '516'; '517'; '518'; '519'; '520'; '521'; '522'; '523'; '524'; '525'; '526'; '527'; '528'; '529'; '530'; '531'; '532'; '533'; '534'; '535'; '536'; '537'; '0'; '0'; '0'; ; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '8'; '0'; '0'; '0'; '0'; '0'; '128'; '506'; '536'; '536'; '0'; '0'; '0'; '37'; '2'; '1'; '1'; '1'; '0'; '2'; '1'; '1'; '521'; '0'; '0'; '0'; '410'; '1'; '506'; '256'; ; '98'; '0'; '410'; '512'; '506'; '512'; '98'A loop with FileReadLines is working in the beginning of the file (eg. 1000 to 2000) pretty fast.But if i need a range from 10000-12000 for example, it takes an amazing time, though im working with filehandles of course.I read the help file, either i understand the performance problem, the script has to the count on every loop to the first line (10000).My question would be:If you had the same issue, how would u handle it? May be any solution with external programs? 
It could also be a commercial one. There are not many possibilities. I could split the files, but then they would only be easier to handle — I would still have to search for the requested range, so that is not a real help, actually. The best editor choice for this is HiEditor. For the time being, I have no other option than to do it manually. A really silly job.
ajit Posted March 20, 2010 Posted March 20, 2010 @logo78 Have you tried FileReadLine without using the optional "line" parameter. From Help File "From a performance standpoint it is a bad idea to read line by line specifying "line" parameter whose value is incrementing by one. This forces AutoIt to reread the file from the beginning until it reach the specified line." Regards Ajit
logo78 Posted March 20, 2010 Author Posted March 20, 2010 @ajit, yup, i'm sure thats the problem. but i dont see an other chance for now. I have no other idea. Func Extractor() $Start = GUICtrlRead($I_Start) $Count = GUICtrlRead($I_Count) $DateiSplit = _PathSplit($Datei, $szDrive, $szDir, $szFName, $szExt) $Datei_Ziel = $DateiSplit[1] & $DateiSplit[2] & $DateiSplit[3] & "_Extracted" & $DateiSplit[4] $Quelle = FileOpen($Datei, 0) $Ziel = FileOpen($Datei_Ziel, 2) For $i = 0 To $Count $line = FileReadLine($Quelle, $Start + $i) FileWriteLine($Ziel, $line) Next FileClose($Quelle) FileClose($Ziel) EndFunc ;==>Extractor
Moderators Melba23 Posted March 20, 2010 Moderators Posted March 20, 2010 logo78,Sounds like you are defining the line every time! That would make it glacial as Autoit has to read through the file each time you call the function! It should be much faster if you do something like this asn let AutoIt worry about the line number after you have intialised the count:; Open file $hFile = FileOpen("Your_File_Name", 0) ; Read first line required $sText = FileReadLine($hFile, 1000) & @CRLF ; Now loop to read the required number of lines For $i = 1 To 1000 $sText &= FileReadLine($hFile) & @CRLF Next ; Close file FileClose($hFile) MsgBox(0, "Show", $sText)I have tried on smaller (6000 line) files and I do not get the massive performance hit you mention. The further you go in at the start the longer the first FileReadLine takes, because AutoIt has to get to the defined line initially, but it then runs just as fast each time. M23  Any of my own code posted anywhere on the forum is available for use by others without any restriction of any kind Open spoiler to see my UDFs: Spoiler ArrayMultiColSort ---- Sort arrays on multiple columnsChooseFileFolder ---- Single and multiple selections from specified path treeview listingDate_Time_Convert -- Easily convert date/time formats, including the language usedExtMsgBox --------- A highly customisable replacement for MsgBoxGUIExtender -------- Extend and retract multiple sections within a GUIGUIFrame ---------- Subdivide GUIs into many adjustable framesGUIListViewEx ------- Insert, delete, move, drag, sort, edit and colour ListView itemsGUITreeViewEx ------ Check/clear parent and child checkboxes in a TreeViewMarquee ----------- Scrolling tickertape GUIsNoFocusLines ------- Remove the dotted focus lines from buttons, sliders, radios and checkboxesNotify ------------- Small notifications on the edge of the displayScrollbars ----------Automatically sized scrollbars with a single commandStringSize ---------- Automatically size controls to 
fit textToast -------------- Small GUIs which pop out of the notification area Â
l3ill Posted March 20, 2010 Posted March 20, 2010 Or possibly import into excel and @ the import selection tell excel "where" to put "what" and then read the row or column you identified. just a suggestion... My Contributions... SnippetBrowser NewSciTE PathFinder Text File Manipulation FTP Connection Tester / INI File - Read, Write, Save & Load Example    Â
logo78 Posted March 20, 2010 Author Posted March 20, 2010 (edited) @Melba23, ohh, come on. At first I didn't believe you, because it couldn't be that simple. Just tried it — working like a charm. Thank you very much!! The solution was so close. I checked the help file again, and you're right, it's described there; I probably misunderstood it: If no line number to read is given, the "next" line will be read. ("Next" for a newly opened file is initially the first line.) I didn't get that FileReadLine reads the next line after initialization. Is the line feed (@CRLF) at the end necessary? @Billo, unfortunately, Excel is a bad idea with large files and won't work. BTW: Thanks to the community for such a fast solution. What a shame that I didn't register my account here earlier. I am shocked. (/me is a silent reader) Edited March 20, 2010 by logo78
jchd Posted March 20, 2010 Posted March 20, 2010 (edited) The improvement in speed in from an operation in O(N2) to the same operation in O(N). When you give it a line number K, it will have to re-read K lines. As you might recall or learn, this sums up this way, writing twice all the number of lines read (once from 1 to N, once from N to 1):1 N = N + 12 N-1 = N + 13 N-2 = N + 1. .. .. .N-1 2 = N + 1N 1 = N + 1--------------------N lines * (N+1) = N2 + N operations.So you were "just" doing N2 undue reads. With N ≈ 10000, you were doing something like 100 millions useless readline ops. Not very surprising hat it took ages.Edit:While typing, I was so focused on trying to avoid the post reformatted (blanks get easily compressed, suppressing any attempt to align things) that I forgot the 1/2 factor. We summed the line count _twice_, so the actual number is the well know formula N(N+1)/2. Anyway, 50million read operations instead of 10000 makes the difference you experienced. Edited March 20, 2010 by jchd This wonderful site allows debugging and testing regular expressions (many flavors available). An absolute must have in your bookmarks.Another excellent RegExp tutorial. Don't forget downloading your copy of up-to-date pcretest.exe and pcregrep.exe hereRegExp tutorial: enough to get startedPCRE v8.33 regexp documentation latest available release and currently implemented in AutoIt beta. SQLitespeed is another feature-rich premier SQLite manager (includes import/export). Well worth a try.SQLite Expert (freeware Personal Edition or payware Pro version) is a very useful SQLite database manager.An excellent eBook covering almost every aspect of SQLite3: a must-read for anyone doing serious work.SQL tutorial (covers "generic" SQL, but most of it applies to SQLite as well)A work-in-progress SQLite3 tutorial. Don't miss other LxyzTHW pages!SQLite official website with full documentation (may be newer than the SQLite library that comes standard with AutoIt)
logo78 Posted March 20, 2010 Author Posted March 20, 2010 @jchd, your detailed explanation shows why it takes (as you can see in the progress bar) more and more time from line to line. What a good feeling that it works now. Another question: For now, I am reading a specific number of additional lines starting from a static line number. How could I handle reading the lines between two given timestamps? '17.03.2010 16:03:59' .............. ... ... '17.03.2010 16:27:33' .............. I have to search for the 'begin' time string and for the 'ending' time string and read their line numbers. But how can I get the corresponding line number from a specific found string?
jchd Posted March 20, 2010 Posted March 20, 2010 Unfortunately, the issuer of your data had the wrong idea to use a _display_ format to write dates. Hence you need to run loops at your side to get the timestamps into the correct ISO format. That's a good example of why a display format for dates isn't the smatest thing after slice bread: you can't compare (or sort) them in such format. You can do like this: Local $line = "'17.03.2010 16:03:59'; '1'; '5'; '7'; '128'; '0'; '207900'; '526'; '0'; '0'; '0'; '48'; '2'; '2'; '2'; '5'; '0'; '0'; '17'; ; '225'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '536'; '0'; '536'; '0'; '536'; '0'; '27'; '27'; '64'; '0'; '16'; '0'; '0'; '0'; '0'; '0'; '1'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '1'; '0'; '0'; '0'; '0'; '0'; '255'; '1'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '7'; '0'; '1'; '0'; '7'; '0'; '1'; '0'; '0'; '0'; '1'; ; ; '507'; '508'; '509'; '510'; '511'; '512'; '513'; '514'; '515'; '516'; '517'; '518'; '519'; '520'; '521'; '522'; '523'; '524'; '525'; '526'; '527'; '528'; '529'; '530'; '531'; '532'; '533'; '534'; '535'; '536'; '537'; '0'; '0'; '0'; ; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '0'; '8'; '0'; '0'; '0'; '0'; '0'; '128'; '506'; '536'; '536'; '0'; '0'; '0'; '37'; '2'; '1'; '1'; '1'; '0'; '2'; '1'; '1'; '521'; '0'; '0'; '0'; '410'; '1'; '506'; '256'; ; '98'; '0'; '410'; '512'; '506'; '512'; '98'" $line = StringRegExpReplace($line, "(?:')(\d{2})(?:\.)(\d{2})(?:\.)(\d{4})(.*)", "'$3.$2.$1$4") ConsoleWrite($line & @LF) If I were you, I'd apply this transform blindly to every input file and rewrite them with, say another extension or a change in 
the name. Once that made, it would be much easier to scan your files for a timestamp inside a range: ... $starttime = "'2010.03.18" ; note the single quote as first character! $stoptime = "'2010.03.19 20:07" ; note the single quote as first character! $line = '' While $line < $starttime $line = FileReadLine($handle) Switch @error Case -1 ExitLoop Case 1 MsgBox("Error occured reading file.....") Exit EndSwitch Wend If @error Then MsgBox("Start time not found....") Else ;; process lines within time range Do _ProcessLine($line) $line = FileReadLine($handle) Switch @error Case -1 ExitLoop Case 1 MsgBox("Error occured reading file.....") Exit EndSwitch Until $line > $stoptime EndIf ... Of course this assumes the records are initially written as increasing time in you input files. This wonderful site allows debugging and testing regular expressions (many flavors available). An absolute must have in your bookmarks.Another excellent RegExp tutorial. Don't forget downloading your copy of up-to-date pcretest.exe and pcregrep.exe hereRegExp tutorial: enough to get startedPCRE v8.33 regexp documentation latest available release and currently implemented in AutoIt beta. SQLitespeed is another feature-rich premier SQLite manager (includes import/export). Well worth a try.SQLite Expert (freeware Personal Edition or payware Pro version) is a very useful SQLite database manager.An excellent eBook covering almost every aspect of SQLite3: a must-read for anyone doing serious work.SQL tutorial (covers "generic" SQL, but most of it applies to SQLite as well)A work-in-progress SQLite3 tutorial. Don't miss other LxyzTHW pages!SQLite official website with full documentation (may be newer than the SQLite library that comes standard with AutoIt)
MvGulik Posted March 20, 2010 Posted March 20, 2010 (edited) whatever Edited February 7, 2011 by MvGulik "Straight_and_Crooked_Thinking" : A "classic guide to ferreting out untruths, half-truths, and other distortions of facts in political and social discussions.""The Secrets of Quantum Physics" : New and excellent 2 part documentary on Quantum Physics by Jim Al-Khalili. (Dec 2014) "Believing what you know ain't so" ... Knock Knock ...Â
jchd Posted March 20, 2010 Posted March 20, 2010 That's possible, but since the file looks like a log file, it isn't obvious that log lines occur at regular intervals. It could also be possible to FilePos following a dichotomic algorithm, even if the lines don't have a fixed format, but is the complexity really worth it for 10-12 or even 20K line files in the hands of a newcomer to AutoIt? If he finds the result to be a real bottleneck, then it's always time to dig further. This wonderful site allows debugging and testing regular expressions (many flavors available). An absolute must have in your bookmarks.Another excellent RegExp tutorial. Don't forget downloading your copy of up-to-date pcretest.exe and pcregrep.exe hereRegExp tutorial: enough to get startedPCRE v8.33 regexp documentation latest available release and currently implemented in AutoIt beta. SQLitespeed is another feature-rich premier SQLite manager (includes import/export). Well worth a try.SQLite Expert (freeware Personal Edition or payware Pro version) is a very useful SQLite database manager.An excellent eBook covering almost every aspect of SQLite3: a must-read for anyone doing serious work.SQL tutorial (covers "generic" SQL, but most of it applies to SQLite as well)A work-in-progress SQLite3 tutorial. Don't miss other LxyzTHW pages!SQLite official website with full documentation (may be newer than the SQLite library that comes standard with AutoIt)
KaFu Posted March 21, 2010 Posted March 21, 2010 How about parsing the csv file to a SQLite database and let it do all the performance related stuff? Reading records 12-20k in such a db is mere a case of some few seconds, if not only 1 to 2. Â OS: Win10-22H2 - 64bit - German, AutoIt Version: 3.3.16.1, AutoIt Editor: SciTE, Website: https://funk.eu AMT - Auto-Movie-Thumbnailer (2024-Oct-13)Â BIC - Batch-Image-Cropper (2023-Apr-01) COP - Color Picker (2009-May-21) DCS - Dynamic Cursor Selector (2024-Oct-13) HMW - Hide my Windows (2024-Oct-19) HRC - HotKey Resolution Changer (2012-May-16)Â ICU - Icon Configuration Utility (2018-Sep-16) SMF - Search my Files (2024-Oct-20) - THE file info and duplicates search tool SSD - Set Sound Device (2017-Sep-16)
jchd Posted March 21, 2010 Posted March 21, 2010 I was refraining to put that solution on the scene (you know I'm a SQLite-maniac)! Maybe a bit too much for a beginner? Anyway it depends on the context. If it's a one-time lookup then it isn't worth it, but if there are frequent queries, then it makes full sense (and offers a myriad of more complex queries for almost free, if needed). This wonderful site allows debugging and testing regular expressions (many flavors available). An absolute must have in your bookmarks.Another excellent RegExp tutorial. Don't forget downloading your copy of up-to-date pcretest.exe and pcregrep.exe hereRegExp tutorial: enough to get startedPCRE v8.33 regexp documentation latest available release and currently implemented in AutoIt beta. SQLitespeed is another feature-rich premier SQLite manager (includes import/export). Well worth a try.SQLite Expert (freeware Personal Edition or payware Pro version) is a very useful SQLite database manager.An excellent eBook covering almost every aspect of SQLite3: a must-read for anyone doing serious work.SQL tutorial (covers "generic" SQL, but most of it applies to SQLite as well)A work-in-progress SQLite3 tutorial. Don't miss other LxyzTHW pages!SQLite official website with full documentation (may be newer than the SQLite library that comes standard with AutoIt)
KaFu Posted March 21, 2010 Posted March 21, 2010 you know I'm a SQLite-maniacI use'em all over the place ... thanks to some people wrapping it all into a neat UDF ...Anyway it depends on the context. If it's a one-time lookup then it isn't worth it, but if there are frequent queries, then it makes full sense (and offers a myriad of more complex queries for almost free, if needed).Yep, you're right. CSV analysis sounds like a quiet repetitive task to me, and for a one time analysis I would stick to Excel ... Â OS: Win10-22H2 - 64bit - German, AutoIt Version: 3.3.16.1, AutoIt Editor: SciTE, Website: https://funk.eu AMT - Auto-Movie-Thumbnailer (2024-Oct-13)Â BIC - Batch-Image-Cropper (2023-Apr-01) COP - Color Picker (2009-May-21) DCS - Dynamic Cursor Selector (2024-Oct-13) HMW - Hide my Windows (2024-Oct-19) HRC - HotKey Resolution Changer (2012-May-16)Â ICU - Icon Configuration Utility (2018-Sep-16) SMF - Search my Files (2024-Oct-20) - THE file info and duplicates search tool SSD - Set Sound Device (2017-Sep-16)
MvGulik Posted March 21, 2010 Posted March 21, 2010 (edited) whatever Edited February 7, 2011 by MvGulik "Straight_and_Crooked_Thinking" : A "classic guide to ferreting out untruths, half-truths, and other distortions of facts in political and social discussions.""The Secrets of Quantum Physics" : New and excellent 2 part documentary on Quantum Physics by Jim Al-Khalili. (Dec 2014) "Believing what you know ain't so" ... Knock Knock ...Â
logo78 Posted March 21, 2010 Author Posted March 21, 2010 (edited) Unfortunately, the issuer of your data had the wrong idea to use a _display_ format to write dates. Hence you need to run loops at your side to get the timestamps into the correct ISO format. That's a good example of why a display format for dates isn't the smatest thing after slice bread: you can't compare (or sort) them in such format.Actually i am the issuer of the log files. Let me write more details:The log files are from 4 several and similar plants. Unfortunately our opc client industrialdatabridge is incapable to split the CSV or to compress them. For now I have to do it myself, respectively i'll write a routine to stop the service, to take the big files in an other directory, copy empty CSV to the target directory, start the service again and compress the large files casually with 7z. I found a great UDF (7z & MemoryDLL) here The OPC client is connected with one specific PLC (Process logical controller). I am running for each PLC one OPC server & OPC client because of the huge data amounts.Each Plant has about 20 Trolleys, for each of them i am have a specific CSV file (in sum 74). The reason is for troubleshooting (back tracing / reverse engineering). If there was a problem, i am picking up all log files, extracting the designated time (+/- 300secs), importing to Excel and looking backwards, what the problem could be.Every 500ms a new line is written, no matter if new data arrived or not (doesn't happen so often).To transform afterwards would be real mess, because of large files. But i fixed the issue with the time stamp. That was the origin german date & time format. After ur explanation i switched the regional settings in XP. The time stamp will be now logged like u mentioned (in ISO). And with picking up a designated time in the GUI, will hopefully much more comfortable now. The speed of fileReadline is now absolutely sufficient. 
Just tried to get the last 500 lines from an 800 MByte file (from line 579xxx to +500). It took about 10 sec. But Industrialdatabridge is capable of logging to SQL (MS, MySQL, Oracle). I tried it, and it's working, but I chose the quick CSV way because I don't have much experience handling SQL databases, and at that time I basically didn't see many more advantages. But after your postings, I am thinking about switching to a real database. In addition, I am faced with 2 GByte CSV limits and with the connection limits of the MS Jet OLE CSV driver: max. 4000 connections and... You all baffled me now. Edited March 21, 2010 by logo78
Malkey Posted March 21, 2010 Posted March 21, 2010 To search between specified times. In this example, the position of the first occurrence of the "start search" sub-string returned from StringInStr() function is used as an offset from the beginning of the file in the FileSetPos() function in order to set the current file position. So when FileReadLine() is called the line which is read starts at the position in the file set by the FileSetPos() function.expandcollapse popupLocal $sFileName = "Your_File_Name" Local $iSearchStart = "17.03.2010 16:04:50" ;Including this date & time Local $iSearchEnd = "17.03.2010 16:05" ; Excluding this date & time. Local $sRes Local $hFile = FileOpen($sFileName) ; Check if file opened If $hFile = -1 Then MsgBox(0, "Error", "Unable to open file.") Exit EndIf Local $iCharNumStart = StringInStr(FileRead($sFileName), $iSearchStart, 0) - 1 If $iCharNumStart <= 0 Then MsgBox(16, "Error", 'Start date, "' & $iSearchStart & '" string not found in file.') Exit EndIf Local $iCharNumEnd = StringInStr(FileRead($sFileName), $iSearchEnd, 0) If $iCharNumEnd <= 0 Then MsgBox(16, "Error", 'End date, "' & $iSearchEnd & '" string not found in file.') Exit EndIf ;ConsoleWrite($iCharNumStart & " to " & $iCharNumEnd & @CRLF) FileSetPos($hFile, $iCharNumStart, 0) While 1 $line = FileReadLine($hFile) If @error = -1 Or FileGetPos($hFile) >= $iCharNumEnd Then ExitLoop $sRes &= $line & @CRLF ;If FileGetPos($hFile) >= $iCharNumEnd Then ExitLoop ; If this line is used and line #33 edited, then end search date included. WEnd ; Close the handle. FileClose($hFile) ConsoleWrite($sRes & @CRLF)
jchd Posted March 21, 2010 Posted March 21, 2010 Cool, that is the real-life question we all love to find --and answer to the best of our ability-- here! Personally I'm fed up by those "help me bot that game" posts. Honestly, you've made a huge progress in exposing your situation (compare to your first post). We certainly can drive you to a much better solution to your problem. In your case, there is no hesitation: switch to an SQL solution. You'll find help about that here as well since there are a number of pro users of RDBMS using AutoIt. To best help you select a working stable and flexible solution, you have to estimate the volume of your data (see below) the number of users of the database and a retention period. From what I understand there are currently 74 data sources which produce amalgamated two records per second. From your data samples, there's about 600-700 bytes in average ASCII records with 1-char separators. Looking and the content of your sample data, I've found what I interpret (possibly wrongly) as "plant separators": empty fields without delimiters. Therefore I assume each record holds data for every plant and every data source. Those record won't be much larger nor much shorter once stored as DB rows, but let's be safe and assume 1K rows. For the sake of evaluation, lest say you'll have 100 data sources (your company is growing, isn't it?) and a total of 3K/s of record data. 3K/s means 10.8Mb/h, nothing small. Are you producing 24/7? It would greatly help if you could determine what is your needed retention time. As an independant consultant for Y2K potential problems, I've been faced with industrials who were required to store production traces records for as long as 20 years! BTW, I'm proud to have discovered perhaps the only known potentially devastating side-effect of Y2K issue were a protocol-converter box was destroying real-time measurements (directly due to Y2K bug), inducing production of fragilized jet-engine motor blades. 
Should this have been ignored, then millions of such blades would have reached the market, making flying on commercial aircrafts the same as "russian roulette"... What did I get wrong in the above interpretation/assumptions? This wonderful site allows debugging and testing regular expressions (many flavors available). An absolute must have in your bookmarks.Another excellent RegExp tutorial. Don't forget downloading your copy of up-to-date pcretest.exe and pcregrep.exe hereRegExp tutorial: enough to get startedPCRE v8.33 regexp documentation latest available release and currently implemented in AutoIt beta. SQLitespeed is another feature-rich premier SQLite manager (includes import/export). Well worth a try.SQLite Expert (freeware Personal Edition or payware Pro version) is a very useful SQLite database manager.An excellent eBook covering almost every aspect of SQLite3: a must-read for anyone doing serious work.SQL tutorial (covers "generic" SQL, but most of it applies to SQLite as well)A work-in-progress SQLite3 tutorial. Don't miss other LxyzTHW pages!SQLite official website with full documentation (may be newer than the SQLite library that comes standard with AutoIt)
MvGulik Posted March 21, 2010 Posted March 21, 2010 (edited) whatever Edited February 7, 2011 by MvGulik "Straight_and_Crooked_Thinking" : A "classic guide to ferreting out untruths, half-truths, and other distortions of facts in political and social discussions.""The Secrets of Quantum Physics" : New and excellent 2 part documentary on Quantum Physics by Jim Al-Khalili. (Dec 2014) "Believing what you know ain't so" ... Knock Knock ...Â
jchd Posted March 21, 2010 Posted March 21, 2010 I wish this solution good luck dealing with > 2Gb files! "Breaking water" ? Idiomatic to a frenchy! This wonderful site allows debugging and testing regular expressions (many flavors available). An absolute must have in your bookmarks.Another excellent RegExp tutorial. Don't forget downloading your copy of up-to-date pcretest.exe and pcregrep.exe hereRegExp tutorial: enough to get startedPCRE v8.33 regexp documentation latest available release and currently implemented in AutoIt beta. SQLitespeed is another feature-rich premier SQLite manager (includes import/export). Well worth a try.SQLite Expert (freeware Personal Edition or payware Pro version) is a very useful SQLite database manager.An excellent eBook covering almost every aspect of SQLite3: a must-read for anyone doing serious work.SQL tutorial (covers "generic" SQL, but most of it applies to SQLite as well)A work-in-progress SQLite3 tutorial. Don't miss other LxyzTHW pages!SQLite official website with full documentation (may be newer than the SQLite library that comes standard with AutoIt)
Recommended Posts
Create an account or sign in to comment
You need to be a member in order to leave a comment
Create an account
Sign up for a new account in our community. It's easy!
Register a new accountSign in
Already have an account? Sign in here.
Sign In Now