# Script to find files matching a file mask, then in every file find the lines
# that start with a given marker, and extract patterned information from those lines.
# Andriy Anishkin (anishkin@icqmail.com) UMCP

#_____________________________ User parameters section

#==> Folder with files to process
#set folderName {C:/Temp/Cluster/namd_logs/long}
# set folderName {C:/Temp/Cluster/namd_logs/water}
set folderName {c:/Temp/MscL_c-o-int/closed_g10/test3}

#==> File mask to select proper files
set fileMask {*.namd*}
#set fileMask {min*.namd4}

#==> Option to put results for different files into separate output files <1> or not <0>
set separateOutputFiles 1

#==> Mask for output file name (timestamp and, if selected, original filename will be added to it)
set outputFileName "namd_logs_summary"

#==> Format of the line of interest (used to extract information) and output header for the extracted information
# Format specifiers (passed to the Tcl "scan" command):
#   %s  String
#   %d  Decimal integer
#   %i  Integer (base detected from the prefix)
#   %f  Real number
#   %e  Real number in mantissa-exponent form
#   %x  Hexadecimal
#   %c  Character
#   %*{s d f e x c}  skip the extracted value without assigning it to the output
#
# #WallClock: 61.901933 CPUTime: 26.450000 Memory: 20807 kB
# set lineFormat "WallClock: %f CPUTime: %f Memory: %*i kB"
# set outputHeader "WallClock, s\tCPUTime, s"
# TIMING: 400 CPU: 223.53, 0.5588/step Wall: 227.404, 0.566746/step, 0 hours remaining, 85432 kB of memory in use.
set lineFormat "TIMING: %i CPU: %f, %f/step Wall: %f, %f/step, %*f hours remaining, %*i kB of memory in use."
set outputHeader "TIMING, steps\tCPU Clock, s\tCPU slowness, s/step\tWall clock, s\tWall Clock slowness, s/step"

#==> Beginning of the line to find
# set lineMarker {WallClock:}
# set lineMarker {TIMING:}
set lineMarker [lindex $lineFormat 0]; # Autodetect the beginning of the line (first word of the format)

#__________________________________ Procedures declaration

proc unzipTemp {oldFilename {mode unpack}} {
    # Procedure for temporarily unzipping files.
    # Inputs:
    #   oldFilename - in "unpack" mode: name of the file that may be an archive;
    #                 in "cleanup" mode: list of files to delete.
    #   mode        - mode of work:
    #       unpack  - runs "unzip -jo" on oldFilename (through a temporary
    #                 temp.bat in the current directory) and returns the list of
    #                 files reported as unpacked. If the unzip call fails (for
    #                 example because the file is not an archive), a one-element
    #                 list holding oldFilename itself is returned.
    #       cleanup - tries to delete every file listed in oldFilename;
    #                 deletion errors are ignored.
    # Output:
    #   unpack mode - list of file names; cleanup (or unknown) mode - empty string.
    switch -- $mode {
        unpack {
            set newFilenames {}
            set unzipOutput {}
            # The archive name is quoted so that names containing spaces survive cmd.exe parsing
            set batFile [open "temp.bat" w]
            puts $batFile "unzip -jo \"$oldFilename\""
            close $batFile
            if {[catch {set unzipOutput [exec temp.bat]}] == 0} {
                puts "$mode $oldFilename"
                # Parse the unzip report line by line instead of treating it as a
                # Tcl list: raw program output is not guaranteed to be a valid list,
                # and unpacked names may contain spaces. Stored (uncompressed)
                # entries are reported as "extracting:" rather than "inflating:".
                foreach reportLine [split $unzipOutput "\n"] {
                    if {[regexp {^\s*(?:inflating|extracting):\s+(.*\S)\s*$} $reportLine -> unpackedName]} {
                        lappend newFilenames $unpackedName
                    }
                }
            } else {
                # Not an archive (or unzip is unavailable): process the file as it is
                set newFilenames [list $oldFilename]
            }
            catch {file delete "temp.bat"}
            return $newFilenames
        }
        cleanup {
            foreach filename $oldFilename {
                catch {file delete $filename}
            }
        }
    }
}

#__________________________________ Executable section

# Anchor the line marker, so that it is matched only at the beginning of the line
set lineMarker "^$lineMarker"

# Add timestamp to the output file name
set timeStamp [clock format [clock seconds] -format "%m-%d-%Y_%H.%M.%S"]
set outputFileName "${outputFileName}_${timeStamp}.txt"

# Add file name column to the header when all results go to one file
if {!$separateOutputFiles} {
    set outputHeader "File\t$outputHeader"
}

# Autodetect the number of output values:
# (number of "%" specifiers) - (number of "*" suppressed specifiers)
set percentCount [regsub -all {%} $lineFormat {} scratch]
set suppressedCount [regsub -all {[*]} $lineFormat {} scratch]
set outputValuesNumber [expr {$percentCount - $suppressedCount}]

# Names of the array elements that receive the scanned values
set outputValuesArray {}
for {set i 0} {$i < $outputValuesNumber} {incr i} {
    lappend outputValuesArray "outputValue($i)"
}

#______________________ Make files list
set oldFolder [pwd]
cd $folderName
# glob is the built-in way to expand a file mask ("ls" is not a Tcl command);
# -nocomplain yields an empty list instead of an error when nothing matches.
set fileList [lsort [glob -nocomplain -- $fileMask]]

#______________________ Find the marked lines in the files and extract the values
if {!$separateOutputFiles} {
    # Open the single common output file
    set outputFileID [open $outputFileName w]
    puts $outputFileID $outputHeader
}

# Go through the files
set fileCounter 0
set selectionsCounter 0
foreach oldFileName $fileList {
    # Unzip file, if necessary
    set unpackedFileNames [unzipTemp $oldFileName unpack]
    # Anything other than "exactly the original file" means temporary files were unpacked
    set wasUnpacked [expr {[llength $unpackedFileNames] != 1 || ![string equal -nocase $oldFileName [lindex $unpackedFileNames 0]]}]

    # An archive may hold several files; scan each of them
    foreach currentFileName $unpackedFileNames {
        if {![file isfile $currentFileName]} {
            continue
        }
        if {$separateOutputFiles} {
            # Open a dedicated output file for this input file
            set outputFileID [open "${currentFileName}_$outputFileName" w]
            puts $outputFileID $outputHeader
        }
        incr fileCounter
        set selectionsLocalCounter 0
        set currentFileID [open $currentFileName r]

        # Read lines one-by-one and check for the line marker
        while {[gets $currentFileID lineRecognize] >= 0} {
            if {![regexp $lineMarker $lineRecognize]} {
                continue
            }
            # Line was found. Extract the information
            incr selectionsLocalCounter
            incr selectionsCounter
            if {!$separateOutputFiles} {
                set outputLine [list $currentFileName]
            } else {
                set outputLine {}
            }

            # Forget values of the previous line, so a partially matching line
            # cannot silently reuse them.
            catch {unset outputValue}
            # The command is built with "list", so the log line and the format are
            # passed to scan verbatim and never re-parsed as Tcl code.
            eval [list scan $lineRecognize $lineFormat] $outputValuesArray

            for {set i 0} {$i < $outputValuesNumber} {incr i} {
                if {[info exists outputValue($i)]} {
                    lappend outputLine $outputValue($i)
                } else {
                    # Field could not be parsed from this line: keep the column, leave it empty
                    lappend outputLine {}
                }
            }
            puts $outputFileID [join $outputLine "\t"]
        }
        close $currentFileID
        puts "File '$currentFileName' scanned\t\t$selectionsLocalCounter selections found"
        if {$separateOutputFiles} {
            close $outputFileID
        }
    }

    # Remove unzipped files, if necessary
    if {$wasUnpacked} {
        puts "CLEANING!"
        unzipTemp $unpackedFileNames cleanup
    }
}

if {!$separateOutputFiles} {
    close $outputFileID
}
cd $oldFolder
puts "Finished!!!\n$fileCounter files scanned\t\t$selectionsCounter selections found"