#!/usr/bin/env python
# -*- coding: UTF-8 no BOM -*-

"""Filter the rows of ASCII tables by a condition and the columns by white or black listing."""

import os,re,sys,string,fnmatch,math,random,numpy as np
from optparse import OptionParser
import damask

scriptID = '$Id$'
# '$Id$' is a version-control keyword. While it is not expanded it holds no
# file name field, so fall back to the name the script was invoked with
# instead of failing with an IndexError.
idFields   = scriptID.split()
scriptName = os.path.splitext(idFields[1] if len(idFields) > 1
                              else os.path.basename(sys.argv[0]))[0]

# '#label#' (numeric operand) or '#s#label#' (string operand);
# groups: whole operand text, optional 's#' marker, name
operandPattern = re.compile(r'#(([s]#)?(.+?))#')


def matchingNeedles(table, position, label, needles):
    """Return one boolean per needle telling whether it selects the given column.

    A needle selects a column if the column position lies within
    table.label_indexrange(needle) or if the label matches the needle as an
    fnmatch pattern.
    """
    return [position in table.label_indexrange(needle) or fnmatch.fnmatch(label, needle)
            for needle in needles]


def parseCondition(condition, labels, specials):
    """Translate the '#...#' operands of a condition into str.format placeholders.

    '#name#' becomes '{n}' and is paired with a float() conversion of that
    column; '#s#name#' becomes '"{n}"' and is paired with a str() conversion.
    A name found in specials is paired with a lookup in specials instead.

    Returns (template, interpolator) where interpolator[n] is the source text
    of the expression that supplies placeholder n for the current row.
    Raises LookupError(name) if a name is neither special nor a column label.
    """
    labels = list(labels)
    interpolator = []
    slots = {}                                                      # operand text --> placeholder index

    def substitute(match):
        operand, kind, name = match.group(1), match.group(2) or '', match.group(3)
        if operand not in slots:
            if name in specials:                                    # special label ?
                expression = 'specials["%s"]'%name
            else:
                try:
                    column = labels.index(name)
                except ValueError:
                    raise LookupError(name)
                expression = '%s(table.data[%i])'%({'': 'float', 's#': 'str'}[kind], column)
            slots[operand] = len(interpolator)
            interpolator.append(expression)
        return {'': '{%i}', 's#': '"{%i}"'}[kind]%slots[operand]

    # A single regex pass replaces exactly the operands that were recognized;
    # replacing them one at a time with str.replace could rewrite part of
    # another operand (e.g. '#a#' inside '#s#a#').
    return operandPattern.sub(substitute, condition), interpolator


# --------------------------------------------------------------------
#                                MAIN
# --------------------------------------------------------------------

parser = OptionParser(option_class=damask.extendableOption, usage='%prog options [file[s]]', description = """
Filter rows according to condition and columns by either white or black listing.

Examples:
Every odd row if x coordinate is positive -- " #ip.x# >= 0.0 and #_row_#%2 == 1 ".
All rows where label 'foo' equals 'bar' -- " #s#foo# == \"bar\" "
""", version = scriptID)

parser.add_option('-w','--white',
                  dest   = 'whitelist',
                  action = 'extend', metavar = '',
                  help   = 'whitelist of column labels (a,b,c,...)')
parser.add_option('-b','--black',
                  dest   = 'blacklist',
                  action = 'extend', metavar='',
                  help   = 'blacklist of column labels (a,b,c,...)')
parser.add_option('-c','--condition',
                  dest   = 'condition', metavar='string',
                  help   = 'condition to filter rows')

parser.set_defaults(condition = '',
                   )

(options,filenames) = parser.parse_args()

# --- loop over input files -------------------------------------------------------------------------

if not filenames: filenames = ['STDIN']

for name in filenames:
    if not (name == 'STDIN' or os.path.exists(name)): continue      # silently skip missing files
    table = damask.ASCIItable(name = name, outname = name+'_tmp',
                              buffered = False)
    table.croak('\033[1m'+scriptName+'\033[0m'+(': '+name if name != 'STDIN' else ''))

# ------------------------------------------ assemble info ---------------------------------------

    table.head_read()                                               # read ASCII header info
    table.info_append(scriptID + '\t' + ' '.join(sys.argv[1:]))

# ------------------------------------------ process data ---------------------------------------

    specials = { \
                 '_row_': 0,
               }

    labels = []
    positions = []

    for position,label in enumerate(table.labels):
        whitelisted = options.whitelist is None \
                   or any(matchingNeedles(table, position, label, options.whitelist))
        blacklisted = options.blacklist is not None \
                  and any(matchingNeedles(table, position, label, options.blacklist))
        if whitelisted and not blacklisted:                         # a label to keep?
            labels.append(label)                                    # remember name...
            positions.append(position)                              # ...and position

    if len(labels) > 0 and options.whitelist is not None and options.blacklist is None:
        # check whether reordering according to the whitelist is possible
        whitelistItem = np.zeros(len(labels), dtype=int)
        for i,label in enumerate(labels):                           # check each selected label
            match = matchingNeedles(table, positions[i], label, options.whitelist)
            whitelistItem[i] = match.index(True) if np.sum(match) == 1 else -1   # unique match --> store which

        # Skip reordering if any label lacks a unique whitelist match. Otherwise
        # sort by whitelist item first and by original column position second,
        # so that the components of a multi-column entry keep their order
        # (sorting by label text would put '10_x' before '1_x').
        order = np.arange(len(labels)) if np.any(whitelistItem < 0) \
           else np.lexsort((positions, whitelistItem))
    else:
        order = np.arange(len(labels))                              # maintain original order of labels

    try:
        condition, interpolator = parseCondition(options.condition,   # per-file copy with placeholders
                                                 table.labels, specials)
    except LookupError as error:
        parser.error('column %s not found...\n'%error.args[0])

    # repr() yields a correctly quoted literal even if the condition itself
    # contains quotes or backslashes; compiled once and reused for every row.
    evaluator = compile(repr(condition) + '.format(' + ','.join(interpolator) + ')',
                        '<condition>', 'eval')

# ------------------------------------------ assemble header ---------------------------------------

    table.labels_clear()
    table.labels_append(np.array(labels)[order])                    # update with new label set
    table.head_write()

# ------------------------------------------ process and output data ------------------------------------------

    positions = np.array(positions)[order]
    outputAlive = True
    while outputAlive and table.data_read():                        # read next data line of ASCII table
        specials['_row_'] += 1                                      # count row
        # NOTE: the condition is executed through eval(), i.e. it can run
        # arbitrary Python code -- it must only come from a trusted user.
        # Inner eval fills the row values into the condition text, outer eval
        # evaluates the resulting expression.
        if condition == '' or eval(eval(evaluator)):                # valid row ?
            table.data = [table.data[position] for position in positions]   # retain filtered columns
            outputAlive = table.data_write()                        # output processed line

# ------------------------------------------ finalize output -----------------------------------------

    table.close()                                                   # close input ASCII table (works for stdin)
    if name != 'STDIN': os.rename(name+'_tmp',name)                 # overwrite old one with tmp new