DAMASK_EICMD/processing/post/filterTable.py

#!/usr/bin/env python
# -*- coding: UTF-8 no BOM -*-

import os,re,sys,fnmatch,math,random    # math and random are available for use inside --condition expressions (evaluated via eval below)
import numpy as np
from optparse import OptionParser
import damask

scriptName = os.path.splitext(os.path.basename(__file__))[0]
scriptID   = ' '.join([scriptName,damask.version])

def sortingList(labels,whitelistitems):
  """Build lexsort keys from column labels: component index and base name of vector labels like '3_pos', plus whitelist position."""
  indices = []
  names   = []

  for label in labels:
    if re.match(r'^\d+_',label):                                                                    # vectorized label, e.g. '2_eulerangles'
      indices.append(int(label.split('_',1)[0]))
      names.append(label.split('_',1)[1])
    else:
      indices.append(0)
      names.append(label)

  return [indices,names,whitelistitems]

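# A minimal illustration (with made-up labels): sortingList(['2_pos','1_pos','texture'],[0,0,1])
# returns [[2,1,0],['pos','pos','texture'],[0,0,1]]; np.lexsort of these keys sorts primarily by
# whitelist position, then by base name, then by component index, giving order [1,0,2],
# i.e. '1_pos', '2_pos', 'texture'.
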
# --------------------------------------------------------------------
# MAIN
# --------------------------------------------------------------------
parser = OptionParser(option_class=damask.extendableOption, usage='%prog options [file[s]]', description = """
Filter rows according to condition and columns by either white or black listing.

Examples:
Every odd row if x coordinate is positive -- " #ip.x# >= 0.0 and #_row_#%2 == 1 ".
All rows where label 'foo' equals 'bar' -- " #s#foo# == \"bar\" "

""", version = scriptID)
parser.add_option('-w','--white',
                  dest = 'whitelist',
                  action = 'extend', metavar = '<string LIST>',
                  help = 'whitelist of column labels (a,b,c,...)')
parser.add_option('-b','--black',
                  dest = 'blacklist',
                  action = 'extend', metavar = '<string LIST>',
                  help = 'blacklist of column labels (a,b,c,...)')
parser.add_option('-c','--condition',
                  dest = 'condition', metavar = 'string',
                  help = 'condition to filter rows')

parser.set_defaults(condition = '',
                   )

(options,filenames) = parser.parse_args()

# --- loop over input files -------------------------------------------------------------------------

if filenames == []: filenames = [None]

for name in filenames:
  try:
    table = damask.ASCIItable(name = name,
                              buffered = False)
  except IOError:                                                                                   # skip files that cannot be opened
    continue
  damask.util.report(scriptName,name)

# ------------------------------------------ assemble info ---------------------------------------

  table.head_read()

# ------------------------------------------ process data ---------------------------------------

  specials = { \
               '_row_': 0,
             }

  labels    = []
  positions = []

  for position,label in enumerate(table.labels):
    if     (options.whitelist is None or     any([ position in table.label_indexrange(needle) \
                                                   or fnmatch.fnmatch(label,needle) for needle in options.whitelist])) \
       and (options.blacklist is None or not any([ position in table.label_indexrange(needle) \
                                                   or fnmatch.fnmatch(label,needle) for needle in options.blacklist])): # a label to keep?
      labels.append(label)                                                                          # remember name...
      positions.append(position)                                                                    # ...and position

  if len(labels) > 0 and options.whitelist is not None and options.blacklist is None:              # check whether reordering is possible
    whitelistitem = np.zeros(len(labels),dtype=int)
    for i,label in enumerate(labels):                                                               # check each selected label
      match = [ positions[i] in table.label_indexrange(needle) \
                or fnmatch.fnmatch(label,needle) for needle in options.whitelist]                   # which whitelist items match it
      whitelistitem[i] = match.index(True) if np.sum(match) == 1 else -1                            # unique match to a whitelist item --> store which

    order = range(len(labels)) if np.any(whitelistitem < 0) else \
            np.lexsort(sortingList(labels,whitelistitem))                                           # skip reordering if any label matched the whitelist ambiguously
  else:
    order = range(len(labels))                                                                      # maintain original order of labels

  interpolator = []
  condition = options.condition                                                                     # copy per file, might be altered
  for position,operand in enumerate(set(re.findall(r'#(([s]#)?(.+?))#',condition))):               # groups: full operand, optional 's#' prefix, label
    condition = condition.replace('#'+operand[0]+'#',
                                  {  '': '{%i}'%position,
                                   's#': '"{%i}"'%position}[operand[1]])                            # replace #label# by numbered format placeholder
    if operand[2] in specials:                                                                      # special label ?
      interpolator += ['specials["%s"]'%operand[2]]
    else:
      try:
        interpolator += ['%s(table.data[%i])'%({  '': 'float',
                                                 's#': 'str'}[operand[1]],
                                                 table.labels.index(operand[2]))]
      except ValueError:
        parser.error('column %s not found...\n'%operand[2])

  evaluator = "'" + condition + "'.format(" + ','.join(interpolator) + ")"
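
# How the two-stage evaluation below works (a worked example; 'ip.x' is an assumed column label):
# the condition '#ip.x# >= 0.0' has been rewritten above into the template '{0} >= 0.0' with
# interpolator ['float(table.data[idx])'] (idx being the column index of 'ip.x'), so evaluator is
# the string "'{0} >= 0.0'.format(float(table.data[idx]))".  In the data loop, the inner eval
# formats the current row value into the template (e.g. '1.5 >= 0.0') and the outer eval turns
# that string into the boolean that decides whether the row is kept.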

# ------------------------------------------ assemble header ---------------------------------------

  table.info_append(scriptID + '\t' + ' '.join(sys.argv[1:]))                                       # add own info to header
  table.labels_clear()
  table.labels_append(np.array(labels)[order])                                                      # update with new label set
  table.head_write()

# ------------------------------------------ process and output data ------------------------------------------

  positions = np.array(positions)[order]

  outputAlive = True
  while outputAlive and table.data_read():                                                          # read next data line of ASCII table
    specials['_row_'] += 1                                                                          # count row
    if condition == '' or eval(eval(evaluator)):                                                    # valid row ?
      table.data = [table.data[position] for position in positions]                                 # retain filtered columns
      outputAlive = table.data_write()                                                              # output processed line

# ------------------------------------------ finalize output -----------------------------------------

  table.close()                                                                                     # close input ASCII table (works for stdin)