DAMASK_EICMD/processing/post/filterTable.py

155 lines
7.9 KiB
Python
Executable File

#!/usr/bin/env python2.7
# -*- coding: UTF-8 no BOM -*-
import os,re,sys,fnmatch
import math # noqa
import numpy as np
from optparse import OptionParser
import damask
scriptName = os.path.splitext(os.path.basename(__file__))[0]
scriptID = ' '.join([scriptName,damask.version])
def sortingList(labels,whitelistitems):
    """
    Build the list of sort keys for reordering selected column labels.

    A label of the form '<digits>_<rest>' is split at its first underscore
    into an integer index and the remaining name. Any other label gets
    index 0 and keeps its full text as name.

    Args:
        labels: iterable of column label strings.
        whitelistitems: passed through unchanged as the last key.

    Returns:
        The list [indices, names, whitelistitems], where indices and names
        are lists parallel to labels. The caller in this script hands the
        result to np.lexsort, which treats the LAST key as the primary one.
    """
    indices = []
    names   = []

    for label in labels:
        if re.match(r'^\d+_',label):                                                                # raw string: '\d' is not a valid str escape
            prefix,name = label.split('_',1)                                                        # split once, use both parts
            indices.append(int(prefix))
            names.append(name)
        else:
            indices.append(0)
            names.append(label)

    return [indices,names,whitelistitems]
# --------------------------------------------------------------------
# MAIN
# --------------------------------------------------------------------
# Command-line interface: columns are selected by white/black listing of labels,
# rows by an optional condition expression.
# The 'extend' action used below is not a stock optparse action; presumably it is
# provided by damask.extendableOption -- confirm against the damask package.
parser = OptionParser(option_class=damask.extendableOption, usage='%prog options [file[s]]', description = """
Filter rows according to condition and columns by either white or black listing.
Examples:
Every odd row if x coordinate is positive -- " #ip.x# >= 0.0 and #_row_#%2 == 1 "
All rows where label 'foo' equals 'bar' -- " #s#foo# == 'bar' "
""", version = scriptID)

parser.add_option('-w','--white',
                  dest = 'whitelist',
                  action = 'extend', metavar = '<string LIST>',
                  help = 'whitelist of column labels (a,b,c,...)')
parser.add_option('-b','--black',
                  dest = 'blacklist',
                  action = 'extend', metavar='<string LIST>',
                  help = 'blacklist of column labels (a,b,c,...)')
parser.add_option('-c','--condition',
                  dest = 'condition', metavar='string',
                  help = 'condition to filter rows')

parser.set_defaults(condition = None,                                                               # no row filtering unless requested
                   )

(options,filenames) = parser.parse_args()                                                           # remaining positional arguments are the input files
# --- loop over input files -------------------------------------------------------------------------
if filenames == []: filenames = [None]                                                              # no file given --> one pass with name None (presumably stdin/stdout; confirm in damask.ASCIItable)

for name in filenames:
    try:
        table = damask.ASCIItable(name = name,
                                  buffered = False)
    except Exception:                                                                               # table could not be set up --> skip file, but let Ctrl-C/SystemExit propagate
        continue
    damask.util.report(scriptName,name)

# ------------------------------------------ assemble info ---------------------------------------

    table.head_read()

# ------------------------------------------ process data ---------------------------------------

    specials = {                                                                                    # pseudo-columns usable in the condition; reset for every file
                 '_row_': 0,
               }
    labels = []
    positions = []

    for position,label in enumerate(table.labels(raw = True)):
        keep = (    (options.whitelist is None or     any([   position in table.label_indexrange(needle)
                                                           or fnmatch.fnmatch(label,needle) for needle in options.whitelist]))
                and (options.blacklist is None or not any([   position in table.label_indexrange(needle)
                                                           or fnmatch.fnmatch(label,needle) for needle in options.blacklist])))
        if keep:                                                                                    # a label to keep?
            labels.append(label)                                                                    # remember name...
            positions.append(position)                                                              # ...and position

    if len(labels) > 0 and options.whitelist is not None and options.blacklist is None:             # check whether reordering is possible
        whitelistitem = np.zeros(len(labels),dtype=int)
        for i,label in enumerate(labels):                                                           # check each selected label
            match = [   positions[i] in table.label_indexrange(needle)
                     or fnmatch.fnmatch(label,needle) for needle in options.whitelist]              # which whitelist items do match it
            whitelistitem[i] = match.index(True) if np.sum(match) == 1 else -1                      # unique match to a whitelist item --> store which

        order = range(len(labels)) if np.any(whitelistitem < 0) \
           else np.lexsort(sortingList(labels,whitelistitem))                                       # reorder if unique, i.e. no "-1" in whitelistitem
    else:
        order = range(len(labels))                                                                  # maintain original order of labels

# --------------------------------------- evaluate condition ---------------------------------------

    if options.condition is not None:
        condition = options.condition                                                               # copy per file, since might be altered inline
        breaker = False

        # Each '#...#' placeholder is rewritten into a Python expression on table.data/specials.
        # The loop variable is deliberately NOT called 'all': the condition is eval'ed in this
        # module's namespace, so rebinding 'all' here would break conditions that call all(...).
        for token,marker,column in set(re.findall(r'#(([s]#)?(.+?))#',condition)):                  # find three groups
            idx = table.label_index(column)
            dim = table.label_dimension(column)
            if idx < 0 and column not in specials:
                damask.util.croak('column "{}" not found.'.format(column))
                breaker = True
            else:
                if column in specials:
                    replacement = 'specials["{}"]'.format(column)
                elif dim == 1:                                                                      # scalar input
                    replacement = '{}(table.data[{}])'.format({  '':'float',
                                                               's#':'str'}[marker],idx)             # take float or string value of data column
                elif dim > 1:                                                                       # multidimensional input (vector, tensor, etc.)
                    replacement = 'np.array(table.data[{}:{}],dtype=float)'.format(idx,idx+dim)     # use (flat) array representation

                condition = condition.replace('#'+token+'#',replacement)

        # NOTE(review): the table is not closed on this path; whether a close (or dismiss) is
        # needed here depends on damask.ASCIItable and should be confirmed.
        if breaker: continue                                                                        # found mistake in condition evaluation --> next file

# ------------------------------------------ assemble header ---------------------------------------

    table.info_append(scriptID + '\t' + ' '.join(sys.argv[1:]))
    table.labels_clear()
    table.labels_append(np.array(labels)[order])                                                    # update with new label set
    table.head_write()

# ------------------------------------------ process and output data ------------------------------------------

    positions = np.array(positions)[order]

    atOnce = options.condition is None
    if atOnce:                                                                                      # read full array and filter columns
        try:
            table.data_readArray(positions+1)                                                       # read desired columns (indexed 1,...)
            table.data_writeArray()                                                                 # directly write out
        except Exception:                                                                           # fall back to row-wise processing, but do not swallow Ctrl-C/SystemExit
            atOnce = False                                                                          # data contains items that prevent array chunking

    if not atOnce:                                                                                  # read data line by line
        outputAlive = True
        while outputAlive and table.data_read():                                                    # read next data line of ASCII table
            specials['_row_'] += 1                                                                  # count row
            # SECURITY NOTE: eval executes the user-supplied --condition as arbitrary Python code
            # with access to this module's globals; only run with trusted command lines.
            if options.condition is None or eval(condition):                                        # valid row ?
                table.data = [table.data[position] for position in positions]                       # retain filtered columns
                outputAlive = table.data_write()                                                    # output processed line

# ------------------------------------------ finalize output -----------------------------------------

    table.close()                                                                                   # close input ASCII table (works for stdin)