#!/usr/bin/env python2
# -*- coding: UTF-8 no BOM -*-

import os
import re
import sys
import math                                       # noqa -- kept in scope for user formulas
import numpy as np
from optparse import OptionParser
import damask
import collections

scriptName = os.path.splitext(os.path.basename(__file__))[0]
scriptID   = ' '.join([scriptName,damask.version])

# byte and unicode string types on both Python 2 and Python 3
_stringTypes = (str, type(u''))


def listify(x):
    """
    Return x unchanged if it is a non-string iterable, otherwise wrap it in a list.

    Strings are wrapped as a single item: they are iterable, but assigning one
    to a list slice would spread it over the row character by character.
    """
    if isinstance(x, collections.Iterable) and not isinstance(x, _stringTypes):
        return x
    return [x]


# --------------------------------------------------------------------
#                                MAIN
# --------------------------------------------------------------------

parser = OptionParser(option_class=damask.extendableOption, usage='%prog options [file[s]]', description = """
Add or alter column(s) with derived values according to user-defined arithmetic operation between column(s).
Column labels are tagged by '#label#' in formulas. Use ';' for ',' in functions.
Numpy is available as np.

Special variables: #_row_# -- row index
Examples:
(1) magnitude of vector -- "np.linalg.norm(#vec#)"
(2) rounded root of row number -- "round(math.sqrt(#_row_#);3)"

""", version = scriptID)

parser.add_option('-l','--label',
                  dest = 'labels',
                  action = 'extend', metavar = '',
                  help = '(list of) new column labels')
parser.add_option('-f','--formula',
                  dest = 'formulas',
                  action = 'extend', metavar = '',
                  help = '(list of) formulas corresponding to labels')
parser.add_option('-c','--condition',
                  dest = 'condition', metavar='string',
                  help = 'condition to filter rows')

parser.set_defaults(condition = None,
                   )

(options,filenames) = parser.parse_args()

if options.labels is None or options.formulas is None:
    parser.error('no formulas and/or labels specified.')
if len(options.labels) != len(options.formulas):
    parser.error('number of labels ({}) and formulas ({}) do not match.'.format(len(options.labels),len(options.formulas)))

# ';' stands in for ',' on the command line because ',' separates list items there
options.formulas = [formula.replace(';',',') for formula in options.formulas]

# NOTE(security): formulas and the condition are passed to eval() below.
# This is by design for a command-line tool, but never feed it untrusted input.

# --- loop over input files -------------------------------------------------------------------------

if filenames == []: filenames = [None]

for name in filenames:
    try:
        table = damask.ASCIItable(name = name,
                                  buffered = False)
    except Exception:                                                  # do not swallow KeyboardInterrupt/SystemExit
        damask.util.croak('could not open {}, skipping...'.format(name))
        continue
    damask.util.report(scriptName,name)

# ------------------------------------------ read header -------------------------------------------

    table.head_read()

# ---------------------------------------------------------------------------------------------------
    # 'table' and 'specials' are referenced by name inside the eval'ed strings -- do not rename
    specials = {
                 '_row_': 0,
               }

# ------------------------------------------ Evaluate condition -------------------------------------

    if options.condition is not None:
        interpolator = []
        condition = options.condition                                  # copy per file, since might be altered inline
        breaker = False

        for position,operand in enumerate(set(re.findall(r'#(([s]#)?(.+?))#',condition))):  # find three groups
            tagged,kind,column = operand                               # full tag, optional 's#' marker, bare label
            condition = condition.replace('#'+tagged+'#',
                                          {  '': '{%i}'%position,
                                           's#':'"{%i}"'%position}[kind])
            if column in specials:                                     # special label
                interpolator += ['specials["%s"]'%column]
                continue

            # A missing column may surface as an exception or as a negative
            # index; either way it must not be used to address table.data.
            try:
                idx = int(table.label_index(column))
            except Exception:
                idx = -1

            if idx < 0:
                damask.util.croak('column "{}" not found.'.format(column))
                breaker = True
            else:
                interpolator += ['%s(table.data[%i])'%({  '':'float',
                                                        's#':'str'}[kind],
                                                       idx)]           # could be generalized to indexrange as array lookup

        if breaker:                                                    # found mistake in condition evaluation --> next file
            table.close(dismiss = True)                                # release the table without producing output
            continue

        evaluator_condition = "'" + condition + "'.format(" + ','.join(interpolator) + ")"

# ------------------------------------------ build formulae ----------------------------------------

    # Work on a per-file copy: labels dropped for this table must neither
    # vanish for later files nor shift the label/formula pairing.
    labels = list(options.labels)
    evaluator = {}

    for label,formula in zip(options.labels,options.formulas):
        for column in re.findall(r'#(.+?)#',formula):                  # loop over column labels in formula
            idx = table.label_index(column)
            dim = table.label_dimension(column)
            if column in specials:
                replacement = 'specials["{}"]'.format(column)
            elif dim == 1:                                             # scalar input
                replacement = 'float(table.data[{}])'.format(idx)      # take float value of data column
            elif dim > 1:                                              # multidimensional input (vector, tensor, etc.)
                replacement = 'np.array(table.data[{}:{}],dtype=float)'.format(idx,idx+dim)  # use (flat) array representation
            else:
                damask.util.croak('column {} not found, skipping {}...'.format(column,label))
                labels.remove(label)
                break

            formula = formula.replace('#'+column+'#',replacement)

        evaluator[label] = formula

# ------------------------------------------ process data ------------------------------------------

    firstLine   = True
    outputAlive = True

    while outputAlive and table.data_read():                           # read next data line of ASCII table
        specials['_row_'] += 1                                         # count row

# ------------------------------------------ calculate one result to get length of labels ---------

        if firstLine:
            firstLine = False
            resultDim = {}
            for label in list(labels):                                 # iterate over stable copy
                resultDim[label] = np.size(eval(evaluator[label]))     # get dimension of formula[label]
                if resultDim[label] == 0: labels.remove(label)         # remove label if invalid result

            existing = set(table.labels(raw=False)+table.labels(raw=True))
            veterans = list(set(labels)&existing)                      # intersection of requested and existing
            newbies  = list(set(labels)-existing)                      # requested but not existing

            for veteran in list(veterans):
                if resultDim[veteran] != table.label_dimension(veteran):
                    damask.util.croak('{} is ignored due to inconsistent dimension...'.format(veteran))
                    veterans.remove(veteran)                           # ignore culprit

            for newby in newbies:
                table.labels_append(['{}_{}'.format(i+1,newby) for i in xrange(resultDim[newby])]
                                    if resultDim[newby] > 1 else newby)

# ------------------------------------------ assemble header ---------------------------------------

            table.info_append(scriptID + '\t' + ' '.join(sys.argv[1:]))
            table.head_write()

# ------------------------------------------ process data ------------------------------------------

        # inner eval fills the row's values into the condition string, outer eval evaluates it
        if options.condition is None or eval(eval(evaluator_condition)):  # condition for veteran replacement fulfilled
            for veteran in veterans:                                   # evaluate formulae that overwrite
                start = table.label_index(veteran)
                table.data[start:start+table.label_dimension(veteran)] = \
                    listify(eval(evaluator[veteran]))

        for newby in newbies:                                          # evaluate formulae that append
            table.data_append(listify(eval(evaluator[newby])))

        outputAlive = table.data_write()                               # output processed line

# ------------------------------------------ output finalization -----------------------------------

    table.close()                                                      # close ASCII table