2012-02-07 18:39:10 +05:30
|
|
|
#!/usr/bin/env python
|
2011-12-02 20:45:36 +05:30
|
|
|
|
2013-07-17 03:18:23 +05:30
|
|
|
import os,re,sys,math,string,damask,numpy
|
2011-12-02 20:45:36 +05:30
|
|
|
from optparse import OptionParser, Option
|
|
|
|
|
2013-12-09 21:24:47 +05:30
|
|
|
# Identification string of this script; the '$Id$' keyword is presumably
# expanded by the version control system on checkout (TODO confirm).
scriptID = '$Id$'
# Short name used to prefix progress messages: the second whitespace-separated
# field of scriptID.  An unexpanded keyword consists of a single field only, in
# which case the file name of the invoked script is used instead of failing
# with an IndexError.
scriptName = scriptID.split()[1] if len(scriptID.split()) > 1 else os.path.basename(sys.argv[0])
|
|
|
|
|
2013-07-17 03:18:23 +05:30
|
|
|
def unravel(item):
    """Flatten a (possibly nested) container into one space-separated string.

    Parameters:
        item: a single value, or any object exposing '__contains__' whose
              elements are themselves values or such containers.

    Returns:
        str(item) for a single value (strings count as single values),
        otherwise the unravelled elements joined by single blanks.
    """
    # Strings expose '__contains__' too, and iterating a string yields
    # one-character strings again; they have to be treated as atoms or the
    # recursion never terminates.
    if isinstance(item, (str, type(u''))) or not hasattr(item, '__contains__'):
        return str(item)
    return ' '.join(map(unravel, item))
|
|
|
|
|
2011-12-02 20:45:36 +05:30
|
|
|
# -----------------------------
|
|
|
|
class extendableOption(Option):
    """Option class that adds the option parser action 'extend'.

    'extend' allows one option to take multiple arguments: every occurrence
    of the option contributes its comma-separated items to a common list.
    Taken from the online tutorial http://docs.python.org/library/optparse.html
    """

    # register 'extend' in every action category it belongs to
    ACTIONS = Option.ACTIONS + ("extend",)
    STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)
    TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)
    ALWAYS_TYPED_ACTIONS = Option.ALWAYS_TYPED_ACTIONS + ("extend",)

    def take_action(self, action, dest, opt, value, values, parser):
        """Handle 'extend' here and delegate any other action to Option."""
        if action != "extend":
            Option.take_action(self, action, dest, opt, value, values, parser)
            return
        # create the destination list on first use, then add the new items
        collected = values.ensure_value(dest, [])
        collected.extend(value.split(","))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --------------------------------------------------------------------
|
|
|
|
# MAIN
|
|
|
|
# --------------------------------------------------------------------
|
|
|
|
|
|
|
|
# Command line interface: '--label' and '--formula' both use the 'extend'
# action, so each may be given several times and/or as a comma-separated list.
parser = OptionParser(option_class=extendableOption, usage='%prog options [file[s]]', description = """
Add column(s) with derived values according to user defined arithmetic operation between column(s).
Columns can be specified either by label or index. Use ';' for ',' in functions.

Example: distance to IP coordinates -- "math.sqrt( #ip.x#**2 + #ip.y#**2 + round(#ip.z#;3)**2 )"
""" + scriptID.replace('\n','\\n')
)

parser.add_option('-l','--label', dest='labels', action='extend', type='string',
                  help='(list of) new column labels', metavar='<LIST>')
parser.add_option('-f','--formula', dest='formulas', action='extend', type='string',
                  help='(list of) formulas corresponding to labels', metavar='<LIST>')
parser.set_defaults(labels=[], formulas=[])

(options,filenames) = parser.parse_args()

# every new column needs exactly one formula
if len(options.labels) != len(options.formulas):
    parser.error('number of labels (%i) and formulas (%i) do not match'%(len(options.labels),len(options.formulas)))

# ',' separates list items on the command line, hence formulas carry ';'
# wherever Python syntax requires ','; translate that back now.
options.formulas = [formula.replace(';',',') for formula in options.formulas]
|
|
|
|
|
2011-12-02 20:45:36 +05:30
|
|
|
# ------------------------------------------ setup file handles ---------------------------------------
|
|
|
|
|
|
|
|
# One dictionary per table to process, holding its display name and the
# handles used for reading ('input'), writing ('output') and messages ('croak').
files = []
if not filenames:
    # no file names given: filter STDIN to STDOUT
    files.append({'name':'STDIN', 'input':sys.stdin, 'output':sys.stdout, 'croak':sys.stderr})
else:
    for name in filenames:
        if os.path.exists(name):
            # results go to '<name>_tmp' next to the input file
            files.append({'name':name, 'input':open(name), 'output':open(name+'_tmp','w'), 'croak':sys.stderr})
        else:
            # report the omission instead of dropping the file silently
            sys.stderr.write('file %s not found, skipping...\n'%name)
|
2011-12-02 20:45:36 +05:30
|
|
|
|
2013-12-09 21:24:47 +05:30
|
|
|
#--- loop over input files ------------------------------------------------------------------------
|
2011-12-02 20:45:36 +05:30
|
|
|
|
|
|
|
for file in files:
    # Process one table: evaluate every formula for every data row and append
    # the results as new column(s).
    # NOTE: formulas are eval()'ed below without an explicit namespace, i.e. they
    # see this script's module-level names (table, specials, file, math, numpy,
    # ...); that is why the names 'file', 'table' and 'specials' are kept as is.
    # NOTE(review): eval() executes arbitrary code taken from the command line;
    # never feed formulas from an untrusted source to this script.
    if file['name'] != 'STDIN':
        file['croak'].write('\033[1m'+scriptName+'\033[0m: '+file['name']+'\n')
    else:
        file['croak'].write('\033[1m'+scriptName+'\033[0m\n')

    # pseudo columns that formulas may reference like real ones, e.g. #_row_#
    specials = {
        '_row_': 0,
    }

    table = damask.ASCIItable(file['input'],file['output'],False)           # make unbuffered ASCII_table
    table.head_read()                                                       # read ASCII header info
    table.info_append(scriptID.replace('\n','\\n') +
                      '\t' + ' '.join(sys.argv[1:]))

    evaluator = {}                                                          # label -> expression producing the formula text of the current row
    brokenFormula = {}                                                      # labels that cannot be evaluated and are left out

    for label,formula in zip(options.labels,options.formulas):
        interpolator = []
        for position,column in enumerate(set(re.findall(r'#(.+?)#',formula))):  # loop over unique set of column labels in formula
            formula = formula.replace('#'+column+'#','{%i}'%position)
            if column in specials:
                interpolator.append('specials["%s"]'%column)
            elif column.isdigit():
                # purely numeric reference: column given by index
                if len(table.labels) > int(column):
                    interpolator.append('float(table.data[%i])'%(int(column)))
                else:
                    file['croak'].write('column %s not found...\n'%column)
                    brokenFormula[label] = True
            else:
                # reference by label; index() signals an unknown label with
                # ValueError (presumably table.labels is a list -- confirm)
                try:
                    index = table.labels.index(column)
                except ValueError:
                    file['croak'].write('column %s not found...\n'%column)
                    brokenFormula[label] = True
                else:
                    interpolator.append('float(table.data[%i])'%index)

        if label not in brokenFormula:
            # evaluating this once fills in the values of the current row,
            # evaluating that result computes the formula itself
            evaluator[label] = "'" + formula + "'.format(" + ','.join(interpolator) + ")"

    # ------------------------------------------ calculate one result to get length of labels ------
    # NOTE(review): the outcome of this first data_read() is not checked; a table
    # without data rows presumably fails in the eval below -- confirm.
    table.data_read()
    labelLen = {}
    for label in options.labels:
        if label in brokenFormula:                                          # no evaluator exists for broken formulas
            continue
        labelLen[label] = numpy.size(eval(eval(evaluator[label])))
        if labelLen[label] == 0:                                            # empty result: nothing to output
            brokenFormula[label] = True

    # only these labels contribute to header and data
    activeLabels = [label for label in options.labels if label not in brokenFormula]

    # ------------------------------------------ assemble header ---------------------------------------
    for label in activeLabels:
        if labelLen[label] == 1:
            table.labels_append(label)
        else:
            # results with several entries get one enumerated column per entry
            table.labels_append(['%i_%s'%(i+1,label) for i in range(labelLen[label])])

    table.head_write()

    # ------------------------------------------ process data ---------------------------------------
    outputAlive = True
    table.data_rewind()

    while outputAlive and table.data_read():                                # read next data line of ASCII table
        specials['_row_'] += 1                                              # count row
        for label in activeLabels:
            table.data_append(unravel(eval(eval(evaluator[label]))))
        outputAlive = table.data_write()                                    # output processed line

    # ------------------------------------------ output result ---------------------------------------
    if outputAlive:
        table.output_flush()                                                # just in case of buffered ASCII table

    file['input'].close()                                                   # close input ASCII table
    if file['name'] != 'STDIN':
        file['output'].close()                                              # close output ASCII table
        os.rename(file['name']+'_tmp',file['name'])                         # overwrite old one with tmp new
|