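Merged the functionality of fillTable (changing existing column values) into addCalculation. If a label already exists, its column(s) are altered; otherwise, new column(s) are appended. The row filter option -c/--condition of fillTable is carried over to addCalculation.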

2016-05-16 20:06:13 -04:00 · 2016-05-16 20:06:13 -04:00 · 8e01247597
parent 1c36380a8a
commit 8e01247597
2 changed files with 82 additions and 171 deletions
--- a/processing/post/addCalculation.py
+++ b/processing/post/addCalculation.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 # -*- coding: UTF-8 no BOM -*-

 import os,re,sys
@ -10,16 +10,12 @@ import damask
 scriptName = os.path.splitext(os.path.basename(__file__))[0]
 scriptID   = ' '.join([scriptName,damask.version])

-def unravel(item):
-  if hasattr(item,'__contains__'): return ' '.join(map(unravel,item))
-  else: return str(item)
-
 # --------------------------------------------------------------------
 #                                MAIN
 # --------------------------------------------------------------------

 parser = OptionParser(option_class=damask.extendableOption, usage='%prog options [file[s]]', description = """
-Add column(s) with derived values according to user-defined arithmetic operation between column(s).
+Add or alter column(s) with derived values according to user-defined arithmetic operation between column(s).
 Column labels are tagged by '#label#' in formulas. Use ';' for ',' in functions.
 Numpy is available as np.

@ -37,6 +33,13 @@ parser.add_option('-f','--formula',
                  action = 'extend', metavar = '<string LIST>',
                  help = '(list of) formulas corresponding to labels')

+parser.add_option('-c','--condition',
+                  dest   = 'condition', metavar='string',
+                  help   = 'condition to filter rows')
+
+parser.set_defaults(condition = None,
+                   )
+
 (options,filenames) = parser.parse_args()

 if options.labels is None or options.formulas is None:
@ -53,7 +56,10 @@ if filenames == []: filenames = [None]

 for name in filenames:
  try:
-    table = damask.ASCIItable(name = name, buffered = False)
+    table = damask.ASCIItable(name = name,
+                              buffered = False)
+    output = damask.ASCIItable(name = name,
+                               buffered = False)
  except:
    continue
  damask.util.report(scriptName,name)
@ -62,14 +68,41 @@ for name in filenames:

  table.head_read()

-# ------------------------------------------ build formulae ----------------------------------------
-
+# -----------------------------------------------------------------------------------------------------
  specials = { \
               '_row_': 0,
             }

+# ------------------------------------------ Evaluate condition ---------------------------------------
+  if options.condition:  
+    interpolator = []
+    condition = options.condition                                                                     # copy per file, since might be altered inline
+    breaker = False
+  
+    for position,operand in enumerate(set(re.findall(r'#(([s]#)?(.+?))#',condition))):                # find three groups
+      condition = condition.replace('#'+operand[0]+'#',
+                                    {  '': '{%i}'%position,
+                                     's#':'"{%i}"'%position}[operand[1]])
+      if operand[2] in specials:                                                                      # special label 
+        interpolator += ['specials["%s"]'%operand[2]]
+      else:
+        try:
+          interpolator += ['%s(table.data[%i])'%({  '':'float',
+                                                  's#':'str'}[operand[1]],
+                                                 table.label_index(operand[2]))]                     # ccould be generalized to indexrange as array lookup
+        except:
+          damask.util.croak('column "{}" not found.'.format(operand[2]))
+          breaker = True
+        
+    if breaker: continue                                                                              # found mistake in condition evaluation --> next file
+  
+    evaluator_condition = "'" + condition + "'.format(" + ','.join(interpolator) + ")"
+
+  else: condition = ''
+
+# ------------------------------------------ build formulae ----------------------------------------
+
  evaluator = {}
-  brokenFormula = {}
  
  for label,formula in zip(options.labels,options.formulas):
    for column in re.findall(r'#(.+?)#',formula):                                                   # loop over column labels in formula
@ -82,15 +115,15 @@ for name in filenames:
      elif dim > 1:                                                                                 # multidimensional input (vector, tensor, etc.)
        replacement = 'np.array(table.data[{}:{}],dtype=float)'.format(idx,idx+dim)                 # use (flat) array representation
      else:
-        damask.util.croak('column {} not found...'.format(column))
-        brokenFormula[label] = True
+        damask.util.croak('column {} not found, skipping {}...'.format(column,label))
+        options.labels.remove(label)
        break

      formula = formula.replace('#'+column+'#',replacement)
-    
-    if label not in brokenFormula:
-      evaluator[label] = formula

+    evaluator[label] = formula
+
+    
 # ------------------------------------------ process data ------------------------------------------

  firstLine   = True
@ -98,32 +131,53 @@ for name in filenames:

  while outputAlive and table.data_read():                                                          # read next data line of ASCII table
    specials['_row_'] += 1                                                                          # count row
-
+    output.data_clear()
+    
 # ------------------------------------------ calculate one result to get length of labels  ---------

    if firstLine:
      firstLine = False
      labelDim  = {}
-      for label in [x for x in options.labels if x not in set(brokenFormula)]:
+      for label in [x for x in options.labels]:
        labelDim[label] = np.size(eval(evaluator[label]))
-        if labelDim[label] == 0: brokenFormula[label] = True
+        if labelDim[label] == 0: options.labels.remove(label)

 # ------------------------------------------ assemble header ---------------------------------------

-        if label not in brokenFormula:
-          table.labels_append(['{}_{}'.format(i+1,label) for i in xrange(labelDim[label])] if labelDim[label] > 1
-                               else label)
+      output.labels_clear()
+      tabLabels = table.labels()
+      for label in tabLabels:
+        dim = labelDim[label] if label in options.labels \
+                              else table.label_dimension(label)
+        output.labels_append(['{}_{}'.format(i+1,label) for i in xrange(dim)] if dim > 1 else label)

-      table.info_append(scriptID + '\t' + ' '.join(sys.argv[1:]))
-      table.head_write()
+      for label in options.labels:
+        if label in tabLabels: continue
+        output.labels_append(['{}_{}'.format(i+1,label) for i in xrange(labelDim[label])]
+                             if labelDim[label] > 1
+                             else label)
+
+      output.info = table.info
+      output.info_append(scriptID + '\t' + ' '.join(sys.argv[1:]))
+      output.head_write()

 # ------------------------------------------ process data ------------------------------------------

-    for label in [x for x in options.labels if x not in set(brokenFormula)]:
-      table.data_append(unravel(eval(evaluator[label])))
+    for label in output.labels():
+      oldIndices = table.label_indexrange(label)
+      Nold = max(1,len(oldIndices))                                                                  # Nold could be zero for new columns
+      Nnew = len(output.label_indexrange(label))
+      output.data_append(eval(evaluator[label]) if label in options.labels
+                                               and (condition == '' or eval(eval(evaluator_condition)))
+                     else np.tile([table.data[i] for i in oldIndices]
+                                  if label in tabLabels
+                                  else np.nan,
+                                  np.ceil(float(Nnew)/Nold))[:Nnew])                                 # spread formula result into given number of columns

-    outputAlive = table.data_write()                                                                # output processed line
+    outputAlive = output.data_write()                                                                # output processed line

-# ------------------------------------------ output finalization -----------------------------------  
+# ------------------------------------------ output finalization -----------------------------------

-  table.close()                                                                                     # close ASCII tables
+  table.input_close()                                                                                # close ASCII tables
+  output.close()                                                                                     # close ASCII tables
+  
--- a/processing/post/fillTable.py
+++ b/processing/post/fillTable.py
@ -1,143 +0,0 @@
-#!/usr/bin/env python2
-# -*- coding: UTF-8 no BOM -*-
-
-import os,re,sys
-import math                                                                                         # noqa
-import numpy as np
-from optparse import OptionParser
-import damask
-
-scriptName = os.path.splitext(os.path.basename(__file__))[0]
-scriptID   = ' '.join([scriptName,damask.version])
-
-def unravel(item):
-  if hasattr(item,'__contains__'): return ' '.join(map(unravel,item))
-  else: return str(item)
-
-# --------------------------------------------------------------------
-#                                MAIN
-# --------------------------------------------------------------------
-
-parser = OptionParser(option_class=damask.extendableOption, usage='%prog options [file[s]]', description = """
-Update existing value(s) to expression(s) respecting condition.
-
-Examples:
-Replace the 2nd value of column x by value "z - y" --> fillTable -l x -c "#_row_#==2" -f "#z#-#y#" 
-
-""", version = scriptID)
-
-parser.add_option('-l','--label',
-                  dest = 'labels',
-                  action = 'extend', metavar = '<string LIST>',
-                  help = '(list) of columns to be filled with formula(e)')
-parser.add_option('-f','--formula',
-                  dest = 'formulae',
-                  action = 'extend', metavar = '<string LIST>',
-                  help = '(list of) formulae corresponding to labels')
-parser.add_option('-c','--condition',
-                  dest   = 'condition', metavar='string',
-                  help   = 'condition to filter rows')
-
-parser.set_defaults(condition = '',
-                   )
-
-(options,filenames) = parser.parse_args()
-
-if options.labels is None or options.formulae is None:
-  parser.error('no formulae specified.')
-if len(options.labels) != len(options.formulae):
-  parser.error('number of labels ({}) and formulae ({}) do not match.'.format(len(options.labels),len(options.formulae)))
-
-for i in xrange(len(options.formulae)):
-  options.formulae[i] = options.formulae[i].replace(';',',')
-
-# --- loop over input files -------------------------------------------------------------------------
-
-if filenames == []: filenames = [None]
-
-for name in filenames:
-  try:
-    table = damask.ASCIItable(name = name,
-                              buffered = False)
-  except: continue
-  damask.util.report(scriptName,name)
-
-# ------------------------------------------ assemble info -----------------------------------------
-
-  table.head_read()
-# ------------------------------------------ Evaluate condition ---------------------------------------  
-
-  specials = { \
-               '_row_': 0,
-             }
-  
-  interpolator = []
-  condition = options.condition                                                                     # copy per file, since might be altered inline
-  breaker = False
-  
-  for position,operand in enumerate(set(re.findall(r'#(([s]#)?(.+?))#',condition))):                # find three groups
-    condition = condition.replace('#'+operand[0]+'#',
-                                  {  '': '{%i}'%position,
-                                   's#':'"{%i}"'%position}[operand[1]])
-    if operand[2] in specials:                                                                      # special label 
-      interpolator += ['specials["%s"]'%operand[2]]
-    else:
-      try:
-        interpolator += ['%s(table.data[%i])'%({  '':'float',
-                                                's#':'str'}[operand[1]],
-                                               table.labels.index(operand[2]))]                     # ccould be generalized to indexrange as array lookup
-      except:
-        damask.util.croak('column %s not found.'%operand[2])
-        breaker = True
-        
-  if breaker: continue                                                                              # found mistake in condition evaluation --> next file
-  
-  evaluator_condition = "'" + condition + "'.format(" + ','.join(interpolator) + ")"
-
-#----------------------------------- Formula -------------------------------------------------------
-
-  evaluator = {}
-  brokenFormula = {}
-  for label,formula in zip(options.labels,options.formulae):
-    for column in re.findall(r'#(.+?)#',formula):                                                   # loop over column labels in formula
-      idx = table.label_index(column)
-      dim = table.label_dimension(column)
-      if column in specials:
-        replacement = 'specials["{}"]'.format(column)
-      elif dim == 1:                                                                                # scalar input
-        replacement = 'float(table.data[{}])'.format(idx)                                           # take float value of data column
-      elif dim > 1:                                                                                 # multidimensional input (vector, tensor, etc.)
-        replacement = 'np.array(table.data[{}:{}],dtype=float)'.format(idx,idx+dim)                 # use (flat) array representation
-      else:
-        damask.util.croak('column {} not found...'.format(column))
-        brokenFormula[label] = True
-        break
-      formula = formula.replace('#'+column+'#',replacement)
-    if label not in brokenFormula:
-      evaluator[label] = formula
-
-# ------------------------------------------ assemble header ---------------------------------------
-
-  table.info_append(scriptID + '\t' + ' '.join(sys.argv[1:]))                                        # read ASCII header info
-  table.labels                                                                                       # update with label set
-  table.head_write()
-
-# ------------------------------------------ process data ------------------------------------------
-
-  outputAlive = True
-  while outputAlive and table.data_read():                                                          # read next data line of ASCII table
-    specials['_row_'] += 1
-    if condition == '' or eval(eval(evaluator_condition)):                                          # test row for condition
-      for label in [x for x in options.labels if x not in set(brokenFormula)]:
-        indices = table.label_indexrange(label)                                                     # affected columns
-        probe = np.array(eval(evaluator[label]))                                                    # get formula result (scalar, array, etc.)
-        container = np.tile(probe,np.ceil(float(len(indices))/probe.size))[:len(indices)]           # spread formula result into given number of columns
-        for i,ind in enumerate(indices):                                                            # copy one by one as table.data is NOT a numpy array
-          table.data[ind] = container[i]
-
-    outputAlive = table.data_write()                                                                # output processed line
-
-# ------------------------------------------ output finalization -----------------------------------  
-
-  table.close()                                                                                     # close ASCII tables
-