From b5a1295cb9842ebbe5710ad4606b04ff3ec21408 Mon Sep 17 00:00:00 2001
From: Martin Diehl <mail@martin-diehl.net>
Date: Wed, 18 Mar 2020 13:47:09 +0100
Subject: [PATCH] ASCIItable -> Table

---
 processing/post/binXY.py                      | 75 +++++------------
 .../pre/geom_fromVoronoiTessellation.py       |  4 +-
 processing/pre/hybridIA_linODFsampling.py     | 72 ++++++-----------
 processing/pre/seeds_fromDistribution.py      | 80 +++++++++----------
 4 files changed, 87 insertions(+), 144 deletions(-)

diff --git a/processing/post/binXY.py b/processing/post/binXY.py
index e9924a28a..860b70027 100755
--- a/processing/post/binXY.py
+++ b/processing/post/binXY.py
@@ -2,6 +2,7 @@
 
 import os
 import sys
+from io import StringIO
 from optparse import OptionParser
 
 import numpy as np
@@ -71,60 +72,30 @@ parser.set_defaults(bins = (10,10),
                    )
 
 (options,filenames) = parser.parse_args()
+if filenames == []: filenames = [None]
 
-minmax = np.array([np.array(options.xrange),
-                   np.array(options.yrange),
-                   np.array(options.zrange)])
-grid   = np.zeros(options.bins,'f')
-result = np.zeros((options.bins[0],options.bins[1],3),'f')
+minmax = np.array([options.xrange,options.yrange,options.zrange])
+result = np.empty((options.bins[0],options.bins[1],3),'f')
 
 if options.data is None: parser.error('no data columns specified.')
 
-labels = list(options.data)
-
-
-if options.weight is not None: labels += [options.weight]                                               # prevent character splitting of single string value
-
-# --- loop over input files -------------------------------------------------------------------------
-
-if filenames == []: filenames = [None]
-
 for name in filenames:
-  try:
-    table = damask.ASCIItable(name = name,
-                              outname = os.path.join(os.path.dirname(name),
-                                                    'binned-{}-{}_'.format(*options.data) +
-                                                   ('weighted-{}_'.format(options.weight) if options.weight else '') +
-                                                    os.path.basename(name)) if name else name)
-  except IOError:
-    continue
   damask.util.report(scriptName,name)
 
-# ------------------------------------------ read header ------------------------------------------
-
-  table.head_read()
-
-# ------------------------------------------ sanity checks ----------------------------------------
-
-  missing_labels = table.data_readArray(labels)
-
-  if len(missing_labels) > 0:
-    damask.util.croak('column{} {} not found.'.format('s' if len(missing_labels) > 1 else '',', '.join(missing_labels)))
-    table.close(dismiss = True)
-    continue
+  table = damask.Table.from_ASCII(StringIO(''.join(sys.stdin.read())) if name is None else name)
+  data = np.hstack((table.get(options.data[0]),table.get(options.data[1])))
 
   for c in (0,1):                                                                                   # check data minmax for x and y (i = 0 and 1)
-    if (minmax[c] == 0.0).all(): minmax[c] = [table.data[:,c].min(),table.data[:,c].max()]
+    if (minmax[c] == 0.0).all(): minmax[c] = [data[:,c].min(),data[:,c].max()]
     if options.type[c].lower() == 'log':                                                            # if log scale
-      table.data[:,c] = np.log(table.data[:,c])                                                     # change x,y coordinates to log
+      data[:,c] = np.log(data[:,c])                                                                 # change x,y coordinates to log
       minmax[c] = np.log(minmax[c])                                                                 # change minmax to log, too
 
   delta = minmax[:,1]-minmax[:,0]
-  (grid,xedges,yedges) = np.histogram2d(table.data[:,0],table.data[:,1],
+  (grid,xedges,yedges) = np.histogram2d(data[:,0],data[:,1],
                                         bins=options.bins,
                                         range=minmax[:2],
-                                        weights=None if options.weight is None else table.data[:,2])
-
+                                        weights=table.get(options.weight) if options.weight else None)
   if options.normCol:
     for x in range(options.bins[0]):
       sum = np.sum(grid[x,:])
@@ -153,24 +124,20 @@ for name in filenames:
     for y in range(options.bins[1]):
       result[x,y,:] = [minmax[0,0]+delta[0]/options.bins[0]*(x+0.5),
                        minmax[1,0]+delta[1]/options.bins[1]*(y+0.5),
-                       min(1.0,max(0.0,(grid[x,y]-minmax[2,0])/delta[2]))]
+                       np.clip((grid[x,y]-minmax[2,0])/delta[2],0.0,1.0)]
 
   for c in (0,1):
     if options.type[c].lower() == 'log': result[:,:,c] = np.exp(result[:,:,c])
 
   if options.invert: result[:,:,2] = 1.0 - result[:,:,2]
 
-# --- assemble header -------------------------------------------------------------------------------
-
-  table.info_clear()
-  table.info_append(scriptID + '\t' + ' '.join(sys.argv[1:]))
-  table.labels_clear()
-  table.labels_append(['bin_%s'%options.data[0],'bin_%s'%options.data[1],'z'])
-  table.head_write()
-
-# --- output result ---------------------------------------------------------------------------------
-
-  table.data = result.reshape(options.bins[0]*options.bins[1],3)
-  table.data_writeArray()
-
-  table.close()
+  comments = scriptID + '\t' + ' '.join(sys.argv[1:])
+  shapes = {'bin_%s'%options.data[0]:(1,),'bin_%s'%options.data[1]:(1,),'z':(1,)}
+  table  = damask.Table(result.reshape(options.bins[0]*options.bins[1],3),shapes,[comments])
+  if name:
+    outname = os.path.join(os.path.dirname(name),'binned-{}-{}_'.format(*options.data) +
+                                                ('weighted-{}_'.format(options.weight) if options.weight else '') +
+                                                 os.path.basename(name))
+    table.to_ASCII(outname)
+  else:
+    table.to_ASCII(sys.stdout)
diff --git a/processing/pre/geom_fromVoronoiTessellation.py b/processing/pre/geom_fromVoronoiTessellation.py
index 396007d78..95b197e9e 100755
--- a/processing/pre/geom_fromVoronoiTessellation.py
+++ b/processing/pre/geom_fromVoronoiTessellation.py
@@ -255,8 +255,8 @@ for name in filenames:
   table.data_readArray(labels)
   coords    = table.data[:,table.label_indexrange(options.pos)] * info['size'] if options.normalized \
         else  table.data[:,table.label_indexrange(options.pos)] - info['origin']
-  eulers    = table.data[:,table.label_indexrange(options.eulers)] if hasEulers \
-              else np.zeros(3*len(coords))
+  if hasEulers:
+    eulers  = table.data[:,table.label_indexrange(options.eulers)]
   grains    = table.data[:,table.label_indexrange(options.microstructure)].astype(int) if hasGrains \
               else np.arange(len(coords))+1
 
diff --git a/processing/pre/hybridIA_linODFsampling.py b/processing/pre/hybridIA_linODFsampling.py
index 78c70d89c..aee958581 100755
--- a/processing/pre/hybridIA_linODFsampling.py
+++ b/processing/pre/hybridIA_linODFsampling.py
@@ -1,11 +1,16 @@
 #!/usr/bin/env python3
-# -*- coding: UTF-8 no BOM -*-
 
+import os
+import sys
+import math
+import random
 from optparse import OptionParser
-import damask
-import os,sys,math,random
+
 import numpy as np
 
+import damask
+
+
 scriptName = os.path.splitext(os.path.basename(__file__))[0]
 scriptID   = ' '.join([scriptName,damask.version])
 
@@ -223,54 +228,29 @@ parser.set_defaults(randomSeed = None,
                    )
 
 (options,filenames) = parser.parse_args()
-
-nSamples       = options.number
-methods        = [options.algorithm]
-
-
-# --- loop over input files -------------------------------------------------------------------------
-
 if filenames == []: filenames = [None]
 
 for name in filenames:
-  try:
-    table = damask.ASCIItable(name = name, readonly=True)
-  except IOError:
-    continue
   damask.util.report(scriptName,name)
+  
+  table = damask.Table.from_ASCII(StringIO(''.join(sys.stdin.read())) if name is None else name)
 
-  randomSeed = int(os.urandom(4).hex(), 16)  if options.randomSeed is None else options.randomSeed         # random seed per file for second phase
+  randomSeed = int(os.urandom(4).hex(),16)  if options.randomSeed is None else options.randomSeed  # random seed per file
   random.seed(randomSeed)
 
-# ------------------------------------------ read header and data ---------------------------------
-  table.head_read()
-
-  errors = []
-  labels = ['1_euler','2_euler','3_euler','intensity']
-  for i,index in enumerate(table.label_index(labels)):
-    if index < 0: errors.append('label {} not present.'.format(labels[i]))
-  
-  if errors != []:
-    damask.util.croak(errors)
-    table.close(dismiss = True)
-    continue
-
-  table.data_readArray(labels)
-
 # --------------- figure out limits (left/right), delta, and interval -----------------------------
-
   ODF = {}
-  limits = np.array([np.min(table.data[:,0:3],axis=0),
-                     np.max(table.data[:,0:3],axis=0)])                                             # min/max euler angles in degrees
+  eulers = table.get('euler')
+  limits = np.array([np.min(eulers,axis=0),np.max(eulers,axis=0)])                                  # min/max euler angles in degrees
   ODF['limit'] = np.radians(limits[1,:])                                                            # right hand limits in radians
   ODF['center'] = 0.0 if all(limits[0,:]<1e-8) else 0.5                                             # vertex or cell centered
 
-  ODF['interval'] = np.array(list(map(len,[np.unique(table.data[:,i]) for i in range(3)])),'i')     # steps are number of distict values
+  ODF['interval'] = np.array(list(map(len,[np.unique(eulers[:,i]) for i in range(3)])),'i')         # steps are number of distict values
   ODF['nBins'] = ODF['interval'].prod()
   ODF['delta'] = np.radians(np.array(limits[1,0:3]-limits[0,0:3])/(ODF['interval']-1))              # step size
 
-  if table.data.shape[0] != ODF['nBins']:
-    damask.util.croak('expecting %i values but got %i'%(ODF['nBins'],table.data.shape[0]))
+  if eulers.shape[0] != ODF['nBins']:
+    damask.util.croak('expecting %i values but got %i'%(ODF['nBins'],eulers.shape[0]))
     continue
   
 # ----- build binnedODF array and normalize ------------------------------------------------------
@@ -278,9 +258,10 @@ for name in filenames:
   ODF['dV_V'] = [None]*ODF['nBins']
   ODF['nNonZero'] = 0
   dg = ODF['delta'][0]*2.0*math.sin(ODF['delta'][1]/2.0)*ODF['delta'][2]
+  intensity = table.get('intensity')
   for b in range(ODF['nBins']):
     ODF['dV_V'][b] = \
-      max(0.0,table.data[b,table.label_index('intensity')]) * dg * \
+      max(0.0,intensity[b,0]) * dg * \
       math.sin(((b//ODF['interval'][2])%ODF['interval'][1]+ODF['center'])*ODF['delta'][1])
     if ODF['dV_V'][b] > 0.0:
       sumdV_V += ODF['dV_V'][b]
@@ -296,11 +277,10 @@ for name in filenames:
                'Reference Integral: %12.11f\n'%(ODF['limit'][0]*ODF['limit'][2]*(1-math.cos(ODF['limit'][1]))),
                ])
                                                      
-# call methods
   Functions = {'IA': 'directInversion', 'STAT': 'TothVanHoutteSTAT', 'MC': 'MonteCarloBins'}
   method = Functions[options.algorithm]
 
-  Orientations, ReconstructedODF = (globals()[method])(ODF,nSamples)
+  Orientations, ReconstructedODF = (globals()[method])(ODF,options.number)
   
 # calculate accuracy of sample
   squaredDiff     = {'orig':0.0,method:0.0}
@@ -319,7 +299,7 @@ for name in filenames:
   indivSum['orig'] += ODF['dV_V'][bin]
   indivSquaredSum['orig'] += ODF['dV_V'][bin]**2
   
-  damask.util.croak(['sqrt(N*)RMSD of ODFs:\t %12.11f'% math.sqrt(nSamples*squaredDiff[method]),
+  damask.util.croak(['sqrt(N*)RMSD of ODFs:\t %12.11f'% math.sqrt(options.number*squaredDiff[method]),
                'RMSrD of ODFs:\t %12.11f'%math.sqrt(squaredRelDiff[method]),
                'rMSD of ODFs:\t %12.11f'%(squaredDiff[method]/indivSquaredSum['orig']),
                'nNonZero correlation slope:\t %12.11f'\
@@ -331,10 +311,10 @@ for name in filenames:
                         (indivSquaredSum[method]/ODF['nNonZero']-(indivSum[method]/ODF['nNonZero'])**2)))),
               ])
   
-  if method == 'IA' and nSamples < ODF['nNonZero']:
+  if method == 'IA' and options.number < ODF['nNonZero']:
     strOpt = '(%i)'%ODF['nNonZero']
   
-  formatwidth = 1+int(math.log10(nSamples))
+  formatwidth = 1+int(math.log10(options.number))
 
   materialConfig = [
       '#' + scriptID + ' ' + ' '.join(sys.argv[1:]),
@@ -344,7 +324,7 @@ for name in filenames:
       '#-------------------#',
       ]
   
-  for i,ID in enumerate(range(nSamples)):
+  for i,ID in enumerate(range(options.number)):
     materialConfig += ['[Grain%s]'%(str(ID+1).zfill(formatwidth)),
                       '(constituent)   phase %i   texture %s   fraction 1.0'%(options.phase,str(ID+1).rjust(formatwidth)),
                      ]
@@ -355,7 +335,7 @@ for name in filenames:
       '#-------------------#',
       ]
 
-  for ID in range(nSamples):
+  for ID in range(options.number):
     eulers = Orientations[ID]
   
     materialConfig += ['[Grain%s]'%(str(ID+1).zfill(formatwidth)),
@@ -364,7 +344,5 @@ for name in filenames:
 
 #--- output finalization -------------------------------------------------------------------------- 
 
-  with (open(os.path.splitext(name)[0]+'_'+method+'_'+str(nSamples)+'_material.config','w')) as outfile:
+  with (open(os.path.splitext(name)[0]+'_'+method+'_'+str(options.number)+'_material.config','w')) as outfile:
     outfile.write('\n'.join(materialConfig)+'\n')
-
-  table.close()
diff --git a/processing/pre/seeds_fromDistribution.py b/processing/pre/seeds_fromDistribution.py
index 318598ba0..2045d68bf 100755
--- a/processing/pre/seeds_fromDistribution.py
+++ b/processing/pre/seeds_fromDistribution.py
@@ -1,9 +1,15 @@
 #!/usr/bin/env python3
 
-import threading,time,os,sys,random
-import numpy as np
+import threading
+import time
+import os
+import sys
+import random
 from optparse import OptionParser
 from io import StringIO
+
+import numpy as np
+
 import damask
 
 scriptName = os.path.splitext(os.path.basename(__file__))[0]
@@ -35,11 +41,11 @@ class myThread (threading.Thread):
     s.acquire()
     bestMatch = match
     s.release()
-    
+
     random.seed(options.randomSeed+self.threadID)                                                   # initializes to given seeds
     knownSeedsUpdate = bestSeedsUpdate -1.0                                                         # trigger update of local best seeds
     randReset = True                                                                                # aquire new direction
-    
+
     myBestSeedsVFile    = StringIO()                                                                # store local copy of best seeds file
     perturbedSeedsVFile = StringIO()                                                                # perturbed best seeds file
     perturbedGeomVFile  = StringIO()                                                                # tessellated geom file
@@ -49,14 +55,14 @@ class myThread (threading.Thread):
       s.acquire()                                                                                   # ensure only one thread acces global data
       if bestSeedsUpdate > knownSeedsUpdate:                                                        # write best fit to virtual file
         knownSeedsUpdate = bestSeedsUpdate
-        bestSeedsVFile.reset()
+        bestSeedsVFile.seek(0)
         myBestSeedsVFile.close()
         myBestSeedsVFile = StringIO()
         i=0
         for line in bestSeedsVFile:
           myBestSeedsVFile.write(line)
       s.release()
-      
+
       if randReset:                                                                                 # new direction because current one led to worse fit
 
         randReset = False
@@ -67,46 +73,38 @@ class myThread (threading.Thread):
         for i in range(NmoveGrains):
           selectedMs.append(random.randrange(1,nMicrostructures))
 
-          direction.append(np.array(((random.random()-0.5)*delta[0],
-                                     (random.random()-0.5)*delta[1],
-                                     (random.random()-0.5)*delta[2])))
-        
+          direction.append((np.random.random()-0.5)*delta)
+
       perturbedSeedsVFile.close()                                                                   # reset virtual file
       perturbedSeedsVFile = StringIO()
-      myBestSeedsVFile.reset()
+      myBestSeedsVFile.seek(0)
 
-      perturbedSeedsTable = damask.ASCIItable(myBestSeedsVFile,perturbedSeedsVFile,labeled=True)    # write best fit to perturbed seed file
-      perturbedSeedsTable.head_read()
-      perturbedSeedsTable.head_write()
-      outputAlive=True
-      ms = 1
+      perturbedSeedsTable = damask.Table.from_ASCII(myBestSeedsVFile)
+      coords = perturbedSeedsTable.get('pos')
       i = 0
-      while outputAlive and perturbedSeedsTable.data_read():                                        # perturbe selected microstructure
+      for ms,coord in enumerate(coords):
         if ms in selectedMs:
-          newCoords=np.array(tuple(map(float,perturbedSeedsTable.data[0:3]))+direction[i])
+          newCoords=coord+direction[i]
           newCoords=np.where(newCoords>=1.0,newCoords-1.0,newCoords)                                # ensure that the seeds remain in the box
           newCoords=np.where(newCoords <0.0,newCoords+1.0,newCoords)
-          perturbedSeedsTable.data[0:3]=[format(f, '8.6f') for f in newCoords]
+          coords[i]=newCoords
           direction[i]*=2.
           i+= 1
-        ms+=1
-        perturbedSeedsTable.data_write()
-#--- do tesselation with perturbed seed file ----------------------------------------------------------
+      perturbedSeedsTable.set('pos',coords)
+      perturbedSeedsTable.to_ASCII(perturbedSeedsVFile)
+
+#--- do tesselation with perturbed seed file ------------------------------------------------------
       perturbedGeomVFile.close()
       perturbedGeomVFile = StringIO()
-      perturbedSeedsVFile.reset()
+      perturbedSeedsVFile.seek(0)
       perturbedGeomVFile.write(damask.util.execute('geom_fromVoronoiTessellation '+
                      ' -g '+' '.join(list(map(str, options.grid))),streamIn=perturbedSeedsVFile)[0])
-      perturbedGeomVFile.reset()
+      perturbedGeomVFile.seek(0)
 
-#--- evaluate current seeds file ----------------------------------------------------------------------
-      perturbedGeomTable = damask.ASCIItable(perturbedGeomVFile,None,labeled=False,readonly=True)
-      perturbedGeomTable.head_read()
-      for i in perturbedGeomTable.info:
-        if i.startswith('microstructures'): myNmicrostructures = int(i.split('\t')[1])
-      perturbedGeomTable.data_readArray()
-      perturbedGeomTable.output_flush()
-      currentData=np.bincount(perturbedGeomTable.data.astype(int).ravel())[1:]/points
+#--- evaluate current seeds file ------------------------------------------------------------------
+      perturbedGeom = damask.Geom.from_file(perturbedGeomVFile)
+      myNmicrostructures = len(np.unique(perturbedGeom.microstructure))
+      currentData=np.bincount(perturbedGeom.microstructure.ravel())[1:]/points
       currentError=[]
       currentHist=[]
       for i in range(nMicrostructures):                                                             # calculate the deviation in all bins per histogram
@@ -114,8 +112,8 @@ class myThread (threading.Thread):
         currentError.append(np.sqrt(np.square(np.array(target[i]['histogram']-currentHist[i])).sum()))
 
 # as long as not all grains are within the range of the target, use the deviation to left and right as error
-      if currentError[0]>0.0:                                                                       
-        currentError[0] *=((target[0]['bins'][0]-np.min(currentData))**2.0+                        
+      if currentError[0]>0.0:
+        currentError[0] *=((target[0]['bins'][0]-np.min(currentData))**2.0+
                            (target[0]['bins'][1]-np.max(currentData))**2.0)**0.5                    # norm of deviations by number of usual bin deviation
       s.acquire()                                                                                   # do the evaluation serially
       bestMatch = match
@@ -137,7 +135,7 @@ class myThread (threading.Thread):
             damask.util.croak('          target: '+np.array_str(target[i]['histogram']))
             damask.util.croak('          best:   '+np.array_str(currentHist[i]))
             currentSeedsName = baseFile+'_'+str(bestSeedsUpdate).replace('.','-')                   # name of new seed file (use time as unique identifier)
-            perturbedSeedsVFile.reset()
+            perturbedSeedsVFile.seek(0)
             bestSeedsVFile.close()
             bestSeedsVFile = StringIO()
             sys.stdout.flush()
@@ -154,7 +152,7 @@ class myThread (threading.Thread):
             break
           if i == min(nMicrostructures,myMatch+options.bins)-1:                                     # same quality as before: take it to keep on moving
             bestSeedsUpdate = time.time()
-            perturbedSeedsVFile.reset()
+            perturbedSeedsVFile.seek(0)
             bestSeedsVFile.close()
             bestSeedsVFile = StringIO()
             for line in perturbedSeedsVFile:
@@ -167,7 +165,7 @@ class myThread (threading.Thread):
                                                          .format(self.threadID,myNmicrostructures))
         randReset = True
 
-      
+
       s.release()
 
 
@@ -192,7 +190,7 @@ parser.add_option('--target',        dest='target', metavar='string',
                                      help='name of the geom file with target distribution [%default]')
 parser.add_option('--tolerance',     dest='threshold', type='int', metavar='int',
                                      help='stopping criterion (bin number) [%default]')
-parser.add_option('--scale',         dest='scale',type='float', metavar='float',                                     
+parser.add_option('--scale',         dest='scale',type='float', metavar='float',
                                      help='maximum moving distance of perturbed seed in pixel [%default]')
 parser.add_option('--bins',          dest='bins', type='int', metavar='int',
                                      help='bins to sort beyond current best fit [%default]')
@@ -216,7 +214,7 @@ damask.util.report(scriptName,options.seedFile)
 if options.randomSeed is None:
   options.randomSeed = int(os.urandom(4).hex(),16)
 damask.util.croak(options.randomSeed)
-delta = (options.scale/options.grid[0],options.scale/options.grid[1],options.scale/options.grid[2])
+delta = options.scale/np.array(options.grid)
 baseFile=os.path.splitext(os.path.basename(options.seedFile))[0]
 points = np.array(options.grid).prod().astype('float')
 
@@ -257,7 +255,7 @@ for i in range(nMicrostructures):
   initialHist = np.histogram(initialData,bins=target[i]['bins'])[0]
   target[i]['error']=np.sqrt(np.square(np.array(target[i]['histogram']-initialHist)).sum())
 
-# as long as not all grain sizes are within the range, the error is the deviation to left and right 
+# as long as not all grain sizes are within the range, the error is the deviation to left and right
 if target[0]['error'] > 0.0:
   target[0]['error'] *=((target[0]['bins'][0]-np.min(initialData))**2.0+
                         (target[0]['bins'][1]-np.max(initialData))**2.0)**0.5
@@ -267,7 +265,7 @@ for i in range(nMicrostructures):
   match = i+1
 
 
-if options.maxseeds < 1: 
+if options.maxseeds < 1:
   maxSeeds = len(np.unique(initialGeom.microstructure))
 else:
   maxSeeds = options.maxseeds