ASCIItable -> Table

2020-03-18 13:47:09 +01:00 · 2020-03-18 13:47:09 +01:00 · b5a1295cb9
parent 1c75198af5
commit b5a1295cb9
4 changed files with 87 additions and 144 deletions
--- a/processing/post/binXY.py
+++ b/processing/post/binXY.py
@ -2,6 +2,7 @@

 import os
 import sys
+from io import StringIO
 from optparse import OptionParser

 import numpy as np
@ -71,60 +72,30 @@ parser.set_defaults(bins = (10,10),
                   )

 (options,filenames) = parser.parse_args()
+if filenames == []: filenames = [None]

-minmax = np.array([np.array(options.xrange),
-                   np.array(options.yrange),
-                   np.array(options.zrange)])
-grid   = np.zeros(options.bins,'f')
-result = np.zeros((options.bins[0],options.bins[1],3),'f')
+minmax = np.array([options.xrange,options.yrange,options.zrange])
+result = np.empty((options.bins[0],options.bins[1],3),'f')

 if options.data is None: parser.error('no data columns specified.')

-labels = list(options.data)
-
-
-if options.weight is not None: labels += [options.weight]                                               # prevent character splitting of single string value
-
-# --- loop over input files -------------------------------------------------------------------------
-
-if filenames == []: filenames = [None]
-
 for name in filenames:
-  try:
-    table = damask.ASCIItable(name = name,
-                              outname = os.path.join(os.path.dirname(name),
-                                                    'binned-{}-{}_'.format(*options.data) +
-                                                   ('weighted-{}_'.format(options.weight) if options.weight else '') +
-                                                    os.path.basename(name)) if name else name)
-  except IOError:
-    continue
  damask.util.report(scriptName,name)

-# ------------------------------------------ read header ------------------------------------------
-
-  table.head_read()
-
-# ------------------------------------------ sanity checks ----------------------------------------
-
-  missing_labels = table.data_readArray(labels)
-
-  if len(missing_labels) > 0:
-    damask.util.croak('column{} {} not found.'.format('s' if len(missing_labels) > 1 else '',', '.join(missing_labels)))
-    table.close(dismiss = True)
-    continue
+  table = damask.Table.from_ASCII(StringIO(sys.stdin.read()) if name is None else name)             # stdin is buffered so the reader gets a seekable stream
+  data = np.hstack((table.get(options.data[0]),table.get(options.data[1])))

  for c in (0,1):                                                                                   # check data minmax for x and y (i = 0 and 1)
-    if (minmax[c] == 0.0).all(): minmax[c] = [table.data[:,c].min(),table.data[:,c].max()]
+    if (minmax[c] == 0.0).all(): minmax[c] = [data[:,c].min(),data[:,c].max()]
    if options.type[c].lower() == 'log':                                                            # if log scale
-      table.data[:,c] = np.log(table.data[:,c])                                                     # change x,y coordinates to log
+      data[:,c] = np.log(data[:,c])                                                                 # change x,y coordinates to log
      minmax[c] = np.log(minmax[c])                                                                 # change minmax to log, too

  delta = minmax[:,1]-minmax[:,0]
-  (grid,xedges,yedges) = np.histogram2d(table.data[:,0],table.data[:,1],
+  (grid,xedges,yedges) = np.histogram2d(data[:,0],data[:,1],
                                        bins=options.bins,
                                        range=minmax[:2],
-                                        weights=None if options.weight is None else table.data[:,2])
-
+                                        weights=table.get(options.weight)[:,0] if options.weight else None)  # histogram2d needs 1D weights
  if options.normCol:
    for x in range(options.bins[0]):
      sum = np.sum(grid[x,:])
@ -153,24 +124,20 @@ for name in filenames:
    for y in range(options.bins[1]):
      result[x,y,:] = [minmax[0,0]+delta[0]/options.bins[0]*(x+0.5),
                       minmax[1,0]+delta[1]/options.bins[1]*(y+0.5),
-                       min(1.0,max(0.0,(grid[x,y]-minmax[2,0])/delta[2]))]
+                       np.clip((grid[x,y]-minmax[2,0])/delta[2],0.0,1.0)]

  for c in (0,1):
    if options.type[c].lower() == 'log': result[:,:,c] = np.exp(result[:,:,c])

  if options.invert: result[:,:,2] = 1.0 - result[:,:,2]

-# --- assemble header -------------------------------------------------------------------------------
-
-  table.info_clear()
-  table.info_append(scriptID + '\t' + ' '.join(sys.argv[1:]))
-  table.labels_clear()
-  table.labels_append(['bin_%s'%options.data[0],'bin_%s'%options.data[1],'z'])
-  table.head_write()
-
-# --- output result ---------------------------------------------------------------------------------
-
-  table.data = result.reshape(options.bins[0]*options.bins[1],3)
-  table.data_writeArray()
-
-  table.close()
+  comments = scriptID + '\t' + ' '.join(sys.argv[1:])
+  shapes = {'bin_%s'%options.data[0]:(1,),'bin_%s'%options.data[1]:(1,),'z':(1,)}
+  table  = damask.Table(result.reshape(options.bins[0]*options.bins[1],3),shapes,[comments])
+  if name:
+    outname = os.path.join(os.path.dirname(name),'binned-{}-{}_'.format(*options.data) +
+                                                ('weighted-{}_'.format(options.weight) if options.weight else '') +
+                                                 os.path.basename(name))
+    table.to_ASCII(outname)
+  else:
+    table.to_ASCII(sys.stdout)
--- a/processing/pre/geom_fromVoronoiTessellation.py
+++ b/processing/pre/geom_fromVoronoiTessellation.py
@ -255,8 +255,8 @@ for name in filenames:
  table.data_readArray(labels)
  coords    = table.data[:,table.label_indexrange(options.pos)] * info['size'] if options.normalized \
        else  table.data[:,table.label_indexrange(options.pos)] - info['origin']
-  eulers    = table.data[:,table.label_indexrange(options.eulers)] if hasEulers \
-              else np.zeros(3*len(coords))
+  if hasEulers:
+    eulers  = table.data[:,table.label_indexrange(options.eulers)]
  grains    = table.data[:,table.label_indexrange(options.microstructure)].astype(int) if hasGrains \
              else np.arange(len(coords))+1

--- a/processing/pre/hybridIA_linODFsampling.py
+++ b/processing/pre/hybridIA_linODFsampling.py
@ -1,11 +1,17 @
 #!/usr/bin/env python3
-# -*- coding: UTF-8 no BOM -*-

+import os
+import sys
+import math
+import random
 from optparse import OptionParser
+from io import StringIO
-import damask
-import os,sys,math,random
+
 import numpy as np

+import damask
+
+
 scriptName = os.path.splitext(os.path.basename(__file__))[0]
 scriptID   = ' '.join([scriptName,damask.version])

@ -223,54 +228,29 @@ parser.set_defaults(randomSeed = None,
                   )

 (options,filenames) = parser.parse_args()
-
-nSamples       = options.number
-methods        = [options.algorithm]
-
-
-# --- loop over input files -------------------------------------------------------------------------
-
 if filenames == []: filenames = [None]

 for name in filenames:
-  try:
-    table = damask.ASCIItable(name = name, readonly=True)
-  except IOError:
-    continue
  damask.util.report(scriptName,name)
+  
+  table = damask.Table.from_ASCII(StringIO(sys.stdin.read()) if name is None else name)             # stdin is buffered so the reader gets a seekable stream

-  randomSeed = int(os.urandom(4).hex(), 16)  if options.randomSeed is None else options.randomSeed         # random seed per file for second phase
+  randomSeed = int(os.urandom(4).hex(),16)  if options.randomSeed is None else options.randomSeed  # random seed per file
  random.seed(randomSeed)

-# ------------------------------------------ read header and data ---------------------------------
-  table.head_read()
-
-  errors = []
-  labels = ['1_euler','2_euler','3_euler','intensity']
-  for i,index in enumerate(table.label_index(labels)):
-    if index < 0: errors.append('label {} not present.'.format(labels[i]))
-  
-  if errors != []:
-    damask.util.croak(errors)
-    table.close(dismiss = True)
-    continue
-
-  table.data_readArray(labels)
-
 # --------------- figure out limits (left/right), delta, and interval -----------------------------
-
  ODF = {}
-  limits = np.array([np.min(table.data[:,0:3],axis=0),
-                     np.max(table.data[:,0:3],axis=0)])                                             # min/max euler angles in degrees
+  eulers = table.get('euler')
+  limits = np.array([np.min(eulers,axis=0),np.max(eulers,axis=0)])                                  # min/max euler angles in degrees
  ODF['limit'] = np.radians(limits[1,:])                                                            # right hand limits in radians
  ODF['center'] = 0.0 if all(limits[0,:]<1e-8) else 0.5                                             # vertex or cell centered

-  ODF['interval'] = np.array(list(map(len,[np.unique(table.data[:,i]) for i in range(3)])),'i')     # steps are number of distict values
+  ODF['interval'] = np.array(list(map(len,[np.unique(eulers[:,i]) for i in range(3)])),'i')         # steps are number of distict values
  ODF['nBins'] = ODF['interval'].prod()
  ODF['delta'] = np.radians(np.array(limits[1,0:3]-limits[0,0:3])/(ODF['interval']-1))              # step size

-  if table.data.shape[0] != ODF['nBins']:
-    damask.util.croak('expecting %i values but got %i'%(ODF['nBins'],table.data.shape[0]))
+  if eulers.shape[0] != ODF['nBins']:
+    damask.util.croak('expecting %i values but got %i'%(ODF['nBins'],eulers.shape[0]))
    continue
  
 # ----- build binnedODF array and normalize ------------------------------------------------------
@ -278,9 +258,10 @@ for name in filenames:
  ODF['dV_V'] = [None]*ODF['nBins']
  ODF['nNonZero'] = 0
  dg = ODF['delta'][0]*2.0*math.sin(ODF['delta'][1]/2.0)*ODF['delta'][2]
+  intensity = table.get('intensity')
  for b in range(ODF['nBins']):
    ODF['dV_V'][b] = \
-      max(0.0,table.data[b,table.label_index('intensity')]) * dg * \
+      max(0.0,intensity[b,0]) * dg * \
      math.sin(((b//ODF['interval'][2])%ODF['interval'][1]+ODF['center'])*ODF['delta'][1])
    if ODF['dV_V'][b] > 0.0:
      sumdV_V += ODF['dV_V'][b]
@ -296,11 +277,10 @@ for name in filenames:
               'Reference Integral: %12.11f\n'%(ODF['limit'][0]*ODF['limit'][2]*(1-math.cos(ODF['limit'][1]))),
               ])
                                                     
-# call methods
  Functions = {'IA': 'directInversion', 'STAT': 'TothVanHoutteSTAT', 'MC': 'MonteCarloBins'}
  method = Functions[options.algorithm]

-  Orientations, ReconstructedODF = (globals()[method])(ODF,nSamples)
+  Orientations, ReconstructedODF = (globals()[method])(ODF,options.number)
  
 # calculate accuracy of sample
  squaredDiff     = {'orig':0.0,method:0.0}
@ -319,7 +299,7 @@ for name in filenames:
  indivSum['orig'] += ODF['dV_V'][bin]
  indivSquaredSum['orig'] += ODF['dV_V'][bin]**2
  
-  damask.util.croak(['sqrt(N*)RMSD of ODFs:\t %12.11f'% math.sqrt(nSamples*squaredDiff[method]),
+  damask.util.croak(['sqrt(N*)RMSD of ODFs:\t %12.11f'% math.sqrt(options.number*squaredDiff[method]),
               'RMSrD of ODFs:\t %12.11f'%math.sqrt(squaredRelDiff[method]),
               'rMSD of ODFs:\t %12.11f'%(squaredDiff[method]/indivSquaredSum['orig']),
               'nNonZero correlation slope:\t %12.11f'\
@ -331,10 +311,10 @@ for name in filenames:
                        (indivSquaredSum[method]/ODF['nNonZero']-(indivSum[method]/ODF['nNonZero'])**2)))),
              ])
  
-  if method == 'IA' and nSamples < ODF['nNonZero']:
+  if method == 'IA' and options.number < ODF['nNonZero']:
    strOpt = '(%i)'%ODF['nNonZero']
  
-  formatwidth = 1+int(math.log10(nSamples))
+  formatwidth = 1+int(math.log10(options.number))

  materialConfig = [
      '#' + scriptID + ' ' + ' '.join(sys.argv[1:]),
@ -344,7 +324,7 @@ for name in filenames:
      '#-------------------#',
      ]
  
-  for i,ID in enumerate(range(nSamples)):
+  for i,ID in enumerate(range(options.number)):
    materialConfig += ['[Grain%s]'%(str(ID+1).zfill(formatwidth)),
                      '(constituent)   phase %i   texture %s   fraction 1.0'%(options.phase,str(ID+1).rjust(formatwidth)),
                     ]
@ -355,7 +335,7 @@ for name in filenames:
      '#-------------------#',
      ]

-  for ID in range(nSamples):
+  for ID in range(options.number):
    eulers = Orientations[ID]
  
    materialConfig += ['[Grain%s]'%(str(ID+1).zfill(formatwidth)),
@ -364,7 +344,5 @@ for name in filenames:

 #--- output finalization -------------------------------------------------------------------------- 

-  with (open(os.path.splitext(name)[0]+'_'+method+'_'+str(nSamples)+'_material.config','w')) as outfile:
+  with (open(os.path.splitext(name)[0]+'_'+method+'_'+str(options.number)+'_material.config','w')) as outfile:
    outfile.write('\n'.join(materialConfig)+'\n')
-
-  table.close()
--- a/processing/pre/seeds_fromDistribution.py
+++ b/processing/pre/seeds_fromDistribution.py
@ -1,9 +1,15 @@
 #!/usr/bin/env python3

-import threading,time,os,sys,random
-import numpy as np
+import threading
+import time
+import os
+import sys
+import random
 from optparse import OptionParser
 from io import StringIO
+
+import numpy as np
+
 import damask

 scriptName = os.path.splitext(os.path.basename(__file__))[0]
@ -35,11 +41,11 @@ class myThread (threading.Thread):
    s.acquire()
    bestMatch = match
    s.release()
-    
+
    random.seed(options.randomSeed+self.threadID)                                                   # initializes to given seeds
    knownSeedsUpdate = bestSeedsUpdate -1.0                                                         # trigger update of local best seeds
    randReset = True                                                                                # aquire new direction
-    
+
    myBestSeedsVFile    = StringIO()                                                                # store local copy of best seeds file
    perturbedSeedsVFile = StringIO()                                                                # perturbed best seeds file
    perturbedGeomVFile  = StringIO()                                                                # tessellated geom file
@ -49,14 +55,14 @@ class myThread (threading.Thread):
      s.acquire()                                                                                   # ensure only one thread acces global data
      if bestSeedsUpdate > knownSeedsUpdate:                                                        # write best fit to virtual file
        knownSeedsUpdate = bestSeedsUpdate
-        bestSeedsVFile.reset()
+        bestSeedsVFile.seek(0)
        myBestSeedsVFile.close()
        myBestSeedsVFile = StringIO()
        i=0
        for line in bestSeedsVFile:
          myBestSeedsVFile.write(line)
      s.release()
-      
+
      if randReset:                                                                                 # new direction because current one led to worse fit

        randReset = False
@ -67,46 +73,38 @@ class myThread (threading.Thread):
        for i in range(NmoveGrains):
          selectedMs.append(random.randrange(1,nMicrostructures))

-          direction.append(np.array(((random.random()-0.5)*delta[0],
-                                     (random.random()-0.5)*delta[1],
-                                     (random.random()-0.5)*delta[2])))
-        
+          direction.append((np.array([random.random() for _ in range(3)])-0.5)*delta)             # independent draw per axis from the per-thread seeded generator
+
      perturbedSeedsVFile.close()                                                                   # reset virtual file
      perturbedSeedsVFile = StringIO()
-      myBestSeedsVFile.reset()
+      myBestSeedsVFile.seek(0)

-      perturbedSeedsTable = damask.ASCIItable(myBestSeedsVFile,perturbedSeedsVFile,labeled=True)    # write best fit to perturbed seed file
-      perturbedSeedsTable.head_read()
-      perturbedSeedsTable.head_write()
-      outputAlive=True
-      ms = 1
+      perturbedSeedsTable = damask.Table.from_ASCII(myBestSeedsVFile)
+      coords = perturbedSeedsTable.get('pos')
      i = 0
-      while outputAlive and perturbedSeedsTable.data_read():                                        # perturbe selected microstructure
+      for ms,coord in enumerate(coords,start=1):                                                    # seed IDs count from 1, matching the range selectedMs is drawn from
        if ms in selectedMs:
-          newCoords=np.array(tuple(map(float,perturbedSeedsTable.data[0:3]))+direction[i])
+          newCoords=coord+direction[i]
          newCoords=np.where(newCoords>=1.0,newCoords-1.0,newCoords)                                # ensure that the seeds remain in the box
          newCoords=np.where(newCoords <0.0,newCoords+1.0,newCoords)
-          perturbedSeedsTable.data[0:3]=[format(f, '8.6f') for f in newCoords]
+          coord[:]=newCoords                                                                        # update the row just read; i only indexes direction
          direction[i]*=2.
          i+= 1
-        ms+=1
-        perturbedSeedsTable.data_write()
-#--- do tesselation with perturbed seed file ----------------------------------------------------------
+      perturbedSeedsTable.set('pos',coords)
+      perturbedSeedsTable.to_ASCII(perturbedSeedsVFile)
+
+#--- do tesselation with perturbed seed file ------------------------------------------------------
      perturbedGeomVFile.close()
      perturbedGeomVFile = StringIO()
-      perturbedSeedsVFile.reset()
+      perturbedSeedsVFile.seek(0)
      perturbedGeomVFile.write(damask.util.execute('geom_fromVoronoiTessellation '+
                     ' -g '+' '.join(list(map(str, options.grid))),streamIn=perturbedSeedsVFile)[0])
-      perturbedGeomVFile.reset()
+      perturbedGeomVFile.seek(0)

-#--- evaluate current seeds file ----------------------------------------------------------------------
-      perturbedGeomTable = damask.ASCIItable(perturbedGeomVFile,None,labeled=False,readonly=True)
-      perturbedGeomTable.head_read()
-      for i in perturbedGeomTable.info:
-        if i.startswith('microstructures'): myNmicrostructures = int(i.split('\t')[1])
-      perturbedGeomTable.data_readArray()
-      perturbedGeomTable.output_flush()
-      currentData=np.bincount(perturbedGeomTable.data.astype(int).ravel())[1:]/points
+#--- evaluate current seeds file ------------------------------------------------------------------
+      perturbedGeom = damask.Geom.from_file(perturbedGeomVFile)
+      myNmicrostructures = len(np.unique(perturbedGeom.microstructure))
+      currentData=np.bincount(perturbedGeom.microstructure.ravel())[1:]/points
      currentError=[]
      currentHist=[]
      for i in range(nMicrostructures):                                                             # calculate the deviation in all bins per histogram
@ -114,8 +112,8 @@ class myThread (threading.Thread):
        currentError.append(np.sqrt(np.square(np.array(target[i]['histogram']-currentHist[i])).sum()))

 # as long as not all grains are within the range of the target, use the deviation to left and right as error
-      if currentError[0]>0.0:                                                                       
-        currentError[0] *=((target[0]['bins'][0]-np.min(currentData))**2.0+                        
+      if currentError[0]>0.0:
+        currentError[0] *=((target[0]['bins'][0]-np.min(currentData))**2.0+
                           (target[0]['bins'][1]-np.max(currentData))**2.0)**0.5                    # norm of deviations by number of usual bin deviation
      s.acquire()                                                                                   # do the evaluation serially
      bestMatch = match
@ -137,7 +135,7 @@ class myThread (threading.Thread):
            damask.util.croak('          target: '+np.array_str(target[i]['histogram']))
            damask.util.croak('          best:   '+np.array_str(currentHist[i]))
            currentSeedsName = baseFile+'_'+str(bestSeedsUpdate).replace('.','-')                   # name of new seed file (use time as unique identifier)
-            perturbedSeedsVFile.reset()
+            perturbedSeedsVFile.seek(0)
            bestSeedsVFile.close()
            bestSeedsVFile = StringIO()
            sys.stdout.flush()
@ -154,7 +152,7 @@ class myThread (threading.Thread):
            break
          if i == min(nMicrostructures,myMatch+options.bins)-1:                                     # same quality as before: take it to keep on moving
            bestSeedsUpdate = time.time()
-            perturbedSeedsVFile.reset()
+            perturbedSeedsVFile.seek(0)
            bestSeedsVFile.close()
            bestSeedsVFile = StringIO()
            for line in perturbedSeedsVFile:
@ -167,7 +165,7 @@ class myThread (threading.Thread):
                                                         .format(self.threadID,myNmicrostructures))
        randReset = True

-      
+
      s.release()


@ -192,7 +190,7 @@ parser.add_option('--target',        dest='target', metavar='string',
                                     help='name of the geom file with target distribution [%default]')
 parser.add_option('--tolerance',     dest='threshold', type='int', metavar='int',
                                     help='stopping criterion (bin number) [%default]')
-parser.add_option('--scale',         dest='scale',type='float', metavar='float',                                     
+parser.add_option('--scale',         dest='scale',type='float', metavar='float',
                                     help='maximum moving distance of perturbed seed in pixel [%default]')
 parser.add_option('--bins',          dest='bins', type='int', metavar='int',
                                     help='bins to sort beyond current best fit [%default]')
@ -216,7 +214,7 @@ damask.util.report(scriptName,options.seedFile)
 if options.randomSeed is None:
  options.randomSeed = int(os.urandom(4).hex(),16)
 damask.util.croak(options.randomSeed)
-delta = (options.scale/options.grid[0],options.scale/options.grid[1],options.scale/options.grid[2])
+delta = options.scale/np.array(options.grid)
 baseFile=os.path.splitext(os.path.basename(options.seedFile))[0]
 points = np.array(options.grid).prod().astype('float')

@ -257,7 +255,7 @@ for i in range(nMicrostructures):
  initialHist = np.histogram(initialData,bins=target[i]['bins'])[0]
  target[i]['error']=np.sqrt(np.square(np.array(target[i]['histogram']-initialHist)).sum())

-# as long as not all grain sizes are within the range, the error is the deviation to left and right 
+# as long as not all grain sizes are within the range, the error is the deviation to left and right
 if target[0]['error'] > 0.0:
  target[0]['error'] *=((target[0]['bins'][0]-np.min(initialData))**2.0+
                        (target[0]['bins'][1]-np.max(initialData))**2.0)**0.5
@ -267,7 +265,7 @@ for i in range(nMicrostructures):
  match = i+1


-if options.maxseeds < 1: 
+if options.maxseeds < 1:
  maxSeeds = len(np.unique(initialGeom.microstructure))
 else:
  maxSeeds = options.maxseeds