From 5257a2161f74cf29098d6ba171445dcaaa3c2b5e Mon Sep 17 00:00:00 2001
From: Tias	Maiti <maititia@egr.msu.edu>
Date: Thu, 9 Apr 2015 06:45:21 +0000
Subject: [PATCH] added options for x and y normalization

---
 processing/post/binXY.py | 153 +++++++++++++++++++++++++--------------
 1 file changed, 97 insertions(+), 56 deletions(-)

diff --git a/processing/post/binXY.py b/processing/post/binXY.py
index 73c73d37b..6eda517ac 100755
--- a/processing/post/binXY.py
+++ b/processing/post/binXY.py
@@ -18,24 +18,28 @@ Produces a binned grid of two columns from an ASCIItable, i.e. a two-dimensional
 
 """, version = scriptID)
 
-parser.add_option('-d','--data',    dest='data', nargs=2, type='int', metavar='int int',
-                                    help='columns containing x and y %default')
-parser.add_option('-w','--weight',  dest='weight', metavar='int', type='int',
-                                    help='column containing weight of (x,y) point [%default]')
+parser.add_option('-d','--data',    dest='data', nargs=2, type='string', metavar='string string',
+                                    help='column labels containing x and y %default')
+parser.add_option('-w','--weight',  dest='weight', metavar='string', type='string',
+                                    help='column label containing weight of (x,y) point [%default]')
 parser.add_option('-b','--bins',    dest='bins', nargs=2, type='int', metavar='int int',
                                     help='number of bins in x and y direction %default')
 parser.add_option('-t','--type',    dest='type', nargs=3, metavar='string string string',
                                     help='type (linear/log) of x, y, and z axis [linear]')
 parser.add_option('-x','--xrange',  dest='xrange', nargs=2, type='float', metavar='float float',
-                                    help='value range in x direction [auto]')
+                                    help='value minmax in x direction [auto]')
 parser.add_option('-y','--yrange',  dest='yrange', nargs=2, type='float', metavar='float float',
-                                    help='value range in y direction [auto]')
+                                    help='value minmax in y direction [auto]')
 parser.add_option('-z','--zrange',  dest='zrange', nargs=2, type='float', metavar='float float',
-                                    help='value range in z direction [auto]')
+                                    help='value minmax in z direction [auto]')
 parser.add_option('-i','--invert',  dest='invert', action='store_true',
                                     help='invert probability density [%default]')
+parser.add_option('-r','--rownormalize',  dest='normRow', action='store_true',
+                                    help='normalize probability density in each row [%default]')
+parser.add_option('-c','--colnormalize',  dest='normCol', action='store_true',
+                                    help='normalize probability density in each column [%default]')
 
-parser.set_defaults(data = (1,2))
+parser.set_defaults(data = None)
 parser.set_defaults(weight = None)
 parser.set_defaults(bins = (10,10))
 parser.set_defaults(type = ('linear','linear','linear'))
@@ -43,76 +47,113 @@ parser.set_defaults(xrange = (0.0,0.0))
 parser.set_defaults(yrange = (0.0,0.0))
 parser.set_defaults(zrange = (0.0,0.0))
 parser.set_defaults(invert = False)
+parser.set_defaults(normRow = False)
+parser.set_defaults(normCol = False)
 
 (options,filenames) = parser.parse_args()
 
-range =  np.array([np.array(options.xrange),
-                   np.array(options.yrange),
-                   np.array(options.zrange)])
+minmax =  np.array([np.array(options.xrange),
+                    np.array(options.yrange),
+                    np.array(options.zrange)])
 grid =   np.zeros(options.bins,'i')
-result = np.zeros((options.bins[0]*options.bins[1],3),'f')
+result = np.zeros((options.bins[0],options.bins[1],3),'f')
 
-prefix='binned%i-%i_'%(options.data[0],options.data[1])+ \
-                      ('weighted%i_'%(options.weight) if options.weight != None else '')
+datainfo = {                                                                                        # list of requested labels per datatype
+             'scalar':     {'len':1,
+                            'label':[]},
+           }
 
-# ------------------------------------------ setup file handles ------------------------------------
-files = []
+if options.data != None:   datainfo['scalar']['label'] += options.data                             # tuple of the two (x,y) labels
+if options.weight != None: datainfo['scalar']['label'] += [options.weight]                         # wrap in list: a bare string would be added character by character
+
+if len(datainfo['scalar']['label']) < 2:
+  parser.error('missing column labels')
+
+# --- loop over input files -------------------------------------------------------------------------
 if filenames == []:
-  files.append({'name':'STDIN', 'input':sys.stdin, 'output':sys.stdout, 'croak':sys.stderr})
-else:
-  for name in filenames:
-    if os.path.exists(name):
-      files.append({'name':name, 'input':open(name), 'output':open(name+'_tmp','w'), 'croak':sys.stderr})
+  filenames = ['STDIN']
 
-# ------------------------------------------ loop over input files ---------------------------------
-for file in files:
-  if file['name'] != 'STDIN': file['croak'].write('\033[1m'+scriptName+'\033[0m: '+file['name']+'\n')
-  else: file['croak'].write('\033[1m'+scriptName+'\033[0m\n')
+for name in filenames:
+  if name == 'STDIN':
+    file = {'name':'STDIN', 'input':sys.stdin, 'output':sys.stdout, 'croak':sys.stderr}
+    file['croak'].write('\033[1m'+scriptName+'\033[0m\n')
+  else:
+    if not os.path.exists(name): continue
+    file = {'name':name, 'input':open(name), 'output':open(name+'_tmp','w'), 'croak':sys.stderr}
+    file['croak'].write('\033[1m'+scriptName+'\033[0m: '+file['name']+'\n')
 
-  skip = int(file['input'].readline().split()[0])
-  for i in xrange(skip): headers = file['input'].readline().split()
-  data = np.loadtxt(file['input'],usecols=np.array(options.data+((options.weight,) if options.weight != None else ()))-1)
-  file['input'].close()                                                                              # close input ASCII table
+  table = damask.ASCIItable(file['input'],file['output'],buffered = False)                          # make unbuffered ASCII_table
+  table.head_read()                                                                                 # read ASCII header info
 
-  for i in (0,1):                                                                                    # check data range for x and y
-    if (range[i] == 0.0).all(): range[i] = [data[:,i].min(),data[:,i].max()]
+# --------------- figure out columns to process  ---------------------------------------------------
+  active = []
+  column = {}
+
+  for label in datainfo['scalar']['label']:
+    if label in table.labels:
+      active.append(label)
+      column[label] = table.labels.index(label)                                                     # remember columns of requested data
+    else:
+      file['croak'].write('column %s not found...\n'%label)
+  # NOTE(review): a missing label is only reported; code below indexes table.data[:,0..2] as x,y,weight by position, so a skipped label presumably shifts or drops a column -- confirm and abort here.
+# ------------------------------------------ assemble header ---------------------------------------
+  table.info_clear()
+  table.info_append(scriptID + '\t' + ' '.join(sys.argv[1:]))
+  table.labels = ['bin_%s'%options.data[0],'bin_%s'%options.data[1],'z']
+  table.head_write()
+
+# ------------------------------------------ process data ------------------------------------------
+  table.data_readArray([column[label] for label in active])
+
+  for i in (0,1):                                                                                    # check data minmax for x and y
+    if (minmax[i] == 0.0).all(): minmax[i] = [table.data[:,i].min(),table.data[:,i].max()]
     if options.type[i].lower() == 'log':                                                             # if log scale
-      data[:,i] = np.log(data[:,i])                                                                  # change x,y coordinates to log
-      range[i] = np.log(range[i])                                                                    # change range to log, too
+      table.data[:,i] = np.log(table.data[:,i])                                                                  # change x,y coordinates to log
+      minmax[i] = np.log(minmax[i])                                                                    # change minmax to log, too
+
+  delta = minmax[:,1]-minmax[:,0]
   
-  delta = range[:,1]-range[:,0]
+  for i in xrange(len(table.data)):
+    x = int(options.bins[0]*(table.data[i,0]-minmax[0,0])/delta[0])
+    y = int(options.bins[1]*(table.data[i,1]-minmax[1,0])/delta[1])
+    if x >= 0 and x < options.bins[0] and y >= 0 and y < options.bins[1]:
+      grid[x,y] += 1 if options.weight == None else table.data[i,2]                                        # count (weighted) occurrences
   
-  for i in xrange(len(data)):
-    x = int(options.bins[0]*(data[i,0]-range[0,0])/delta[0])
-    y = int(options.bins[1]*(data[i,1]-range[1,0])/delta[1])
-    if x >=0 and x < options.bins[0] and y >= 0 and y < options.bins[1]: grid[x,y] += 1 if options.weight == None else data[i,2]
-  
-  if (range[2] == 0.0).all(): range[2] = [grid.min(),grid.max()]
-  if (range[2] == 0.0).all():                                                                       # no data in grid?
+  if (minmax[2] == 0.0).all(): minmax[2] = [grid.min(),grid.max()]                                   # auto scale from data
+  if minmax[2,0] == minmax[2,1] and minmax[2,0] != 0.0:                                              # constant non-zero z: widen so delta != 0, keep (0,0) for the no-data check below
+    minmax[2,0] -= 1.
+    minmax[2,1] += 1.
+  if (minmax[2] == 0.0).all():                                                                       # no data in grid?
     file['croak'].write('no data found on grid...\n')
-    range[2,:] = np.array([0.0,1.0])                                                                # making up arbitrary z range
+    minmax[2,:] = np.array([0.0,1.0])                                                                # making up arbitrary z minmax
   if options.type[2].lower() == 'log':
     grid = np.log(grid)
-    range[2] = np.log(range[2])
+    minmax[2] = np.log(minmax[2])
     
-  delta[2] = range[2,1]-range[2,0]
+  delta[2] = minmax[2,1]-minmax[2,0]
 
-  
-  i = 0
   for x in xrange(options.bins[0]):
     for y in xrange(options.bins[1]):
-      result[i,:] = [range[0,0]+delta[0]/options.bins[0]*(x+0.5),
-                     range[1,0]+delta[1]/options.bins[1]*(y+0.5),
-                     min(1.0,max(0.0,(grid[x,y]-range[2,0])/delta[2]))]
-      if options.type[0].lower() == 'log': result[i,0] = np.exp(result[i,0])
-      if options.type[1].lower() == 'log': result[i,1] = np.exp(result[i,1])
-      if options.invert:                   result[i,2] = 1.0-result[i,2]
-      i += 1
+      result[x,y,:] = [minmax[0,0]+delta[0]/options.bins[0]*(x+0.5),
+                       minmax[1,0]+delta[1]/options.bins[1]*(y+0.5),
+                       min(1.0,max(0.0,(grid[x,y]-minmax[2,0])/delta[2]))]
+
+  if options.normCol:
+    for x in xrange(options.bins[0]):
+      result[x,:,2] /= (np.sum(result[x,:,2]) or 1.0)                                               # all-zero slice: divide by 1 instead of 0 to avoid NaN output
+  if options.normRow:
+    for y in xrange(options.bins[1]):
+      result[:,y,2] /= (np.sum(result[:,y,2]) or 1.0)                                               # all-zero slice: divide by 1 instead of 0 to avoid NaN output
+
+  for i in xrange(2):
+    if options.type[i].lower() == 'log': result[:,:,i] = np.exp(result[:,:,i])
+
+  if options.invert: result[:,:,2] = 1.0-result[:,:,2]
 
  # ------------------------------------------ output result -----------------------------------------       
-  file['output'].write('1\thead\n')
-  file['output'].write('bin_%s\tbin_%s\tz\n'%(headers[options.data[0]-1],headers[options.data[1]-1]))
-  np.savetxt(file['output'],result)
+  prefix = 'binned%s-%s_'%(options.data[0],options.data[1])+ \
+                          ('weighted%s_'%(options.weight) if options.weight != None else '')
+  np.savetxt(file['output'],result.reshape(options.bins[0]*options.bins[1],3))
   file['output'].close()                                                                            # close output ASCII table
   if file['name'] != 'STDIN':
     os.rename(file['name']+'_tmp',\