DAMASK_EICMD/processing/post/sortTable.py

#!/usr/bin/env python
# -*- coding: UTF-8 no BOM -*-

import os,re,sys,string,numpy,damask
from optparse import OptionParser, Option

scriptID = '$Id$'
# scriptID is a keyword placeholder (presumably expanded by the version control system).
# When it is left unexpanded it splits into a single field and index 1 does not exist,
# so fall back to the name this script was invoked with instead of raising IndexError.
_idFields  = scriptID.split()
scriptName = _idFields[1] if len(_idFields) > 1 else os.path.basename(sys.argv[0])

# -----------------------------
class extendableOption(Option):
# -----------------------------
  """
  optparse Option that understands the additional action 'extend'.

  'extend' splits the option argument at every comma and appends the pieces to the
  list kept at the option's destination, so a list may be given in one argument
  and/or accumulated over repeated occurrences of the option.
  Taken from the online tutorial http://docs.python.org/library/optparse.html
  """

  # register 'extend' as an action that stores a value and always expects a typed argument
  ACTIONS              = Option.ACTIONS              + ("extend",)
  STORE_ACTIONS        = Option.STORE_ACTIONS        + ("extend",)
  TYPED_ACTIONS        = Option.TYPED_ACTIONS        + ("extend",)
  ALWAYS_TYPED_ACTIONS = Option.ALWAYS_TYPED_ACTIONS + ("extend",)

  def take_action(self, action, dest, opt, value, values, parser):
    """Handle 'extend' here and hand every other action over to the stock Option."""
    if action != "extend":
      Option.take_action(self, action, dest, opt, value, values, parser)
      return
    # ensure_value creates the list on first use (destination missing or None)
    values.ensure_value(dest, []).extend(value.split(","))


# --------------------------------------------------------------------
#                                MAIN
# --------------------------------------------------------------------

# Build the command line interface.
# The help text states the order that the code below actually produces: the label list is
# reversed before being handed to numpy.lexsort, which treats its LAST key as the primary
# one, so the first label given is the most significant (slowest varying) sort key.
parser = OptionParser(option_class=extendableOption, usage='%prog options [file[s]]', description = """
Sort rows by given column label.

Examples:
Coordinates may be given in column "x", "y", and "z".
Sorting with x slowest and z fastest varying index: --label x,y,z.
""" + scriptID.replace('\n','\\n')                                         # str method instead of deprecated string.replace
)


parser.add_option('-l','--label',   dest='keys', action='extend', type='string', metavar='<LIST>',
                                    help='list of column labels (a,b,c,...)')

parser.set_defaults(keys = [])                                            # default belongs to dest 'keys' of --label

(options,filenames) = parser.parse_args()

if not options.keys:                                                      # nothing to sort by (empty list or None)
  parser.error('No sorting column(s) specified.')

options.keys.reverse()                                                    # numpy sorts with most significant column as last

# ------------------------------------------ setup file handles ---------------------------------------  

# Collect one record per table to process: its name, input and output handles, and the
# stream used for progress messages ('croak').
files = []
if not filenames:                                                          # no file arguments: read STDIN, write STDOUT
  files.append({'name':'STDIN', 'input':sys.stdin, 'output':sys.stdout, 'croak':sys.stderr})
else:
  for name in filenames:
    if not os.path.exists(name):                                           # still skipped, but no longer silently
      sys.stderr.write('skipping non-existing file: '+name+'\n')
      continue
    # result goes to '<name>_tmp' next to the input file
    files.append({'name':name, 'input':open(name), 'output':open(name+'_tmp','w'), 'croak':sys.stderr})

# ------------------------------------------ loop over input files ---------------------------------------  

# Sort every collected table by the requested columns and write it back out.
for entry in files:                                                         # 'entry' avoids shadowing the builtin 'file'
  fromStdin = entry['name'] == 'STDIN'

  # report progress: script name wrapped in '\033[1m' ... '\033[0m', followed by the file name if any
  if fromStdin: entry['croak'].write('\033[1m'+scriptName+'\033[0m\n')
  else:         entry['croak'].write('\033[1m'+scriptName+'\033[0m: '+entry['name']+'\n')

  try:
    table = damask.ASCIItable(entry['input'],entry['output'],False)         # make unbuffered ASCII_table
    table.head_read()                                                       # read ASCII header info
    table.info_append(scriptID.replace('\n','\\n') + \
                      '\t' + ' '.join(sys.argv[1:]))                        # record script ID and arguments in the header info

# ------------------------------------------ assemble header ---------------------------------------  

    table.head_write()

# ------------------------------------------ process data ---------------------------------------  

    table.data_readArray()
    # NOTE(review): how labels_index reports a label that is not present in the table is not
    # visible from here -- confirm that an unknown label cannot silently select a wrong column.
    cols = [table.data[:,column] for column in table.labels_index(options.keys)]

    ind = numpy.lexsort(cols)                                               # last entry of cols is the primary sort key

    table.data = table.data[ind]                                            # reorder rows
    table.data_writeArray()

# ------------------------------------------ output result ---------------------------------------  

    table.output_flush()                                                    # just in case of buffered ASCII table
  finally:                                                                  # release handles even if processing failed
    entry['input'].close()                                                  # close input ASCII table
    if not fromStdin:
      entry['output'].close()                                               # close output ASCII table

  if not fromStdin:                                                         # only reached when processing succeeded
    os.rename(entry['name']+'_tmp',entry['name'])                           # overwrite old one with tmp new
new script to sort ASCIItable according to (multiple) columns 2013-12-14 09:18:50 +05:30			`#!/usr/bin/env python`
tested new scripts to update shebang, all files got same shebang (and for python files encoding) 2014-04-02 00:11:14 +05:30			`# -- coding: UTF-8 no BOM --`
new script to sort ASCIItable according to (multiple) columns 2013-12-14 09:18:50 +05:30
			`import os,re,sys,string,numpy,damask`
			`from optparse import OptionParser, Option`

			`scriptID = '$Id$'`
			`scriptName = scriptID.split()[1]`

			`# -----------------------------`
			`class extendableOption(Option):`
			`# -----------------------------`
			`# used for definition of new option parser action 'extend', which enables to take multiple option arguments`
			`# taken from online tutorial http://docs.python.org/library/optparse.html`

			`ACTIONS = Option.ACTIONS + ("extend",)`
			`STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)`
			`TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)`
			`ALWAYS_TYPED_ACTIONS = Option.ALWAYS_TYPED_ACTIONS + ("extend",)`

			`def take_action(self, action, dest, opt, value, values, parser):`
			`if action == "extend":`
			`lvalue = value.split(",")`
			`values.ensure_value(dest, []).extend(lvalue)`
			`else:`
			`Option.take_action(self, action, dest, opt, value, values, parser)`



			`# --------------------------------------------------------------------`
			`# MAIN`
			`# --------------------------------------------------------------------`

			`parser = OptionParser(option_class=extendableOption, usage='%prog options [file[s]]', description = """`
			`Sort rows by given column label.`

			`Examples:`
			`Coordinates may be given in column "x", "y", and "z".`
			`Sorting with x fastest and z slowest varying index: --label x,y,z.`
			`""" + string.replace(scriptID,'\n','\\n')`
			`)`


			`parser.add_option('-l','--label', dest='keys', action='extend', type='string', metavar='<LIST>',`
			`help='list of column labels (a,b,c,...)')`

			`parser.set_defaults(key = [])`

			`(options,filenames) = parser.parse_args()`

			`if options.keys == None:`
			`parser.error('No sorting column(s) specified.')`

			`options.keys.reverse() # numpy sorts with most significant column as last`

			`# ------------------------------------------ setup file handles ---------------------------------------`

			`files = []`
			`if filenames == []:`
			`files.append({'name':'STDIN', 'input':sys.stdin, 'output':sys.stdout, 'croak':sys.stderr})`
			`else:`
			`for name in filenames:`
			`if os.path.exists(name):`
			`files.append({'name':name, 'input':open(name), 'output':open(name+'_tmp','w'), 'croak':sys.stderr})`

			`# ------------------------------------------ loop over input files ---------------------------------------`

			`for file in files:`
			`if file['name'] != 'STDIN': file['croak'].write('\033[1m'+scriptName+'\033[0m: '+file['name']+'\n')`
			`else: file['croak'].write('\033[1m'+scriptName+'\033[0m\n')`

			`table = damask.ASCIItable(file['input'],file['output'],False) # make unbuffered ASCII_table`
			`table.head_read() # read ASCII header info`
			`table.info_append(string.replace(scriptID,'\n','\\n') + \`
			`'\t' + ' '.join(sys.argv[1:]))`

			`# ------------------------------------------ assemble header ---------------------------------------`

			`table.head_write()`

			`# ------------------------------------------ process data ---------------------------------------`

			`table.data_readArray()`
			`cols = []`
			`for column in table.labels_index(options.keys):`
			`cols += [table.data[:,column]]`

			`ind = numpy.lexsort(cols)`

			`table.data = table.data[ind]`
			`table.data_writeArray()`

			`# ------------------------------------------ output result ---------------------------------------`

			`table.output_flush() # just in case of buffered ASCII table`

			`file['input'].close() # close input ASCII table`
			`if file['name'] != 'STDIN':`
			`file['output'].close() # close output ASCII table`
			`os.rename(file['name']+'_tmp',file['name']) # overwrite old one with tmp new`