2011-12-22 16:00:25 +05:30
|
|
|
# $Id$
|
|
|
|
|
2011-12-14 01:32:26 +05:30
|
|
|
class ASCIItable():
    '''
    Read and write whitespace-separated ASCII tables.

    A table optionally starts with a header whose first line matches
    "<N> head..." (e.g. "3 header"), followed by N header lines.  When
    labels are enabled, the last header line holds the column labels and
    the preceding N-1 lines are free-form info; otherwise all N lines
    are info.  A table without such a first line is assumed to start
    directly with its label line (if labels are enabled).

    Instance attributes:
      info    list of header info lines (strings)
      labels  list of column labels (strings)
      data    current data row (list of strings), or a numpy array after
              data_readArray()

    Output rows are collected in an internal buffer and written to the
    output stream by output_flush(); with buffered=False every
    output_write() flushes immediately.
    '''

    # Kept inside the class body: the default arguments of __init__ below
    # are evaluated in this scope and therefore need `sys` right here.
    import sys
    import numpy

    __slots__ = ['__IO__',
                 'info',
                 'labels',
                 'data',
                 ]

    # Types treated as "a single string" when flattening nested input.
    # `unicode` only exists on Python 2.
    try:
        _string_types = (str, unicode)                                      # noqa: F821
    except NameError:
        _string_types = (str,)

    # ------------------------------------------------------------------
    def __init__(self,
                 fileIn=sys.stdin,
                 fileOut=sys.stdout,
                 buffered=True,
                 labels=True):
        '''
        set up a table reading from fileIn and writing to fileOut

        fileIn    file-like object to read from
        fileOut   file-like object to write to
        buffered  if False, every output_write() is flushed immediately
        labels    whether the table carries a line of column labels
        '''
        self.__IO__ = {'in': fileIn,
                       'out': fileOut,
                       'output': [],                                        # pending output rows
                       'buffered': buffered,
                       'labels': labels,
                       'validReadSize': 0,                                  # number of columns accepted per data row
                       'dataStart': 0,                                      # stream position of first data row
                       }
        self.info = []
        self.labels = []
        self.data = []

    # ------------------------------------------------------------------
    def _transliterateToFloat(self, x):
        '''
        convert x to float, mapping anything non-convertible to 0.0
        '''
        try:
            return float(x)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    # ------------------------------------------------------------------
    def _flatten(self, what):
        '''
        flatten an item or arbitrarily nested iterable into a flat list of strings

        Strings are taken as is, iterables are flattened recursively, and
        any other (non-iterable) object is converted with str().
        '''
        if isinstance(what, self._string_types):
            return [what]
        try:
            items = iter(what)
        except TypeError:                                                   # not iterable: single non-string item
            return [str(what)]
        flat = []
        for item in items:
            flat.extend(self._flatten(item))
        return flat

    # ------------------------------------------------------------------
    def output_write(self,
                     what):
        '''
        aggregate a single row (string) or list of (possibly containing further lists of) rows into output

        Returns a true value if the rows were buffered, otherwise the
        result of output_flush().
        '''
        self.__IO__['output'] += self._flatten(what)
        return self.__IO__['buffered'] or self.output_flush()

    # ------------------------------------------------------------------
    def output_flush(self,
                     clear=True):
        '''
        write all pending output rows (newline-terminated) to the output stream

        clear  drop the pending rows after a successful write

        Returns False if writing raised an IOError (pending rows are
        kept in that case), True otherwise.
        '''
        try:
            if self.__IO__['output']:
                self.__IO__['out'].write('\n'.join(self.__IO__['output']) + '\n')
        except IOError:
            return False
        if clear:
            self.output_clear()
        return True

    # ------------------------------------------------------------------
    def output_clear(self):
        '''
        discard all pending output rows
        '''
        self.__IO__['output'] = []

    # ------------------------------------------------------------------
    def head_read(self):
        '''
        get column labels by either read the first row, or
        --if keyword "head[*]" is present-- the last line of the header

        Also fills self.info with the remaining header lines, records
        the stream position of the first data row, and determines the
        expected number of columns per data row.
        '''
        import re

        stream = self.__IO__['in']
        try:
            stream.seek(0)                                                  # best effort: pipes/stdin cannot be rewound
        except (AttributeError, EnvironmentError, ValueError):
            pass

        firstline = stream.readline()
        m = re.search(r'(\d+)\s*head', firstline.lower())

        if self.__IO__['labels']:                                           # table features labels
            if m:                                                           # found header info
                self.info = [stream.readline().strip() for _ in range(1, int(m.group(1)))]
                self.labels = stream.readline().split()
            else:                                                           # no header info (but labels)
                self.labels = firstline.split()
            self.__IO__['validReadSize'] = len(self.labels)
        elif m:                                                             # no labels, but header info
            self.info = [stream.readline().strip() for _ in range(0, int(m.group(1)))]  # all header is info
        # NOTE(review): without labels and without a header line, the
        # first line has already been consumed above and is not restored.

        try:
            self.__IO__['dataStart'] = stream.tell()                        # current file position is at start of data
        except IOError:
            pass

        if self.__IO__['validReadSize'] == 0:                               # in case no valid data length is known
            # assume constant data width from first line
            # NOTE(review): this consumes one data row; data_rewind() gets it back on seekable streams.
            self.__IO__['validReadSize'] = len(stream.readline().split())

    # ------------------------------------------------------------------
    def head_write(self):
        '''
        write current header information (info + labels)

        The leading "<N> header" line counts the info lines plus, if
        labels are enabled, the label line.  Returns the result of
        output_write().
        '''
        if self.__IO__['labels']:
            return self.output_write([
                '%i\theader' % (len(self.info) + 1),
                self.info,
                '\t'.join(self.labels),
            ])
        else:
            return self.output_write([
                '%i\theader' % (len(self.info)),
                self.info,
            ])

    # ------------------------------------------------------------------
    def labels_append(self,
                      what):
        '''
        add item or list to existing set of labels
        '''
        self.labels += self._flatten(what)
        self.__IO__['labels'] = True                                        # switch on processing (in particular writing) of labels

    # ------------------------------------------------------------------
    def labels_clear(self):
        '''
        remove all labels
        '''
        self.labels = []

    # ------------------------------------------------------------------
    def _label_index(self, label):
        '''
        resolve a single label: numbers pass through, names are looked up (-1 if unknown)
        '''
        try:
            return label + 0                                                # numeric: already an index
        except TypeError:
            try:
                return self.labels.index(label)
            except ValueError:
                return -1

    # ------------------------------------------------------------------
    def labels_index(self,
                     labels):
        '''
        tell index of column label(s)

        labels  a single label/number or a list of them

        Returns a list of indices for a list argument, a single index
        otherwise.  Unknown labels yield -1.
        '''
        if isinstance(labels, list):
            return [self._label_index(label) for label in labels]
        return self._label_index(labels)

    # ------------------------------------------------------------------
    def info_append(self,
                    what):
        '''
        add item or list to existing set of infos
        '''
        self.info += self._flatten(what)

    # ------------------------------------------------------------------
    def info_clear(self):
        '''
        remove all info lines
        '''
        self.info = []

    # ------------------------------------------------------------------
    def data_rewind(self):
        '''
        reposition the input stream at the first data row (as recorded by head_read)
        '''
        self.__IO__['in'].seek(self.__IO__['dataStart'])

    # ------------------------------------------------------------------
    def data_skipLines(self, lines):
        '''
        skip the given number of lines of the input stream
        '''
        for _ in range(lines):
            self.__IO__['in'].readline()

    # ------------------------------------------------------------------
    def data_read(self):
        '''
        read the next data row into self.data

        With labels enabled, at most 'validReadSize' items are used and
        rows with fewer items are discarded (self.data becomes empty);
        without labels all items of the row are taken.

        Returns True if self.data is non-empty afterwards.
        '''
        line = self.__IO__['in'].readline()                                 # get next data row
        if self.__IO__['labels']:
            width = self.__IO__['validReadSize']
            items = line.split()[:width]                                    # use up to valid size (label count)
            self.data = items if len(items) == width else []                # take if correct number of entries
        else:
            self.data = line.split()                                        # take all
        return self.data != []

    # ------------------------------------------------------------------
    def data_readLine(self, line):
        '''
        read data row number 'line' (counting from 1) into self.data
        '''
        self.__IO__['in'].seek(self.__IO__['dataStart'])
        for _ in range(line - 1):
            self.__IO__['in'].readline()
        self.data_read()

    # ------------------------------------------------------------------
    def data_readArray(self,
                       labels=[]):
        '''
        read whole data of all (given) labels as numpy array

        labels  label(s)/index(es) of the columns to read; the default
                (empty list, never modified here) selects all columns

        self.data becomes a 2D array of shape (rows, columns); that
        shape is returned.
        '''
        import numpy

        if labels == []:
            indices = list(range(self.__IO__['validReadSize']))             # use all columns
        else:
            indices = self.labels_index(labels)                             # use specified columns
            if not isinstance(indices, list):                               # single label given
                indices = [indices]
        # NOTE(review): unknown labels resolve to -1, which loadtxt
        # interprets as "last column".

        self.data_rewind()
        # ndmin=2 keeps (rows, columns) even for a single row or column
        self.data = numpy.loadtxt(self.__IO__['in'], usecols=indices, ndmin=2)
        return self.data.shape

    # ------------------------------------------------------------------
    def data_write(self, delimiter='\t'):
        '''
        queue self.data for output, items joined by delimiter

        self.data may be a single row or a list of rows (lists).
        Returns None if there is no data, otherwise the result of
        output_write().
        '''
        if len(self.data) == 0:
            return

        if isinstance(self.data[0], list):
            return self.output_write([delimiter.join(map(str, items)) for items in self.data])
        else:
            return self.output_write(delimiter.join(map(str, self.data)))

    # ------------------------------------------------------------------
    def data_writeArray(self, format='%g', delimiter='\t'):
        '''
        write whole numpy array data

        NOTE(review): writes straight to the output stream, bypassing
        the output buffer; rows still pending there are not flushed first.
        '''
        import numpy

        return numpy.savetxt(self.__IO__['out'], self.data, fmt=format, delimiter=delimiter)

    # ------------------------------------------------------------------
    def data_append(self,
                    what):
        '''
        add item or list to existing data row
        '''
        self.data += self._flatten(what)

    # ------------------------------------------------------------------
    def data_set(self,
                 what, where):
        '''
        set the data item in the column labeled 'where' to str(what)

        The data row is padded with 'n/a' if it is too short.  Returns
        the column index, or -1 (leaving data untouched) if the label is
        unknown.
        '''
        try:
            idx = self.labels.index(where)
        except ValueError:
            return -1

        missing = idx + 1 - len(self.data)
        if missing > 0:
            self.data_append(['n/a'] * missing)                             # grow data if too short
        self.data[idx] = str(what)
        return idx

    # ------------------------------------------------------------------
    def data_clear(self):
        '''
        empty the current data row
        '''
        self.data = []

    # ------------------------------------------------------------------
    def data_asFloat(self):
        '''
        return the current data row as list of floats (0.0 for non-numeric items)
        '''
        return [self._transliterateToFloat(item) for item in self.data]
|