# -*- coding: UTF-8 no BOM -*-

# $Id$

import re
import sys

import numpy as np

try:                                                      # Python 2: byte and unicode strings
    _STRING_TYPES = (str, unicode)
except NameError:                                         # Python 3: str is the only text type
    _STRING_TYPES = (str,)

# errors treated as "this stream cannot be repositioned" (best-effort seeks)
_SEEK_ERRORS = (AttributeError, IOError, OSError, ValueError)

# "<count> head[er]" keyword on the (lower-cased) first line of a table
_HEADER_PATTERN = re.compile(r'(\d+)\s*head')


def _flatten(what):
    '''
       turn a string, a scalar, or an arbitrarily nested iterable
       into a flat list of strings (non-string leaves are passed through str())
    '''
    if isinstance(what, _STRING_TYPES):
        return [what]
    try:
        items = iter(what)
    except TypeError:                                     # not iterable: a scalar leaf
        return [str(what)]
    flat = []
    for item in items:
        flat.extend(_flatten(item))
    return flat


class ASCIItable():
    '''
       Line-oriented reader/writer for whitespace-separated text tables.

       A table may start with a line "<N> head[er]" followed by N header lines.
       With labels switched on, the last header line holds the column labels
       and the preceding ones are kept verbatim in `info`; without the keyword
       the first line is taken as the label line.
       With labels switched off, all N header lines are `info`.

       Attributes:
         info   -- list of header info lines (strings)
         labels -- list of column labels (strings)
         data   -- list of strings of the current row
                   (or a 2D numpy array after data_readArray)
    '''

    __slots__ = ['__IO__',
                 'info',
                 'labels',
                 'data',
                ]

# ------------------------------------------------------------------
    def __init__(self,
                 fileIn = sys.stdin,
                 fileOut = sys.stdout,
                 buffered = False,                        # False: every output_write is flushed at once
                 labels = True):                          # assume table has labels
        '''
           fileIn / fileOut -- file-like objects to read from / write to
           buffered         -- collect output until output_flush is called
           labels           -- table carries a line of column labels
        '''
        self.__IO__ = {'in': fileIn,
                       'out': fileOut,
                       'output': [],
                       'buffered': buffered,
                       'labels': labels,
                       'validReadSize': 0,
                       'readBuffer': [],                  # buffer to hold non-advancing reads
                       'dataStart': 0,
                      }
        self.info = []
        self.labels = []
        self.data = []

# ------------------------------------------------------------------
    def _transliterateToFloat(self, x):
        '''
           convert x to float, falling back to 0.0 if it cannot be converted
        '''
        try:
            return float(x)
        except (TypeError, ValueError, OverflowError):
            return 0.0

# ------------------------------------------------------------------
    def input_close(self):
        '''
           close the input stream
        '''
        return self.__IO__['in'].close()

# ------------------------------------------------------------------
    def output_write(self, what):
        '''
           aggregate a single row (string) or list of (possibly containing
           further lists of) rows into output.
           Returns True when buffering, otherwise the result of output_flush.
        '''
        self.__IO__['output'] += _flatten(what)
        return self.__IO__['buffered'] or self.output_flush()

# ------------------------------------------------------------------
    def output_flush(self, clear = True):
        '''
           write all pending output rows (one per line) to the output stream.
           Returns False if writing raised IOError (pending rows are kept then),
           True otherwise; pending rows are dropped afterwards if clear is set.
        '''
        pending = self.__IO__['output']
        if pending:
            try:
                self.__IO__['out'].write('\n'.join(pending) + '\n')
            except IOError:
                return False
        if clear:
            self.output_clear()
        return True

# ------------------------------------------------------------------
    def output_clear(self):
        '''
           drop all pending output rows
        '''
        self.__IO__['output'] = []

# ------------------------------------------------------------------
    def output_close(self):
        '''
           close the output stream
        '''
        return self.__IO__['out'].close()

# ------------------------------------------------------------------
    def head_read(self):
        '''
           get column labels by either reading the first row,
           or --if keyword "head[*]" is present-- the last line of the header
        '''
        stream = self.__IO__['in']
        try:
            stream.seek(0)
        except _SEEK_ERRORS:                              # e.g. a pipe: continue from current position
            rewound = False
        else:
            rewound = True
            self.__IO__['readBuffer'] = []                # lines peeked before the rewind are stale

        firstline = stream.readline()
        m = _HEADER_PATTERN.search(firstline.lower())

        if self.__IO__['labels']:                         # table features labels
            if m:                                         # found header info
                self.info = [stream.readline().strip() for _ in range(1, int(m.group(1)))]
                self.labels = stream.readline().split()
            else:                                         # no header info (but labels)
                self.labels = firstline.split()
            self.__IO__['validReadSize'] = len(self.labels)
        elif m:                                           # no labels: all header is info
            self.info = [stream.readline().strip() for _ in range(int(m.group(1)))]
        elif rewound:                                     # neither header nor labels:
            stream.seek(0)                                # first line is data already, so un-read it
        else:
            self.__IO__['readBuffer'].append(firstline)   # cannot seek: hand line to next data_read

        try:
            self.__IO__['dataStart'] = stream.tell()      # current file position is at start of data
        except (IOError, OSError):
            pass

        if self.__IO__['validReadSize'] == 0:             # in case no valid data length is known
            self.data_read(advance = False)
            self.__IO__['validReadSize'] = len(self.data) # assume constant data width from first line

# ------------------------------------------------------------------
    def head_write(self):
        '''
           write current header information (info + labels)
        '''
        if self.__IO__['labels']:
            return self.output_write(['%i\theader'%(len(self.info)+1),
                                      self.info,
                                      '\t'.join(self.labels),
                                     ])
        return self.output_write(['%i\theader'%(len(self.info)),
                                  self.info,
                                 ])

# ------------------------------------------------------------------
    def labels_append(self, what):
        '''
           add item or list to existing set of labels (and switch on labeling)
        '''
        self.labels += _flatten(what)
        self.__IO__['labels'] = True                      # switch on processing (in particular writing) of labels

# ------------------------------------------------------------------
    def labels_clear(self):
        '''
           delete existing labels and switch to no labeling
        '''
        self.labels = []
        self.__IO__['labels'] = False

# ------------------------------------------------------------------
    def labels_index(self, labels):
        '''
           tell index of column label(s).
           Numbers are passed through unchanged, unknown labels give -1.
           A list argument gives a list of indices, anything else a single index.
        '''
        def lookup(label):
            try:
                return label+0                            # numeric: already an index
            except TypeError:
                try:
                    return self.labels.index(label)
                except ValueError:
                    return -1

        if isinstance(labels, list):
            return [lookup(label) for label in labels]
        return lookup(labels)

# ------------------------------------------------------------------
    def info_append(self, what):
        '''
           add item or list to existing set of infos
        '''
        self.info += _flatten(what)

# ------------------------------------------------------------------
    def info_clear(self):
        '''
           delete any info block
        '''
        self.info = []

# ------------------------------------------------------------------
    def data_rewind(self):
        '''
           reposition input to start of data section (raises if stream cannot seek)
        '''
        self.__IO__['in'].seek(self.__IO__['dataStart'])  # position file to start of data section
        self.__IO__['readBuffer'] = []                    # delete any non-advancing data reads

# ------------------------------------------------------------------
    def data_skipLines(self, count):
        '''
           wind forward by count number of lines.
           Returns the status of the last read (True if nothing was skipped).
        '''
        alive = True                                      # count == 0 must not leave this unbound
        for _ in range(count):
            alive = self.data_read()
        return alive

# ------------------------------------------------------------------
    def data_read(self, advance = True):
        '''
           read next line (possibly buffered) and parse it into data array.
           With advance = False the line stays available for the next read.
           Returns True if the line yielded data.
        '''
        buffered = self.__IO__['readBuffer']
        if buffered:
            line = buffered.pop(0) if advance else buffered[0]  # peek must not consume buffered content
        else:
            line = self.__IO__['in'].readline()           # get next data row from file
            if not advance:
                buffered.append(line)                     # keep line just read in buffer

        items = line.split()
        if self.__IO__['labels']:
            width = self.__IO__['validReadSize']
            items = items[:width]                         # use up to valid size (label count)
            if len(items) != width:                       # take only if correct number of entries
                items = []
        self.data = items
        return self.data != []

# ------------------------------------------------------------------
    def data_readLine(self, line):
        '''
           seek beginning of data and wind forward to selected line (counting from 1)
        '''
        self.data_rewind()                                # also drops stale buffered lines
        stream = self.__IO__['in']
        for _ in range(line-1):
            stream.readline()
        self.data_read()

# ------------------------------------------------------------------
    def data_readArray(self, labels = None):
        '''
           read whole data of all (given) labels as numpy array.
           Columns are read in ascending column order and self.labels is
           reduced and reordered to match them.
           Returns the shape of the resulting 2D array.
        '''
        if labels is None:
            labels = []
        elif isinstance(labels, _STRING_TYPES):           # single label
            labels = [labels]
        else:
            labels = list(labels)

        if labels:                                        # read only some labels
            # NOTE(review): unknown labels give index -1 here; confirm how np.loadtxt treats it
            byColumn = dict(zip(self.labels_index(labels), labels))   # column <> label connections
            columns = sorted(byColumn)
            self.labels = [byColumn[column] for column in columns]    # labels in the order the columns are read
            self.__IO__['validReadSize'] = len(self.labels)
        else:
            columns = list(range(self.__IO__['validReadSize'])) or None  # use all columns

        try:
            self.data_rewind()                            # try to wind back to start of data
        except _SEEK_ERRORS:
            pass                                          # assume/hope we are at data start already...

        self.data = np.loadtxt(self.__IO__['in'], usecols = columns, ndmin = 2)
        return self.data.shape

# ------------------------------------------------------------------
    def data_write(self, delimiter = '\t'):
        '''
           write current data array and report alive output back.
           A list of lists is written as one row per inner list.
        '''
        if len(self.data) == 0:
            return True
        if isinstance(self.data[0], list):
            return self.output_write([delimiter.join(map(str, items)) for items in self.data])
        return self.output_write(delimiter.join(map(str, self.data)))

# ------------------------------------------------------------------
    def data_writeArray(self, format = '%g', delimiter = '\t'):
        '''
           write whole numpy array data directly to the output stream
           (pending buffered output is not touched)
        '''
        return np.savetxt(self.__IO__['out'], self.data, fmt = format, delimiter = delimiter)

# ------------------------------------------------------------------
    def data_append(self, what):
        '''
           add item or list to current row of data
        '''
        self.data += _flatten(what)

# ------------------------------------------------------------------
    def data_set(self, what, where):
        '''
           store what (as string) in the column labeled where, padding the row
           with 'n/a' if it is too short.
           Returns the column index, or -1 if the label is unknown.
        '''
        try:
            idx = self.labels.index(where)
        except ValueError:
            return -1
        missing = idx+1-len(self.data)
        if missing > 0:
            self.data_append(['n/a'] * missing)           # grow data if too short
        self.data[idx] = str(what)
        return idx

# ------------------------------------------------------------------
    def data_clear(self):
        '''
           drop current row of data
        '''
        self.data = []

# ------------------------------------------------------------------
    def data_asFloat(self):
        '''
           current row as list of floats (0.0 for entries that are not numbers)
        '''
        return [self._transliterateToFloat(x) for x in self.data]