# -*- coding: UTF-8 no BOM -*-

# $Id$

import os,sys
import numpy as np

class ASCIItable():
  '''
     Line-oriented reader/writer for whitespace-separated ASCII tables.

     A table optionally starts with a header of the form

         N<whitespace>head...     first line, N = number of header lines that follow
         info line(s)
         label line               last header line (only if the table is labeled)

     followed by rows of data.

     Attributes:
       info    list of info strings taken from / written to the header
       labels  list of column labels
       data    items of the current row (list of strings), or the whole
               table as numpy array after data_readArray()
  '''

  __slots__ = ['__IO__',
               'info',
               'labels',
               'data',
              ]

  _stringTypes = (str, type(u''))                           # str and unicode on Python 2, just str on Python 3

# ------------------------------------------------------------------
  def __init__(self,
               fileIn = sys.stdin,
               fileOut = sys.stdout,
               buffered = False,                            # flush writes
               labels = True):                              # assume table has labels
    '''
       set up table on given input and output streams.
       buffered = True collects output until output_flush() is called,
       labels = False treats the table as not having a label line.
    '''
    self.__IO__ = {'in': fileIn,
                   'out':fileOut,
                   'output':[],
                   'buffered':buffered,
                   'labels':labels,
                   'validReadSize': 0,
                   'readBuffer': [],                        # buffer to hold non-advancing reads
                   'dataStart': 0,
                  }
    self.info = []
    self.labels = []
    self.data = []

# ------------------------------------------------------------------
  def _transliterateToFloat(self,x):
    '''
       convert x to float, falling back to 0.0 for anything not convertible
    '''
    try:
      return float(x)
    except (TypeError, ValueError):
      return 0.0

# ------------------------------------------------------------------
  def _flatten(self,what):
    '''
       turn a single item or an arbitrarily nested iterable of items into a flat list.
       strings are kept as they are, any other non-iterable item is converted with str().
    '''
    if isinstance(what, self._stringTypes):
      return [what]
    try:
      items = iter(what)
    except TypeError:                                       # not iterable --> single item
      return [str(what)]

    flat = []
    for item in items:
      flat += self._flatten(item)
    return flat

# ------------------------------------------------------------------
  def close(self,dismiss = False):
    '''
       close input and output (optionally removing the output file)
    '''
    self.input_close()
    self.output_close(dismiss)

# ------------------------------------------------------------------
  def input_close(self):
    '''
       close input stream
    '''
    self.__IO__['in'].close()

# ------------------------------------------------------------------
  def output_write(self,
                   what):
    '''
       aggregate a single row (string) or list of (possibly containing further lists of) rows into output.
       returns True if output is buffered, otherwise the result of flushing the output.
    '''
    self.__IO__['output'] += self._flatten(what)

    return self.__IO__['buffered'] or self.output_flush()

# ------------------------------------------------------------------
  def output_flush(self,
                   clear = True):
    '''
       write all aggregated rows to the output stream (and forget them if clear is set).
       returns False if writing raised an IOError, True otherwise.
    '''
    pending = self.__IO__['output']
    if pending:
      try:
        self.__IO__['out'].write('\n'.join(pending) + '\n')
      except IOError:
        return False                                        # keep rows such that a later flush can retry
    if clear: self.output_clear()
    return True

# ------------------------------------------------------------------
  def output_clear(self):
    '''
       forget all aggregated but not yet written rows
    '''
    self.__IO__['output'] = []

# ------------------------------------------------------------------
  def output_close(self, dismiss = False):
    '''
       close output stream and, if dismiss is set, delete the file it was writing to.
       rows that are still buffered are NOT flushed by this call.
    '''
    self.__IO__['out'].close()
    if dismiss:
      name = getattr(self.__IO__['out'],'name',None)
      if isinstance(name, self._stringTypes) and os.path.isfile(name):    # streams like stdout or StringIO have no file to remove
        os.remove(name)

# ------------------------------------------------------------------
  def head_read(self):
    '''
       get column labels by either reading the first row, or
       --if keyword "head[*]" is present-- the last line of the header
    '''
    import re

    try:
      self.__IO__['in'].seek(0)
      rewound = True
    except Exception:                                                                               # best effort: input might not be seekable (pipe)
      rewound = False
    if rewound:
      self.__IO__['readBuffer'] = []                                                                # buffered lines are read again from file

    firstline = self.__IO__['in'].readline()
    m = re.search(r'(\d+)\s+head', firstline.lower())
    if self.__IO__['labels']:                                                                       # table features labels
      if m:                                                                                         # found header info
        self.info      = [self.__IO__['in'].readline().strip() for i in range(1,int(m.group(1)))]
        self.labels    =  self.__IO__['in'].readline().split()
      else:                                                                                         # no header info (but labels)
        self.labels    = firstline.split()

      self.__IO__['validReadSize'] = len(self.labels)

    elif m:                                                                                         # no labels, but header info
      self.info        = [self.__IO__['in'].readline().strip() for i in range(0,int(m.group(1)))]   # all header is info
    elif rewound:                                                                                   # neither labels nor header:
      self.__IO__['in'].seek(0)                                                                     # first line is data already --> go back to it
    elif firstline:
      self.__IO__['readBuffer'].append(firstline)                                                   # cannot go back --> keep first line for next read

    try:
      self.__IO__['dataStart'] = self.__IO__['in'].tell()                                           # current file position is at start of data
    except (IOError, OSError, AttributeError):
      pass

    if self.__IO__['validReadSize'] == 0:                                                           # in case no valid data length is known
      self.data_read(advance = False)
      self.__IO__['validReadSize'] = len(self.data)                                                 # assume constant data width from first line

# ------------------------------------------------------------------
  def head_write(self):
    '''
       write current header information (info + labels)
    '''
    if self.__IO__['labels']:
      return self.output_write ([
                                 '%i\theader'%(len(self.info)+1),
                                 self.info,
                                 '\t'.join(self.labels),
                                ])
    else:
      return self.output_write ([
                                 '%i\theader'%(len(self.info)),
                                 self.info,
                                ])

# ------------------------------------------------------------------
  def labels_append(self,
                    what):
    '''
       add item or list to existing set of labels (and switch on labeling)
    '''
    self.labels += self._flatten(what)
    self.__IO__['labels'] = True                                        # switch on processing (in particular writing) of labels

# ------------------------------------------------------------------
  def labels_clear(self):
    '''
       delete existing labels and switch to no labeling
    '''
    self.labels = []
    self.__IO__['labels'] = False

# ------------------------------------------------------------------
  def labels_index(self,
                   labels):
    '''
       tell index of column label(s).
       return numpy array if asked for list of labels.
       transparently deals with label positions implicitly given as numbers or their headings given as strings.
       labels that are not found yield -1; a single None yields None, None entries of a list are skipped.
    '''
    if isinstance(labels,list):                                         # check whether list of labels is requested
      idx = []
      for label in labels:
        if label is not None:
          try:
            idx.append(int(label))                                      # column given as integer number?
          except (TypeError, ValueError):
            try:
              idx.append(self.labels.index(label))                      # locate string in label list
            except ValueError:
              idx.append(-1)                                            # not found...
      return np.array(idx,dtype = int)                                  # integer type even if empty (usable as index)

    try:
      idx = int(labels)
    except (TypeError, ValueError):                                     # TypeError: int(None)
      try:
        idx = self.labels.index(labels)
      except ValueError:
        idx = None if labels is None else -1

    return idx

# ------------------------------------------------------------------
  def info_append(self,
                  what):
    '''
       add item or list to existing set of infos
    '''
    self.info += self._flatten(what)

# ------------------------------------------------------------------
  def info_clear(self):
    '''
       delete any info block
    '''
    self.info = []

# ------------------------------------------------------------------
  def data_rewind(self):
    '''
       position input at start of data section and drop any buffered reads
    '''
    self.__IO__['in'].seek(self.__IO__['dataStart'])                                  # position file to start of data section
    self.__IO__['readBuffer'] = []                                                    # delete any non-advancing data reads

# ------------------------------------------------------------------
  def data_skipLines(self,count):
    '''
       wind forward by count number of lines.
       returns result of last read (True if nothing was skipped)
    '''
    alive = True                                                                      # defined even for count <= 0
    for i in range(count):
      alive = self.data_read()

    return alive

# ------------------------------------------------------------------
  def data_read(self,advance = True):
    '''
       read next line (possibly buffered) and parse it into data array.
       with advance = False the line is kept and will be returned again by the next read.
       returns whether any data has been read.
    '''
    if len(self.__IO__['readBuffer']) > 0:
      line = self.__IO__['readBuffer'].pop(0)                                         # take buffered content
    else:
      line = self.__IO__['in'].readline()                                             # get next data row from file

    if not advance:
      self.__IO__['readBuffer'].append(line)                                          # keep line just read in buffer

    if self.__IO__['labels']:
      items = line.split()[:self.__IO__['validReadSize']]                             # use up to valid size (label count)
      self.data = items if len(items) == self.__IO__['validReadSize'] else []         # take if correct number of entries
    else:
      self.data = line.split()                                                        # take all

    return self.data != []

# ------------------------------------------------------------------
  def data_readLine(self,line):
    '''
       seek beginning of data and wind forward to selected line (counting from 1)
    '''
    self.data_rewind()                                                                # also drops buffered lines, which would otherwise be read first
    for i in range(line-1):
      self.__IO__['in'].readline()
    self.data_read()

# ------------------------------------------------------------------
  def data_readArray(self,
                     labels = None):
    '''
       read whole data of all (given) labels as numpy array.
       labels can be a single label or a list of labels (heading or column number);
       None or an empty list selects all columns.
       restricts self.labels to the requested ones that are present and
       returns the requested labels that are missing in the table.
    '''

    if not isinstance(labels,list):
      labels = [labels]
    if labels == [None] or labels == []:
      use = np.arange(self.__IO__['validReadSize'])                                                 # use all columns (and keep labels intact)
      labels_missing = []
    else:
      indices = self.labels_index(labels)                                                           # check requested labels
      present  = np.where(indices >= 0)[0]                                                          # positions in request list of labels that are present ...
      missing  = np.where(indices <  0)[0]                                                          # ... and missing in table
      labels_missing    =      np.array(     labels)        [missing]                               # corresponding labels ...
      self.labels       = list(np.array(self.labels)[indices[present]])                             # ... for missing and present columns
      self.__IO__['validReadSize'] = len(present)                                                   # update data width
      use = indices[present]

    source = self.__IO__['in']
    try:
      self.data_rewind()                                                                            # try to wind back to start of data
    except Exception:                                                                               # assume/hope we are at data start already...
      pending = self.__IO__['readBuffer']
      if pending:                                                                                   # ... but lines from non-advancing reads are not in the stream anymore
        import itertools
        self.__IO__['readBuffer'] = []
        source = itertools.chain(pending,source)                                                    # replay them ahead of the remaining input
    self.data = np.loadtxt(source, usecols=use,ndmin=2)
    return labels_missing

# ------------------------------------------------------------------
  def data_write(self,delimiter = '\t'):
    '''
       write current data array and report alive output back
    '''
    if len(self.data) == 0: return True

    if isinstance(self.data[0],list):                                                 # list of rows
      return self.output_write([delimiter.join(map(str,items)) for items in self.data])
    else:                                                                             # single row
      return self.output_write(delimiter.join(map(str,self.data)))

# ------------------------------------------------------------------
  def data_writeArray(self,format = '%g',delimiter = '\t'):
    '''
       write whole numpy array data.
       writes directly to the output stream, i.e. not via the output buffer filled by output_write.
    '''
    return np.savetxt(self.__IO__['out'],self.data,fmt = format,delimiter = delimiter)

# ------------------------------------------------------------------
  def data_append(self,
                  what):
    '''
       add item or list to current data row
    '''
    self.data += self._flatten(what)

# ------------------------------------------------------------------
  def data_set(self,
               what,where):
    '''
       set data item in column labeled where to str(what), growing the row with 'n/a' if too short.
       returns column index or -1 if label is unknown.
    '''
    idx = -1
    try:
      idx = self.labels.index(where)
      if len(self.data) <= idx:
        self.data_append(['n/a' for i in range(idx+1-len(self.data))])          # grow data if too short
      self.data[idx] = str(what)
    except ValueError:
      pass

    return idx

# ------------------------------------------------------------------
  def data_clear(self):
    '''
       empty current data
    '''
    self.data = []

# ------------------------------------------------------------------
  def data_asFloat(self):
    '''
       return current data as list of floats (0.0 for items that are not numbers)
    '''
    return [self._transliterateToFloat(item) for item in self.data]