Changed the "labels" property to "tags". Added a method to query (abstract) labels, i.e. one that interprets the tag list to distill the underlying label "meaning".

2016-05-16 19:54:00 -04:00 · 2016-05-16 19:54:00 -04:00 · dc6260be58
parent 8540748220
commit dc6260be58
1 changed files with 75 additions and 37 deletions
--- a/lib/damask/asciitable.py
+++ b/lib/damask/asciitable.py
@ -1,6 +1,5 @@
 # -*- coding: UTF-8 no BOM -*-

-
 import os,sys
 import numpy as np

@ -26,7 +25,7 @@ class ASCIItable():
    self.__IO__ = {'output': [],
                   'buffered': buffered,
                   'labeled':  labeled,                                                             # header contains labels
-                   'labels': [],                                                                    # labels according to file info
+                   'tags': [],                                                                      # labels according to file info
                   'readBuffer': [],                                                                # buffer to hold non-advancing reads
                   'dataStart': 0,
                  }
@ -50,7 +49,7 @@ class ASCIItable():
      self.__IO__['out'] = outname

    self.info   = []
-    self.labels = []
+    self.tags  = []
    self.data   = []
    self.line   = ''

@ -160,7 +159,7 @@ class ASCIItable():
      if self.__IO__['labeled']:                                                                    # table features labels

        self.info   = [self.__IO__['in'].readline().strip() for i in xrange(1,int(m.group(1)))]
-        self.labels = shlex.split(self.__IO__['in'].readline())                                     # store labels found in last line
+        self.tags = shlex.split(self.__IO__['in'].readline())                                       # store tags found in last line

      else:

@ -179,11 +178,11 @@ class ASCIItable():
        else: break                                                                                 # last line of comments

      if self.__IO__['labeled']:                                                                    # table features labels
-        self.labels = self.data                                                                     # get labels from last line in "header"...
+        self.tags = self.data                                                                       # get tags from last line in "header"...
        self.data_read()                                                                            # ...and remove from buffer
        
-    if self.__IO__['labeled']:                                                                      # table features labels
-      self.__IO__['labels'] = list(self.labels)                                                     # backup labels (make COPY, not link)
+    if self.__IO__['labeled']:                                                                      # table features tags
+      self.__IO__['tags'] = list(self.tags)                                                         # backup tags (make COPY, not link)

    try:
      self.__IO__['dataStart'] = self.__IO__['in'].tell()                                           # current file position is at start of data
@ -196,7 +195,7 @@ class ASCIItable():
    """write current header information (info + labels)"""
    head = ['{}\theader'.format(len(self.info)+self.__IO__['labeled'])] if header else []
    head.append(self.info)
-    if self.__IO__['labeled']: head.append('\t'.join(map(self._quote,self.labels)))
+    if self.__IO__['labeled']: head.append('\t'.join(map(self._quote,self.tags)))
    
    return self.output_write(head)

@ -260,19 +259,58 @@ class ASCIItable():
      try:
        for item in what: self.labels_append(item)
      except:
-        self.labels += [self._removeCRLF(str(what))]
+        self.tags += [self._removeCRLF(str(what))]
    else:
-      self.labels += [self._removeCRLF(what)]
+      self.tags += [self._removeCRLF(what)]

-    self.__IO__['labeled'] = True                                                                  # switch on processing (in particular writing) of labels
-    if reset: self.__IO__['labels'] = list(self.labels)                                            # subsequent data_read uses current labels as data size
+    self.__IO__['labeled'] = True                                                                  # switch on processing (in particular writing) of tags
+    if reset: self.__IO__['tags'] = list(self.tags)                                                # subsequent data_read uses current tags as data size

 # ------------------------------------------------------------------
  def labels_clear(self):
    """delete existing labels and switch to no labeling"""
-    self.labels = []
+    self.tags = []                                                                                 # discard all column tags held by the object
    self.__IO__['labeled'] = False

+# ------------------------------------------------------------------
+  def labels(self,
+             tags = None,
+             raw = False,
+             ):
+    """
+    returns abstract labels (e.g. "x" for "1_x","2_x",...)
+    unless raw output is requested.
+    operates on object tags or given list.
+    """
+    
+    from collections import Iterable
+    
+    if tags is None: tags = self.tags
+
+    if isinstance(tags, Iterable) and not raw:                                                    # check whether list of tags is requested
+      id = 0
+      dim = 1
+      labelList = []
+
+      while id < len(tags):
+        if not tags[id].startswith('1_'):
+          labelList.append(tags[id])
+        else:
+          label = tags[id][2:]                                                                    # get label
+          while id < len(tags) and tags[id] == '{}_{}'.format(dim,label):                         # check successors
+            id  += 1                                                                              # next label...
+            dim += 1                                                                              # ...should be one higher dimension
+          LabelList.append(label)                                                                 # reached end --> store
+          id -= 1                                                                                 # rewind one to consider again
+
+        id += 1
+        dim = 1
+
+    else:
+      labelList = self.tags
+
+    return labelList
+
 # ------------------------------------------------------------------
  def label_index(self,
                  labels):
@ -292,10 +330,10 @@ class ASCIItable():
            idx.append(int(label)-1)                                                                # column given as integer number?
          except ValueError:
            try:
-              idx.append(self.labels.index(label))                                                  # locate string in label list
+              idx.append(self.tags.index(label))                                                    # locate string in label list
            except ValueError:
              try:
-                idx.append(self.labels.index('1_'+label))                                           # locate '1_'+string in label list
+                idx.append(self.tags.index('1_'+label))                                             # locate '1_'+string in label list
              except ValueError:
               idx.append(-1)                                                                       # not found...
    else:
@ -303,10 +341,10 @@ class ASCIItable():
        idx = int(labels)-1                                                                         # offset for python array indexing
      except ValueError:
        try:
-          idx = self.labels.index(labels)
+          idx = self.tags.index(labels)
        except ValueError:
          try:
-            idx = self.labels.index('1_'+labels)                                                    # locate '1_'+string in label list
+            idx = self.tags.index('1_'+labels)                                                      # locate '1_'+string in label list
          except ValueError:
            idx = None if labels is None else -1

@ -331,16 +369,16 @@ class ASCIItable():
          try:                                                                                      # column given as number?
            idx = int(label)-1
            myDim = 1                                                                               # if found has at least dimension 1
-            if self.labels[idx].startswith('1_'):                                                   # column has multidim indicator?
-              while idx+myDim < len(self.labels) and self.labels[idx+myDim].startswith("%i_"%(myDim+1)):
+            if self.tags[idx].startswith('1_'):                                                     # column has multidim indicator?
+              while idx+myDim < len(self.tags) and self.tags[idx+myDim].startswith("%i_"%(myDim+1)):
                myDim += 1                                                                          # add while found
          except ValueError:                                                                        # column has string label
-            if label in self.labels:                                                                # can be directly found?
+            if label in self.tags:                                                                  # can be directly found?
              myDim = 1                                                                             # scalar by definition
-            elif '1_'+label in self.labels:                                                         # look for first entry of possible multidim object
-              idx = self.labels.index('1_'+label)                                                   # get starting column
+            elif '1_'+label in self.tags:                                                           # look for first entry of possible multidim object
+              idx = self.tags.index('1_'+label)                                                     # get starting column
              myDim = 1                                                                             # (at least) one-dimensional
-              while idx+myDim < len(self.labels) and self.labels[idx+myDim].startswith("%i_"%(myDim+1)):
+              while idx+myDim < len(self.tags) and self.tags[idx+myDim].startswith("%i_"%(myDim+1)):
                myDim += 1                                                                          # keep adding while going through object

          dim.append(myDim)
@ -350,16 +388,16 @@ class ASCIItable():
      try:                                                                                          # column given as number?
        idx = int(labels)-1
        dim = 1                                                                                     # if found has at least dimension 1
-        if self.labels[idx].startswith('1_'):                                                       # column has multidim indicator?
-          while idx+dim < len(self.labels) and self.labels[idx+dim].startswith("%i_"%(dim+1)):
+        if self.tags[idx].startswith('1_'):                                                         # column has multidim indicator?
+          while idx+dim < len(self.tags) and self.tags[idx+dim].startswith("%i_"%(dim+1)):
            dim += 1                                                                                # add as long as found
      except ValueError:                                                                            # column has string label
-        if labels in self.labels:                                                                   # can be directly found?
+        if labels in self.tags:                                                                     # can be directly found?
          dim = 1                                                                                   # scalar by definition
-        elif '1_'+labels in self.labels:                                                            # look for first entry of possible multidim object
-          idx = self.labels.index('1_'+labels)                                                      # get starting column
+        elif '1_'+labels in self.tags:                                                              # look for first entry of possible multidim object
+          idx = self.tags.index('1_'+labels)                                                        # get starting column
          dim = 1                                                                                   # is (at least) one-dimensional
-          while idx+dim < len(self.labels) and self.labels[idx+dim].startswith("%i_"%(dim+1)):
+          while idx+dim < len(self.tags) and self.tags[idx+dim].startswith("%i_"%(dim+1)):
            dim += 1                                                                                # keep adding while going through object

    return np.array(dim) if isinstance(dim,Iterable) else dim
@ -403,8 +441,8 @@ class ASCIItable():
  def data_rewind(self):
    self.__IO__['in'].seek(self.__IO__['dataStart'])                                                # position file to start of data section
    self.__IO__['readBuffer'] = []                                                                  # delete any non-advancing data reads
-    self.labels = list(self.__IO__['labels'])                                                       # restore label info found in header (as COPY, not link)
-    self.__IO__['labeled'] = len(self.labels) > 0
+    self.tags = list(self.__IO__['tags'])                                                           # restore label info found in header (as COPY, not link)
+    self.__IO__['labeled'] = len(self.tags) > 0

 # ------------------------------------------------------------------
  def data_skipLines(self,
@ -431,8 +469,8 @@ class ASCIItable():
    self.line = self.line.rstrip('\n')

    if self.__IO__['labeled'] and respectLabels:                                                    # if table has labels
-      items = shlex.split(self.line)[:len(self.__IO__['labels'])]                                   # use up to label count (from original file info)
-      self.data = items if len(items) == len(self.__IO__['labels']) else []                         # take entries if label count matches
+      items = shlex.split(self.line)[:len(self.__IO__['tags'])]                                     # use up to label count (from original file info)
+      self.data = items if len(items) == len(self.__IO__['tags']) else []                           # take entries if label count matches
    else:
      self.data = shlex.split(self.line)                                                            # otherwise take all

@ -469,7 +507,7 @@ class ASCIItable():
                           1))                                                                      
      use = np.array(columns)

-      self.labels = list(np.array(self.labels)[use])                                                # update labels with valid subset
+      self.tags = list(np.array(self.tags)[use])                                                    # update labels with valid subset

    self.data = np.loadtxt(self.__IO__['in'],usecols=use,ndmin=2)