changed "labels" property to "tags". added method to inquire about (abstract) labels, i.e. interpreting the tag list to distill underlying label "meaning".
This commit is contained in:
parent
8540748220
commit
dc6260be58
|
@ -1,6 +1,5 @@
|
|||
# -*- coding: UTF-8 no BOM -*-
|
||||
|
||||
|
||||
import os,sys
|
||||
import numpy as np
|
||||
|
||||
|
@ -26,7 +25,7 @@ class ASCIItable():
|
|||
self.__IO__ = {'output': [],
|
||||
'buffered': buffered,
|
||||
'labeled': labeled, # header contains labels
|
||||
'labels': [], # labels according to file info
|
||||
'tags': [], # labels according to file info
|
||||
'readBuffer': [], # buffer to hold non-advancing reads
|
||||
'dataStart': 0,
|
||||
}
|
||||
|
@ -50,7 +49,7 @@ class ASCIItable():
|
|||
self.__IO__['out'] = outname
|
||||
|
||||
self.info = []
|
||||
self.labels = []
|
||||
self.tags = []
|
||||
self.data = []
|
||||
self.line = ''
|
||||
|
||||
|
@ -160,7 +159,7 @@ class ASCIItable():
|
|||
if self.__IO__['labeled']: # table features labels
|
||||
|
||||
self.info = [self.__IO__['in'].readline().strip() for i in xrange(1,int(m.group(1)))]
|
||||
self.labels = shlex.split(self.__IO__['in'].readline()) # store labels found in last line
|
||||
self.tags = shlex.split(self.__IO__['in'].readline()) # store tags found in last line
|
||||
|
||||
else:
|
||||
|
||||
|
@ -179,11 +178,11 @@ class ASCIItable():
|
|||
else: break # last line of comments
|
||||
|
||||
if self.__IO__['labeled']: # table features labels
|
||||
self.labels = self.data # get labels from last line in "header"...
|
||||
self.tags = self.data # get tags from last line in "header"...
|
||||
self.data_read() # ...and remove from buffer
|
||||
|
||||
if self.__IO__['labeled']: # table features labels
|
||||
self.__IO__['labels'] = list(self.labels) # backup labels (make COPY, not link)
|
||||
if self.__IO__['labeled']: # table features tags
|
||||
self.__IO__['tags'] = list(self.tags) # backup tags (make COPY, not link)
|
||||
|
||||
try:
|
||||
self.__IO__['dataStart'] = self.__IO__['in'].tell() # current file position is at start of data
|
||||
|
@ -196,7 +195,7 @@ class ASCIItable():
|
|||
"""write current header information (info + labels)"""
|
||||
head = ['{}\theader'.format(len(self.info)+self.__IO__['labeled'])] if header else []
|
||||
head.append(self.info)
|
||||
if self.__IO__['labeled']: head.append('\t'.join(map(self._quote,self.labels)))
|
||||
if self.__IO__['labeled']: head.append('\t'.join(map(self._quote,self.tags)))
|
||||
|
||||
return self.output_write(head)
|
||||
|
||||
|
@ -260,19 +259,58 @@ class ASCIItable():
|
|||
try:
|
||||
for item in what: self.labels_append(item)
|
||||
except:
|
||||
self.labels += [self._removeCRLF(str(what))]
|
||||
self.tags += [self._removeCRLF(str(what))]
|
||||
else:
|
||||
self.labels += [self._removeCRLF(what)]
|
||||
self.tags += [self._removeCRLF(what)]
|
||||
|
||||
self.__IO__['labeled'] = True # switch on processing (in particular writing) of labels
|
||||
if reset: self.__IO__['labels'] = list(self.labels) # subsequent data_read uses current labels as data size
|
||||
self.__IO__['labeled'] = True # switch on processing (in particular writing) of tags
|
||||
if reset: self.__IO__['tags'] = list(self.tags) # subsequent data_read uses current tags as data size
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
def labels_clear(self):
|
||||
"""delete existing labels and switch to no labeling"""
|
||||
self.labels = []
|
||||
self.tags = []
|
||||
self.__IO__['labeled'] = False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
def labels(self,
|
||||
tags = None,
|
||||
raw = False,
|
||||
):
|
||||
"""
|
||||
returns abstract labels (e.g. "x" for "1_x","2_x",...)
|
||||
unless raw output is requested.
|
||||
operates on object tags or given list.
|
||||
"""
|
||||
|
||||
from collections import Iterable
|
||||
|
||||
if tags is None: tags = self.tags
|
||||
|
||||
if isinstance(tags, Iterable) and not raw: # check whether list of tags is requested
|
||||
id = 0
|
||||
dim = 1
|
||||
labelList = []
|
||||
|
||||
while id < len(tags):
|
||||
if not tags[id].startswith('1_'):
|
||||
labelList.append(tags[id])
|
||||
else:
|
||||
label = tags[id][2:] # get label
|
||||
while id < len(tags) and tags[id] == '{}_{}'.format(dim,label): # check successors
|
||||
id += 1 # next label...
|
||||
dim += 1 # ...should be one higher dimension
|
||||
LabelList.append(label) # reached end --> store
|
||||
id -= 1 # rewind one to consider again
|
||||
|
||||
id += 1
|
||||
dim = 1
|
||||
|
||||
else:
|
||||
labelList = self.tags
|
||||
|
||||
return labelList
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
def label_index(self,
|
||||
labels):
|
||||
|
@ -292,10 +330,10 @@ class ASCIItable():
|
|||
idx.append(int(label)-1) # column given as integer number?
|
||||
except ValueError:
|
||||
try:
|
||||
idx.append(self.labels.index(label)) # locate string in label list
|
||||
idx.append(self.tags.index(label)) # locate string in label list
|
||||
except ValueError:
|
||||
try:
|
||||
idx.append(self.labels.index('1_'+label)) # locate '1_'+string in label list
|
||||
idx.append(self.tags.index('1_'+label)) # locate '1_'+string in label list
|
||||
except ValueError:
|
||||
idx.append(-1) # not found...
|
||||
else:
|
||||
|
@ -303,10 +341,10 @@ class ASCIItable():
|
|||
idx = int(labels)-1 # offset for python array indexing
|
||||
except ValueError:
|
||||
try:
|
||||
idx = self.labels.index(labels)
|
||||
idx = self.tags.index(labels)
|
||||
except ValueError:
|
||||
try:
|
||||
idx = self.labels.index('1_'+labels) # locate '1_'+string in label list
|
||||
idx = self.tags.index('1_'+labels) # locate '1_'+string in label list
|
||||
except ValueError:
|
||||
idx = None if labels is None else -1
|
||||
|
||||
|
@ -331,16 +369,16 @@ class ASCIItable():
|
|||
try: # column given as number?
|
||||
idx = int(label)-1
|
||||
myDim = 1 # if found has at least dimension 1
|
||||
if self.labels[idx].startswith('1_'): # column has multidim indicator?
|
||||
while idx+myDim < len(self.labels) and self.labels[idx+myDim].startswith("%i_"%(myDim+1)):
|
||||
if self.tags[idx].startswith('1_'): # column has multidim indicator?
|
||||
while idx+myDim < len(self.tags) and self.tags[idx+myDim].startswith("%i_"%(myDim+1)):
|
||||
myDim += 1 # add while found
|
||||
except ValueError: # column has string label
|
||||
if label in self.labels: # can be directly found?
|
||||
if label in self.tags: # can be directly found?
|
||||
myDim = 1 # scalar by definition
|
||||
elif '1_'+label in self.labels: # look for first entry of possible multidim object
|
||||
idx = self.labels.index('1_'+label) # get starting column
|
||||
elif '1_'+label in self.tags: # look for first entry of possible multidim object
|
||||
idx = self.tags.index('1_'+label) # get starting column
|
||||
myDim = 1 # (at least) one-dimensional
|
||||
while idx+myDim < len(self.labels) and self.labels[idx+myDim].startswith("%i_"%(myDim+1)):
|
||||
while idx+myDim < len(self.tags) and self.tags[idx+myDim].startswith("%i_"%(myDim+1)):
|
||||
myDim += 1 # keep adding while going through object
|
||||
|
||||
dim.append(myDim)
|
||||
|
@ -350,16 +388,16 @@ class ASCIItable():
|
|||
try: # column given as number?
|
||||
idx = int(labels)-1
|
||||
dim = 1 # if found has at least dimension 1
|
||||
if self.labels[idx].startswith('1_'): # column has multidim indicator?
|
||||
while idx+dim < len(self.labels) and self.labels[idx+dim].startswith("%i_"%(dim+1)):
|
||||
if self.tags[idx].startswith('1_'): # column has multidim indicator?
|
||||
while idx+dim < len(self.tags) and self.tags[idx+dim].startswith("%i_"%(dim+1)):
|
||||
dim += 1 # add as long as found
|
||||
except ValueError: # column has string label
|
||||
if labels in self.labels: # can be directly found?
|
||||
if labels in self.tags: # can be directly found?
|
||||
dim = 1 # scalar by definition
|
||||
elif '1_'+labels in self.labels: # look for first entry of possible multidim object
|
||||
idx = self.labels.index('1_'+labels) # get starting column
|
||||
elif '1_'+labels in self.tags: # look for first entry of possible multidim object
|
||||
idx = self.tags.index('1_'+labels) # get starting column
|
||||
dim = 1 # is (at least) one-dimensional
|
||||
while idx+dim < len(self.labels) and self.labels[idx+dim].startswith("%i_"%(dim+1)):
|
||||
while idx+dim < len(self.tags) and self.tags[idx+dim].startswith("%i_"%(dim+1)):
|
||||
dim += 1 # keep adding while going through object
|
||||
|
||||
return np.array(dim) if isinstance(dim,Iterable) else dim
|
||||
|
@ -403,8 +441,8 @@ class ASCIItable():
|
|||
def data_rewind(self):
|
||||
self.__IO__['in'].seek(self.__IO__['dataStart']) # position file to start of data section
|
||||
self.__IO__['readBuffer'] = [] # delete any non-advancing data reads
|
||||
self.labels = list(self.__IO__['labels']) # restore label info found in header (as COPY, not link)
|
||||
self.__IO__['labeled'] = len(self.labels) > 0
|
||||
self.tags = list(self.__IO__['tags']) # restore label info found in header (as COPY, not link)
|
||||
self.__IO__['labeled'] = len(self.tags) > 0
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
def data_skipLines(self,
|
||||
|
@ -431,8 +469,8 @@ class ASCIItable():
|
|||
self.line = self.line.rstrip('\n')
|
||||
|
||||
if self.__IO__['labeled'] and respectLabels: # if table has labels
|
||||
items = shlex.split(self.line)[:len(self.__IO__['labels'])] # use up to label count (from original file info)
|
||||
self.data = items if len(items) == len(self.__IO__['labels']) else [] # take entries if label count matches
|
||||
items = shlex.split(self.line)[:len(self.__IO__['tags'])] # use up to label count (from original file info)
|
||||
self.data = items if len(items) == len(self.__IO__['tags']) else [] # take entries if label count matches
|
||||
else:
|
||||
self.data = shlex.split(self.line) # otherwise take all
|
||||
|
||||
|
@ -469,7 +507,7 @@ class ASCIItable():
|
|||
1))
|
||||
use = np.array(columns)
|
||||
|
||||
self.labels = list(np.array(self.labels)[use]) # update labels with valid subset
|
||||
self.tags = list(np.array(self.tags)[use]) # update labels with valid subset
|
||||
|
||||
self.data = np.loadtxt(self.__IO__['in'],usecols=use,ndmin=2)
|
||||
|
||||
|
|
Loading…
Reference in New Issue