DAMASK_EICMD/python/damask/table.py

352 lines
10 KiB
Python
Raw Normal View History

2019-10-31 15:15:34 +05:30
import re
import pandas as pd
import numpy as np
2020-01-13 00:05:45 +05:30
from . import version
2020-03-13 05:00:49 +05:30
class Table:
    """
    Store spreadsheet-like data.

    Data is kept in a pandas.DataFrame whose column labels are 'condensed':
    every component of a vector/tensor column carries the same label
    (e.g. 'v v v'). The shape of each labelled column is stored in 'shapes'.
    """

    def __init__(self,data,shapes,comments=None):
        """
        New spreadsheet.

        Parameters
        ----------
        data : numpy.ndarray or pandas.DataFrame
            Data. Column labels from a pandas.DataFrame will be replaced.
        shapes : dict with str:tuple pairs
            Shapes of the columns. Example 'F':(3,3) for a deformation gradient.
        comments : iterable of str, optional
            Additional, human-readable information.

        """
        self.comments = [] if comments is None else list(comments)
        self.data     = pd.DataFrame(data=data)
        # Copy: add/delete/join modify self.shapes and must not alter the caller's dict.
        self.shapes   = dict(shapes)
        self._label_condensed()


    def _label_flat(self):
        """Label data individually, e.g. v v v ==> 1_v 2_v 3_v."""
        labels = []
        for label,shape in self.shapes.items():
            size = int(np.prod(shape))
            labels += ['{}{}'.format('' if size == 1 else '{}_'.format(i+1),label) for i in range(size)]
        self.data.columns = labels


    def _label_condensed(self):
        """Label data condensed, e.g. 1_v 2_v 3_v ==> v v v."""
        labels = []
        for label,shape in self.shapes.items():
            labels += [label] * int(np.prod(shape))
        self.data.columns = labels


    def _add_comment(self,label,shape,info):
        """
        Append a comment of the form 'label [shape]: info'.

        Nothing is added if info is empty or None. The shape is only
        mentioned for columns with more than one component.
        """
        if info:
            c = '{}{}: {}'.format(label,' '+str(shape) if np.prod(shape,dtype=int) > 1 else '',info)
            self.comments.append(c)


    @staticmethod
    def from_ASCII(fname):
        """
        Create table from ASCII file.

        The first line can indicate the number of subsequent header lines as 'n header',
        alternatively first line is the header and comments are marked by '#' ('new style').
        Vector data column labels are indicated by '1_v, 2_v, ..., n_v'.
        Tensor data column labels are indicated by '3x3:1_T, 3x3:2_T, ..., 3x3:9_T'.

        Parameters
        ----------
        fname : file, str, or pathlib.Path
            Filename or file for reading.

        Returns
        -------
        table : Table
            Table holding data, column shapes, and comments of the file.

        """
        try:
            f = open(fname)
            opened_here = True
        except TypeError:                                                                           # not a path: assume file-like object
            f = fname
            f.seek(0)
            opened_here = False

        try:
            try:
                N_comment_lines,keyword = f.readline().strip().split(maxsplit=1)
                if keyword != 'header':
                    raise ValueError
                else:
                    comments = [f.readline().strip() for i in range(1,int(N_comment_lines))]
                    labels   = f.readline().split()
            except ValueError:                                                                      # no 'n header' line: new style
                f.seek(0)
                comments = []
                line = f.readline().strip()
                while line.startswith('#'):
                    comments.append(line.lstrip('#').strip())
                    line = f.readline().strip()
                labels = line.split()

            shapes = {}
            for label in labels:
                tensor_column = re.search(r'[0-9,x]*?:[0-9]*?_',label)
                if tensor_column:
                    my_shape = tensor_column.group().split(':',1)[0].split('x')
                    shapes[label.split('_',1)[1]] = tuple([int(d) for d in my_shape])
                else:
                    vector_column = re.match(r'[0-9]*?_',label)
                    if vector_column:
                        # components are listed in order, the last index seen is the length
                        shapes[label.split('_',1)[1]] = (int(label.split('_',1)[0]),)
                    else:
                        shapes[label] = (1,)

            data = pd.read_csv(f,names=list(range(len(labels))),sep=r'\s+')
        finally:
            if opened_here: f.close()                                                               # only close what was opened here

        return Table(data,shapes,comments)


    @staticmethod
    def from_ang(fname):
        """
        Create table from TSL ang file.

        A valid TSL ang file needs to contain the following columns:
        * Euler angles (Bunge notation) in radians, 3 floats, label 'eu'.
        * Spatial position in meters, 2 floats, label 'pos'.
        * Image quality, 1 float, label 'IQ'.
        * Confidence index, 1 float, label 'CI'.
        * Phase ID, 1 int, label 'ID'.
        * SEM signal, 1 float, label 'intensity'.
        * Fit, 1 float, label 'fit'.

        Columns beyond these ten are labelled 'n/a_1', 'n/a_2', ...

        Parameters
        ----------
        fname : file, str, or pathlib.Path
            Filename or file for reading.

        Returns
        -------
        table : Table
            Table holding the data and the '#' header lines as comments.

        """
        shapes = {'eu':(3,), 'pos':(2,),
                  'IQ':(1,), 'CI':(1,), 'ID':(1,), 'intensity':(1,), 'fit':(1,)}
        try:
            f = open(fname)
            opened_here = True
        except TypeError:                                                                           # not a path: assume file-like object
            f = fname
            f.seek(0)
            opened_here = False

        try:
            content = f.readlines()
        finally:
            if opened_here: f.close()

        comments = ['table.py:from_ang v {}'.format(version)]
        for line in content:
            if line.startswith('#'):
                comments.append(line.strip())
            else:
                break

        data = np.loadtxt(content)                                                                  # '#' lines are skipped by loadtxt
        for c in range(data.shape[1]-10):
            shapes['n/a_{}'.format(c+1)] = (1,)

        return Table(data,shapes,comments)


    @property
    def labels(self):
        """Labels of all columns, in column order."""
        return list(self.shapes)


    def get(self,label):
        """
        Get column data.

        Parameters
        ----------
        label : str
            Column label. A single component of a vector/tensor column
            can be selected by its 1-based index, e.g. '2_v'.

        Returns
        -------
        data : numpy.ndarray
            Array of shape (rows,1) for a single component, otherwise
            of shape (rows,) + shape of the column.

        """
        if re.match(r'[0-9]*?_',label):
            idx,key = label.split('_',1)
            data = self.data[key].to_numpy()[:,int(idx)-1].reshape(-1,1)
        else:
            data = self.data[label].to_numpy().reshape((-1,)+self.shapes[label])

        if data.size == 0: return data                                                              # no first element to derive type from

        # cast to the type of the first element (avoids 'object' arrays from mixed-type frames)
        return data.astype(type(data.flat[0]))


    def set(self,label,data,info=None):
        """
        Set column data.

        Parameters
        ----------
        label : str
            Column label. A single component of a vector/tensor column
            can be selected by its 1-based index, e.g. '2_v'.
        data : np.ndarray
            New data.
        info : str, optional
            Human-readable information about the new data.

        Raises
        ------
        KeyError
            If the column does not exist.

        """
        self._add_comment(label,data.shape[1:],info)

        if re.match(r'[0-9]*?_',label):
            idx,key = label.split('_',1)
            # columns.get_loc returns int, slice, or mask depending on the index;
            # locate the first matching column in a way that works for all cases
            positions = np.flatnonzero(self.data.columns == key)
            if positions.size == 0: raise KeyError(key)
            iloc = int(positions[0]) + int(idx) - 1
            self.data.iloc[:,iloc] = data
        else:
            self.data[label] = data.reshape(self.data[label].shape)


    def add(self,label,data,info=None):
        """
        Add column data.

        Parameters
        ----------
        label : str
            Column label.
        data : np.ndarray
            Modified data. First dimension is the number of rows,
            remaining dimensions define the shape of the column.
        info : str, optional
            Human-readable information about the modified data.

        """
        self._add_comment(label,data.shape[1:],info)

        self.shapes[label] = data.shape[1:] if len(data.shape) > 1 else (1,)
        size = np.prod(data.shape[1:],dtype=int)
        new = pd.DataFrame(data=data.reshape(-1,size),
                           columns=[label]*size,
                          )
        new.index = self.data.index                                                                 # align by position, not by index value
        self.data = pd.concat([self.data,new],axis=1)


    def delete(self,label):
        """
        Delete column data.

        Parameters
        ----------
        label : str
            Column label.

        """
        self.data.drop(columns=label,inplace=True)
        del self.shapes[label]


    def rename(self,label_old,label_new,info=None):
        """
        Rename column data.

        Parameters
        ----------
        label_old : str
            Old column label.
        label_new : str
            New column label.
        info : str, optional
            Human-readable information about the renaming.

        """
        self.data.rename(columns={label_old:label_new},inplace=True)
        c = '{} => {}{}'.format(label_old,label_new,'' if not info else ': {}'.format(info))
        self.comments.append(c)
        # rebuild to keep the order of the labels
        self.shapes = {(label if label != label_old else label_new):self.shapes[label] for label in self.shapes}


    def sort_by(self,labels,ascending=True):
        """
        Sort table by values of given labels.

        Parameters
        ----------
        labels : str or list
            Column labels for sorting. Components of vector/tensor
            columns are addressed by their flat label, e.g. '2_v'.
        ascending : bool or list, optional
            Set sort order.

        """
        self._label_flat()
        try:
            self.data.sort_values(labels,axis=0,inplace=True,ascending=ascending)
        finally:
            self._label_condensed()                                                                 # restore labels even if sorting fails

        labels_ = [labels] if isinstance(labels,str) else labels                                    # do not join the characters of a single label
        self.comments.append('sorted by [{}]'.format(', '.join(labels_)))


    def append(self,other):
        """
        Append other table vertically (similar to numpy.vstack).

        Requires matching labels/shapes and order.

        Parameters
        ----------
        other : Table
            Table to append.

        Raises
        ------
        KeyError
            If labels, shapes, or column order differ.

        """
        if self.shapes != other.shapes or not self.data.columns.equals(other.data.columns):
            raise KeyError('Labels or shapes or order do not match')
        else:
            # DataFrame.append is not available in recent pandas versions
            self.data = pd.concat([self.data,other.data],ignore_index=True)


    def join(self,other):
        """
        Append other table horizontally (similar to numpy.hstack).

        Requires matching number of rows and no common labels.

        Parameters
        ----------
        other : Table
            Table to join.

        Raises
        ------
        KeyError
            If labels are shared or the number of rows differs.

        """
        if set(self.shapes) & set(other.shapes) or self.data.shape[0] != other.data.shape[0]:
            raise KeyError('Dublicated keys or row count mismatch')
        else:
            self.data = self.data.join(other.data)
            for key in other.shapes:
                self.shapes[key] = other.shapes[key]


    def to_ASCII(self,fname,new_style=False):
        """
        Store as plain text file.

        Parameters
        ----------
        fname : file, str, or pathlib.Path
            Filename or file for writing.
        new_style : Boolean, optional
            Write table in new style, indicating header lines by comment sign ('#') only.

        """
        labels = []
        for l in dict.fromkeys(self.data.columns):                                                  # unique labels in column order
            if self.shapes[l] == (1,):
                labels.append('{}'.format(l))
            elif len(self.shapes[l]) == 1:
                labels += ['{}_{}'.format(i+1,l)
                           for i in range(self.shapes[l][0])]
            else:
                labels += ['{}:{}_{}'.format('x'.join([str(d) for d in self.shapes[l]]),i+1,l)
                           for i in range(int(np.prod(self.shapes[l])))]

        if new_style:
            header = ['# {}'.format(comment) for comment in self.comments]
        else:
            # count includes the line holding the labels
            header = ['{} header'.format(len(self.comments)+1)] + self.comments

        try:
            f = open(fname,'w')
            opened_here = True
        except TypeError:                                                                           # not a path: assume file-like object
            f = fname
            opened_here = False

        try:
            for line in header + [' '.join(labels)]: f.write(line+'\n')
            self.data.to_csv(f,sep=' ',na_rep='nan',index=False,header=False)
        finally:
            if opened_here: f.close()                                                               # only close what was opened here