DAMASK_EICMD/python/damask/_table.py

605 lines
18 KiB
Python
Raw Permalink Normal View History

2019-10-31 15:15:34 +05:30
import re
2020-09-14 10:34:01 +05:30
import copy
from typing import Union, Tuple, List, Iterable
2019-10-31 15:15:34 +05:30
import pandas as pd
import numpy as np
2022-01-23 18:45:25 +05:30
from ._typehints import FileHandle
from . import util
2020-03-13 05:00:49 +05:30
class Table:
    """Manipulate multi-dimensional spreadsheet-like data."""

    def __init__(self,
                 shapes: dict = None,
                 data: np.ndarray = None,
                 comments: Union[str, Iterable[str]] = None):
        """
        New spreadsheet.

        Parameters
        ----------
        shapes : dict with str:tuple pairs, optional
            Shapes of the data columns. Mandatory if 'data' is given.
            For instance, 'F':(3,3) for a deformation gradient, or 'r':(1,) for a scalar.
        data : numpy.ndarray or pandas.DataFrame, optional
            Data. Existing column labels of a pandas.DataFrame will be replaced.
        comments : str or iterable of str, optional
            Additional, human-readable information.

        """
        comments_ = [comments] if isinstance(comments,str) else comments
        self.comments = [] if comments_ is None else [str(c) for c in comments_]
        # 'None' sentinel instead of a mutable default argument ({}),
        # which would be shared between all calls
        shapes = {} if shapes is None else shapes
        # normalize integer shape entries, e.g. 3 -> (3,)
        self.shapes = { k:(v,) if isinstance(v,(np.int64,np.int32,int)) else v for k,v in shapes.items() }
        self.data = pd.DataFrame(data=data)
        self._relabel('uniform')
2022-01-12 21:40:13 +05:30
def __repr__(self) -> str:
"""
Return repr(self).
Give short human-readable summary.
"""
self._relabel('shapes')
data_repr = self.data.__repr__()
self._relabel('uniform')
return '\n'.join(['# '+c for c in self.comments])+'\n'+data_repr
def __eq__(self,
other: object) -> bool:
"""
Return self==other.
2022-08-09 18:59:22 +05:30
Test equality of other.
"""
return NotImplemented if not isinstance(other,Table) else \
self.shapes == other.shapes and self.data.equals(other.data)
    def __getitem__(self,
                    item: Union[slice, Tuple[slice, ...]]) -> 'Table':
        """
        Return self[item].

        Return slice according to item.

        Parameters
        ----------
        item : row and/or column indexer
            Slice to select from Table.

        Returns
        -------
        slice : damask.Table
            Sliced part of the Table.

        Examples
        --------
        >>> import damask
        >>> import numpy as np
        >>> tbl = damask.Table(shapes=dict(colA=(1,),colB=(1,),colC=(1,)),
        ...                    data=np.arange(12).reshape((4,3)))
        >>> tbl['colA','colB']
           colA  colB
        0     0     1
        1     3     4
        2     6     7
        3     9    10
        >>> tbl[::2,['colB','colA']]
           colB  colA
        0     1     0
        2     7     6
        >>> tbl[[True,False,False,True],'colB']
           colB
        0     1
        3    10

        """
        # Normalize 'item' into a (rows, columns) pair for DataFrame.loc:
        # - bare slice or ndarray             -> selects rows, all columns
        # - boolean list                      -> row mask, all columns
        # - (list, cols) pair                 -> row list as ndarray, given columns
        # - (slice/ndarray, cols) pair        -> already canonical
        # - anything else (column label(s))   -> all rows, given column(s)
        item_ = (item,slice(None,None,None)) if isinstance(item,(slice,np.ndarray)) else \
                (np.array(item),slice(None,None,None)) if isinstance(item,list) and np.array(item).dtype == np.bool_ else \
                (np.array(item[0]),item[1]) if isinstance(item[0],list) else \
                item if isinstance(item[0],(slice,np.ndarray)) else \
                (slice(None,None,None),item)
        sliced = self.data.loc[item_]
        # a single-column selection yields a Series; recover its label from item_
        cols = np.array(sliced.columns if isinstance(sliced,pd.core.frame.DataFrame) else [item_[1]])
        # keep the first occurrence of each label, preserving original order
        _,idx = np.unique(cols,return_index=True)
        return self.__class__(data=sliced,
                              shapes={k:self.shapes[k] for k in cols[np.sort(idx)]},
                              comments=self.comments)
2022-01-12 21:40:13 +05:30
def __len__(self) -> int:
"""
Return len(self).
Number of rows.
"""
2020-10-09 11:15:20 +05:30
return len(self.data)
2022-01-23 18:45:25 +05:30
def __copy__(self) -> 'Table':
"""
Return deepcopy(self).
Create deep copy.
"""
2020-09-14 10:34:01 +05:30
return copy.deepcopy(self)
2021-01-03 16:33:40 +05:30
copy = __copy__
2020-09-14 10:34:01 +05:30
def _label(self,
what: Union[str, List[str]],
how: str) -> List[str]:
"""
Expand labels according to data shape.
2020-03-13 05:00:49 +05:30
Parameters
----------
what : str or list
Labels to expand.
2022-01-25 02:39:13 +05:30
how : {'uniform, 'shapes', 'linear'}
Mode of labeling.
'uniform' ==> v v v
'shapes' ==> 3:v v v
'linear' ==> 1_v 2_v 3_v
2020-03-13 05:00:49 +05:30
"""
what = [what] if isinstance(what,str) else what
2019-12-05 19:35:50 +05:30
labels = []
for label in what:
shape = self.shapes[label]
size = np.prod(shape,dtype=np.int64)
if how == 'uniform':
labels += [label] * size
elif how == 'shapes':
labels += [('' if size == 1 or i>0 else f'{util.srepr(shape,"x")}:')+label for i in range(size)]
elif how == 'linear':
labels += [('' if size == 1 else f'{i+1}_')+label for i in range(size)]
else:
raise KeyError
return labels
2022-01-27 04:07:07 +05:30
def _relabel(self,
how: str):
"""
Modify labeling of data in-place.
Parameters
----------
2022-01-25 02:39:13 +05:30
how : {'uniform, 'shapes', 'linear'}
Mode of labeling.
'uniform' ==> v v v
'shapes' ==> 3:v v v
'linear' ==> 1_v 2_v 3_v
"""
2022-05-23 11:31:17 +05:30
self.data.columns = self._label(self.shapes,how) # type: ignore
def _add_comment(self,
label: str,
shape: Tuple[int, ...],
info: str = None):
2020-03-21 15:37:21 +05:30
if info is not None:
specific = f'{label}{" "+str(shape) if np.prod(shape,dtype=np.int64) > 1 else ""}: {info}'
2020-08-25 02:58:26 +05:30
general = util.execution_stamp('Table')
self.comments.append(f'{specific} / {general}')
2020-03-13 05:00:49 +05:30
2022-01-12 21:40:13 +05:30
def isclose(self,
2022-01-25 02:39:13 +05:30
other: 'Table',
rtol: float = 1e-5,
atol: float = 1e-8,
equal_nan: bool = True) -> np.ndarray:
"""
Report where values are approximately equal to corresponding ones of other Table.
Parameters
----------
other : damask.Table
Table to compare against.
rtol : float, optional
Relative tolerance of equality.
atol : float, optional
Absolute tolerance of equality.
equal_nan : bool, optional
Consider matching NaN values as equal. Defaults to True.
Returns
-------
mask : numpy.ndarray of bool
Mask indicating where corresponding table values are close.
"""
return np.isclose( self.data.to_numpy(),
other.data.to_numpy(),
rtol=rtol,
atol=atol,
equal_nan=equal_nan)
2022-01-12 21:40:13 +05:30
def allclose(self,
2022-01-23 18:45:25 +05:30
other: 'Table',
2022-01-12 21:40:13 +05:30
rtol: float = 1e-5,
atol: float = 1e-8,
equal_nan: bool = True) -> bool:
"""
Test whether all values are approximately equal to corresponding ones of other Table.
Parameters
----------
other : damask.Table
Table to compare against.
rtol : float, optional
Relative tolerance of equality.
atol : float, optional
Absolute tolerance of equality.
equal_nan : bool, optional
Consider matching NaN values as equal. Defaults to True.
Returns
-------
answer : bool
Whether corresponding values are close between both tables.
"""
return np.allclose( self.data.to_numpy(),
other.data.to_numpy(),
rtol=rtol,
atol=atol,
equal_nan=equal_nan)
    @staticmethod
    def load(fname: FileHandle) -> 'Table':
        """
        Load from ASCII table file.

        Initial comments are marked by '#', the first non-comment line
        containing the column labels.

        - Vector data column labels are indicated by '1_v, 2_v, ..., n_v'.
        - Tensor data column labels are indicated by '3x3:1_T, 3x3:2_T, ..., 3x3:9_T'.

        Parameters
        ----------
        fname : file, str, or pathlib.Path
            Filename or file for reading.

        Returns
        -------
        loaded : damask.Table
            Table data from file.

        """
        f = util.open_text(fname)
        f.seek(0)

        comments = []
        # leading '#' lines are comments; the first other line holds the labels
        while (line := f.readline().strip()).startswith('#'):
            comments.append(line.lstrip('#').strip())
        labels = line.split()

        # decode the column shape from each label's decoration
        shapes = {}
        for label in labels:
            tensor_column = re.search(r'[0-9,x]*?:[0-9]*?_',label)
            if tensor_column:
                # e.g. '3x3:1_T' -> shape (3,3) for column 'T'
                my_shape = tensor_column.group().split(':',1)[0].split('x')
                shapes[label.split('_',1)[1]] = tuple([int(d) for d in my_shape])
            else:
                vector_column = re.match(r'[0-9]*?_',label)
                if vector_column:
                    # e.g. '2_v' -> shape (n,) for column 'v'
                    shapes[label.split('_',1)[1]] = (int(label.split('_',1)[0]),)
                else:
                    # undecorated label -> scalar column
                    shapes[label] = (1,)

        data = pd.read_csv(f,names=list(range(len(labels))),sep=r'\s+')

        return Table(shapes,data,comments)
2019-12-05 10:15:27 +05:30
    @staticmethod
    def load_ang(fname: FileHandle) -> 'Table':
        """
        Load from ang file.

        A valid TSL ang file has to have the following columns:

        - Euler angles (Bunge notation) in radians, 3 floats, label 'eu'.
        - Spatial position in meters, 2 floats, label 'pos'.
        - Image quality, 1 float, label 'IQ'.
        - Confidence index, 1 float, label 'CI'.
        - Phase ID, 1 int, label 'ID'.
        - SEM signal, 1 float, label 'intensity'.
        - Fit, 1 float, label 'fit'.

        Parameters
        ----------
        fname : file, str, or pathlib.Path
            Filename or file for reading.

        Returns
        -------
        loaded : damask.Table
            Table data from file.

        """
        f = util.open_text(fname)
        f.seek(0)

        content = f.readlines()

        comments = [util.execution_stamp('Table','from_ang')]
        # header: leading '#' lines only
        for line in content:
            if line.startswith('#'):
                comments.append(line.split('#',1)[1].strip())
            else:
                break

        # np.loadtxt skips the '#' header lines by itself
        data = np.loadtxt(content)

        shapes = {'eu':3, 'pos':2, 'IQ':1, 'CI':1, 'ID':1, 'intensity':1, 'fit':1}
        # extra columns beyond the standard set are collected as 'unknown'
        if (remainder := data.shape[1]-sum(shapes.values())) > 0:
            shapes['unknown'] = remainder
        return Table(shapes,data,comments)
2020-01-08 20:04:21 +05:30
@property
2022-02-16 03:08:02 +05:30
def labels(self) -> List[str]:
return list(self.shapes)
2019-12-05 10:40:27 +05:30
def get(self,
label: str) -> np.ndarray:
"""
2019-12-05 10:40:27 +05:30
Get column data.
Parameters
----------
label : str
2019-12-05 10:40:27 +05:30
Column label.
2021-03-08 21:32:27 +05:30
Returns
-------
data : numpy.ndarray
Array of column data.
"""
2021-04-01 03:43:07 +05:30
data = self.data[label].to_numpy().reshape((-1,)+self.shapes[label])
return data.astype(type(data.flatten()[0]))
2019-12-05 19:35:50 +05:30
2019-10-31 15:15:34 +05:30
    def set(self,
            label: str,
            data: np.ndarray,
            info: str = None) -> 'Table':
        """
        Add new or replace existing column data.

        Parameters
        ----------
        label : str
            Column label. A label of the form 'name[i,j]' addresses the
            single component (i,j) of an existing multi-dimensional column.
        data : numpy.ndarray
            Column data. First dimension needs to match number of rows.
        info : str, optional
            Human-readable information about the data.

        Returns
        -------
        updated : damask.Table
            Updated table.

        """
        dup = self.copy()
        dup._add_comment(label, data.shape[1:], info)

        # 'name[i,j]' selects one component of an existing column 'name'
        if m := re.match(r'(.*)\[((\d+,)*(\d+))\]',label):
            key = m.group(1)
        else:
            key = label

        if key in dup.shapes:

            if m:
                # linear index of component (i,j) within the flattened column
                idx = np.ravel_multi_index(tuple(map(int,m.group(2).split(","))),
                                           self.shapes[key])
                # position of the first DataFrame column belonging to 'key',
                # offset by the component index
                iloc = dup.data.columns.get_loc(key).tolist().index(True) + idx
                dup.data.iloc[:,iloc] = data
            else:
                dup.data[label] = data.reshape(dup.data[label].shape)
        else:
            # new column: record its shape and append the flattened data
            dup.shapes[label] = data.shape[1:] if len(data.shape) > 1 else (1,)
            size = np.prod(data.shape[1:],dtype=np.int64)
            new = pd.DataFrame(data=data.reshape(-1,size),
                               columns=[label]*size,
                               )
            # align with existing rows unless the table is still empty
            new.index = new.index if dup.data.index.empty else dup.data.index
            dup.data = pd.concat([dup.data,new],axis=1)

        return dup
2019-12-05 19:35:50 +05:30
2019-12-05 10:40:27 +05:30
def delete(self,
label: str) -> 'Table':
2019-12-05 11:20:06 +05:30
"""
Delete column data.
Parameters
----------
label : str
Column label.
2021-03-08 21:32:27 +05:30
Returns
-------
2021-05-20 14:00:00 +05:30
updated : damask.Table
2021-03-08 21:32:27 +05:30
Updated table.
2019-12-05 11:20:06 +05:30
"""
2020-09-14 10:34:01 +05:30
dup = self.copy()
dup.data.drop(columns=label,inplace=True)
del dup.shapes[label]
return dup
2019-12-05 11:20:06 +05:30
2019-12-05 19:35:50 +05:30
def rename(self,
old: Union[str, Iterable[str]],
new: Union[str, Iterable[str]],
info: str = None) -> 'Table':
2019-12-05 11:20:06 +05:30
"""
Rename column data.
Parameters
----------
2020-09-14 10:34:01 +05:30
label_old : str or iterable of str
Old column label(s).
label_new : str or iterable of str
New column label(s).
2019-12-05 11:20:06 +05:30
2021-03-08 21:32:27 +05:30
Returns
-------
2021-05-20 14:00:00 +05:30
updated : damask.Table
2021-03-08 21:32:27 +05:30
Updated table.
2019-12-05 11:20:06 +05:30
"""
2020-09-14 10:34:01 +05:30
dup = self.copy()
columns = dict(zip([old] if isinstance(old,str) else old,
[new] if isinstance(new,str) else new))
dup.data.rename(columns=columns,inplace=True)
dup.comments.append(f'{old} => {new}'+('' if info is None else f': {info}'))
dup.shapes = {(label if label not in columns else columns[label]):dup.shapes[label] for label in dup.shapes}
return dup
2019-12-05 11:20:06 +05:30
2019-12-05 10:40:27 +05:30
def sort_by(self,
labels: Union[str, List[str]],
ascending: Union[bool, List[bool]] = True) -> 'Table':
2019-12-05 15:17:36 +05:30
"""
Sort table by data of given columns.
2019-12-05 15:17:36 +05:30
Parameters
----------
label : str or list
Column labels for sorting.
ascending : bool or list, optional
2019-12-05 15:17:36 +05:30
Set sort order.
2021-03-08 21:32:27 +05:30
Returns
-------
2021-05-20 14:00:00 +05:30
updated : damask.Table
2021-03-08 21:32:27 +05:30
Updated table.
2019-12-05 15:17:36 +05:30
"""
labels_ = [labels] if isinstance(labels,str) else labels.copy()
for i,l in enumerate(labels_):
if m := re.match(r'(.*)\[((\d+,)*(\d+))\]',l):
idx = np.ravel_multi_index(tuple(map(int,m.group(2).split(','))),
self.shapes[m.group(1)])
labels_[i] = f'{1+idx}_{m.group(1)}'
2020-09-14 10:34:01 +05:30
dup = self.copy()
dup._relabel('linear')
dup.data.sort_values(labels_,axis=0,inplace=True,ascending=ascending)
dup._relabel('uniform')
2020-09-14 10:34:01 +05:30
dup.comments.append(f'sorted {"ascending" if ascending else "descending"} by {labels}')
return dup
2019-12-05 15:17:36 +05:30
2019-12-05 19:35:50 +05:30
def append(self,
other: 'Table') -> 'Table':
"""
2020-01-12 04:44:35 +05:30
Append other table vertically (similar to numpy.vstack).
Requires matching labels/shapes and order.
Parameters
----------
other : damask.Table
Table to append.
2021-03-08 21:32:27 +05:30
Returns
-------
2021-05-20 14:00:00 +05:30
updated : damask.Table
Updated table.
2021-03-08 21:32:27 +05:30
"""
if self.shapes != other.shapes or not self.data.columns.equals(other.data.columns):
raise KeyError('mismatch of shapes or labels or their order')
dup = self.copy()
dup.data = pd.concat([dup.data,other.data],ignore_index=True)
return dup
def join(self,
other: 'Table') -> 'Table':
"""
2020-01-12 04:44:35 +05:30
Append other table horizontally (similar to numpy.hstack).
Requires matching number of rows and no common labels.
Parameters
----------
other : damask.Table
Table to join.
2021-03-08 21:32:27 +05:30
Returns
-------
2021-05-20 14:00:00 +05:30
updated : damask.Table
Updated table.
"""
if set(self.shapes) & set(other.shapes) or self.data.shape[0] != other.data.shape[0]:
raise KeyError('duplicated keys or row count mismatch')
dup = self.copy()
dup.data = dup.data.join(other.data)
for key in other.shapes:
dup.shapes[key] = other.shapes[key]
return dup
def save(self,
fname: FileHandle,
with_labels: bool = True):
"""
2020-09-18 18:33:51 +05:30
Save as plain text file.
Parameters
----------
fname : file, str, or pathlib.Path
2020-03-18 18:19:53 +05:30
Filename or file for writing.
with_labels : bool, optional
Write column labels. Defaults to True.
"""
labels = []
if with_labels:
for l in list(dict.fromkeys(self.data.columns)):
if self.shapes[l] == (1,):
labels.append(f'{l}')
elif len(self.shapes[l]) == 1:
labels += [f'{i+1}_{l}' \
for i in range(self.shapes[l][0])]
else:
labels += [f'{util.srepr(self.shapes[l],"x")}:{i+1}_{l}' \
for i in range(np.prod(self.shapes[l]))]
2019-10-31 15:15:34 +05:30
f = util.open_text(fname,'w')
f.write('\n'.join([f'# {c}' for c in self.comments] + [' '.join(labels)])+('\n' if labels else ''))
2022-03-27 02:30:08 +05:30
self.data.to_csv(f,sep=' ',na_rep='nan',index=False,header=False,line_terminator='\n')