DAMASK_EICMD/lib/damask/h5table.py

138 lines
5.6 KiB
Python
Raw Normal View History

2016-10-07 19:17:15 +05:30
# -*- coding: UTF-8 no BOM -*-
# ----------------------------------------------------------- #
# Ideally, h5py alone should be enough to serve as the data   #
# interface for future DAMASK, but since we are still not     #
# sure when this major shift will happen, it seems to be a    #
# good idea to provide an interface class that helps users    #
# ease into using HDF5 as the new daily storage driver.       #
# ----------------------------------------------------------- #
2016-10-07 19:17:15 +05:30
import os
import h5py
import numpy as np
import xml.etree.cElementTree as ET
2016-10-07 19:17:15 +05:30
# ---------------------------------------------------------------- #
# python 3 has no unicode object, this ensures that the code works #
# on Python 2&3 #
# ---------------------------------------------------------------- #
try:
    # Probe for the Python 2 builtin without binding a throwaway global.
    unicode
except NameError:
    # Python 3: every str is already unicode text.
    unicode = str
# ------------------------------------------------------- #
# Singleton class for converting feature name to H5F path #
# ------------------------------------------------------- #
# NOTE:
# use simple function to mimic the singleton class in
2016-10-07 19:17:15 +05:30
# C++/Java
def lables_to_path(label, dsXMLPath=None):
    """Read the XML definition file and return the type and HDF5 path of a label.

    PARAMETERS
    ----------
    label: str
        Name of the node to look up directly under the XML root.
    dsXMLPath: str, optional
        Path to the XML layout definition. When None, the file
        ``DS_HDF5.xml`` located in the same directory as this module
        is used.

    RETURNS
    -------
    (dataType, h5path): tuple
        Text of the ``<type>`` and ``<h5path>`` children of the node
        named ``label``.

    RAISES
    ------
    AttributeError
        If ``label`` (or its ``type``/``h5path`` child) is not present
        in the XML file, because the lookup yields None.
    """
    if dsXMLPath is None:
        # Use the default storage layout shipped next to this module.
        # Building the path from the directory (instead of substituting the
        # module file name) works regardless of the .py/.pyc/.pyo suffix and
        # cannot accidentally rewrite other parts of the path.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        dsXMLPath = os.path.join(module_dir, "DS_HDF5.xml")

    # This current implementation requires that all variables
    # stay under the root node; the nesting is defined through the
    # h5path. This could be improved with more advanced parsing
    # using the ET interface if it ever becomes necessary.
    tree = ET.parse(dsXMLPath)
    dataType = tree.find('{}/type'.format(label)).text
    h5path = tree.find('{}/h5path'.format(label)).text
    return (dataType, h5path)
2016-10-07 19:17:15 +05:30
class H5Table(object):
    """Interface class for h5py

    DESCRIPTION
    -----------
    Interface/wrapper class for manipulating data in HDF5 with DAMASK
    specialized data structure.
    -->Minimal API design.

    PARAMETERS
    ----------
    h5f_path: str
        Absolute path to the HDF5 file
    new_file: bool
        If True the file is opened in 'w' mode on construction
        (truncating any existing file), otherwise in 'a' mode.
    dsXMLFile: str, optional
        Path to the XML layout definition handed to lables_to_path();
        None selects that function's default.

    METHOD
    ------
    del_entry()  -- Force delete attributes/group/datasets (Dangerous)
    get_attr()   -- Return attributes if possible
    add_attr()   -- Add NEW attributes to dataset/group (please delete old first!)
    get_data()   -- Retrieve data in numpy.ndarray
    add_data()   -- Add dataset to H5 file
    get_cmdlog() -- Return the command used to generate the data if possible.

    NOTE
    ----
    1. As an interface class, it uses the lazy evaluation design
       that reads the data only when it is absolutely necessary.
    2. The command line used to generate a new feature is stored with
       each dataset as a dataset attribute.
    """

    def __init__(self, h5f_path, new_file=False, dsXMLFile=None):
        """Remember the file/layout paths and stamp the file's root group."""
        self.h5f_path = h5f_path
        self.dsXMLFile = dsXMLFile
        msg = 'Created by H5Talbe from DAMASK'
        mode = 'w' if new_file else 'a'
        # The file is only opened briefly; no handle is kept on the instance.
        with h5py.File(self.h5f_path, mode) as h5f:
            h5f['/'].attrs['description'] = msg

    def del_entry(self, feature_name):
        """delete entry in HDF5 table"""
        dataType, h5f_path = lables_to_path(feature_name,
                                            dsXMLPath=self.dsXMLFile)
        with h5py.File(self.h5f_path, 'a') as h5f:
            del h5f[h5f_path]

    def get_attr(self, attr_name):
        """return the attribute attr_name stored on the object at its h5path"""
        dataType, h5f_path = lables_to_path(attr_name,
                                            dsXMLPath=self.dsXMLFile)
        with h5py.File(self.h5f_path, 'a') as h5f:
            rst_attr = h5f[h5f_path].attrs[attr_name]
        return rst_attr

    def add_attr(self, attr_name, attr_data):
        """store attr_data as attribute attr_name on the object at its h5path"""
        dataType, h5f_path = lables_to_path(attr_name,
                                            dsXMLPath=self.dsXMLFile)
        with h5py.File(self.h5f_path, 'a') as h5f:
            h5f[h5f_path].attrs[attr_name] = attr_data
            h5f.flush()

    def get_data(self, feature_name=None):
        """extract dataset from HDF5 table and return it in a numpy array

        The values are copied into a freshly allocated numpy array of the
        dataset's shape with numpy's default dtype.
        """
        dataType, h5f_path = lables_to_path(feature_name,
                                            dsXMLPath=self.dsXMLFile)
        with h5py.File(self.h5f_path, 'a') as h5f:
            h5f_dst = h5f[h5f_path]  # get the handle for target dataset(table)
            # read_direct() fills the destination array in place and returns
            # None, so the buffer must be kept and returned explicitly.
            rst_data = np.zeros(h5f_dst.shape)
            h5f_dst.read_direct(rst_data)
        return rst_data

    def add_data(self, feature_name, dataset, cmd_log=None):
        """adding new feature into existing HDF5 file

        When cmd_log is given, its string form is stored in the 'log'
        attribute of the newly created dataset.
        """
        dataType, h5f_path = lables_to_path(feature_name,
                                            dsXMLPath=self.dsXMLFile)
        with h5py.File(self.h5f_path, 'a') as h5f:
            h5f.create_dataset(h5f_path, data=dataset)
            # store the cmd in log if possible
            if cmd_log is not None:
                h5f[h5f_path].attrs['log'] = str(cmd_log)
            h5f.flush()

    def get_cmdlog(self, feature_name):
        """get cmd history used to generate the feature"""
        dataType, h5f_path = lables_to_path(feature_name,
                                            dsXMLPath=self.dsXMLFile)
        with h5py.File(self.h5f_path, 'a') as h5f:
            cmd_logs = h5f[h5f_path].attrs['log']
        return cmd_logs