From 52e3fb50bc8e0a6b6b2bc0001248e6b587f1ff29 Mon Sep 17 00:00:00 2001
From: Martin Diehl
Date: Sun, 6 Dec 2020 10:20:32 +0100
Subject: [PATCH] compress. Datasets are chunked along first dimension. Chunk
 size (1MB for real) is probably not optimal

---
 python/damask/_result.py | 10 ++++++++-
 src/HDF5_utilities.f90   | 47 ++++++++++++++++++++++++++++++++--------
 2 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/python/damask/_result.py b/python/damask/_result.py
index 32970e523..6d83c8872 100644
--- a/python/damask/_result.py
+++ b/python/damask/_result.py
@@ -1132,6 +1132,7 @@ class Result:
             Arguments parsed to func.
 
         """
+        chunk_size = 1024**2//8
         num_threads = damask.environment.options['DAMASK_NUM_THREADS']
         pool = mp.Pool(int(num_threads) if num_threads is not None else None)
         lock = mp.Manager().Lock()
@@ -1155,7 +1156,14 @@ class Result:
                         dataset.attrs['Overwritten'] = 'Yes' if h5py3 else \
                                                        'Yes'.encode()
                     else:
-                        dataset = f[result[0]].create_dataset(result[1]['label'],data=result[1]['data'])
+                        if result[1]['data'].size >= chunk_size*2:
+                            shape = result[1]['data'].shape
+                            chunks = (chunk_size//np.prod(shape[1:]),)+shape[1:]
+                            dataset = f[result[0]].create_dataset(result[1]['label'],data=result[1]['data'],
+                                                                  maxshape=shape,chunks=chunks,compression = 'gzip')
+                        else:
+                            dataset = f[result[0]].create_dataset(result[1]['label'],data=result[1]['data'],
+                                                                  maxshape=result[1]['data'].shape)
 
                     now = datetime.datetime.now().astimezone()
                     dataset.attrs['Created'] = now.strftime('%Y-%m-%d %H:%M:%S%z') if h5py3 else \
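Note: in the Python change above, each chunk is sized to hold roughly 1 MiB of
64-bit reals (1024**2//8 = 131072 elements), and only the first dimension is
split; the trailing (tensor) dimensions stay whole. Datasets smaller than two
chunks skip compression. A minimal sketch of the same arithmetic, using a
hypothetical dataset shape:

    from math import prod

    chunk_size = 1024**2//8                 # elements per ~1 MiB chunk of float64
    shape = (200000,3,3)                    # hypothetical: one 3x3 tensor per material point
    if prod(shape) >= chunk_size*2:         # below two chunks: no compression
        chunks = (chunk_size//prod(shape[1:]),)+shape[1:]
    else:
        chunks = None
    print(chunks)                           # (14563, 3, 3): 14563*9*8 B ~ 1 MiB per chunk
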
diff --git a/src/HDF5_utilities.f90 b/src/HDF5_utilities.f90
index 47f4243e7..48b98812b 100644
--- a/src/HDF5_utilities.f90
+++ b/src/HDF5_utilities.f90
@@ -1789,7 +1789,7 @@ subroutine initialize_read(dset_id, filespace_id, memspace_id, plist_id, aplist_
 !--------------------------------------------------------------------------------------------------
 ! creating a property list for IO and set it to collective
   call h5pcreate_f(H5P_DATASET_ACCESS_F, aplist_id, hdferr)
-  if(hdferr < 0) error stop 'HDF5 error'
+  if(hdferr < 0) error stop 'HDF5 error'
 #ifdef PETSc
   call h5pset_all_coll_metadata_ops_f(aplist_id, .true., hdferr)
   if(hdferr < 0) error stop 'HDF5 error'
@@ -1815,7 +1815,7 @@ end subroutine initialize_read
 !--------------------------------------------------------------------------------------------------
 subroutine finalize_read(dset_id, filespace_id, memspace_id, plist_id, aplist_id)
 
-  integer(HID_T), intent(in) :: dset_id, filespace_id, memspace_id, plist_id, aplist_id
+  integer(HID_T), intent(in) :: dset_id, filespace_id, memspace_id, plist_id, aplist_id
   integer :: hdferr
 
   call h5pclose_f(plist_id, hdferr)
@@ -1836,8 +1836,8 @@ end subroutine finalize_read
 !> @brief initialize HDF5 handles, determines global shape and start for parallel write
 !--------------------------------------------------------------------------------------------------
 subroutine initialize_write(dset_id, filespace_id, memspace_id, plist_id, &
-                            myStart, totalShape, &
-                            loc_id,myShape,datasetName,datatype,parallel)
+                            myStart, totalShape, &
+                            loc_id,myShape,datasetName,datatype,parallel)
 
   integer(HID_T), intent(in) :: loc_id !< file or group handle
   character(len=*), intent(in) :: datasetName !< name of the dataset in the file
@@ -1850,10 +1850,10 @@ subroutine initialize_write(dset_id, filespace_id, memspace_id, plist_id, &
     totalShape !< shape of the dataset (all processes)
   integer(HID_T), intent(out) :: dset_id, filespace_id, memspace_id, plist_id
 
-  integer, dimension(worldsize) :: &
-    writeSize !< contribution of all processes
-  integer :: ierr
-  integer :: hdferr
+  integer, dimension(worldsize) :: writeSize !< contribution of all processes
+  integer(HID_T) :: dcpl
+  integer :: ierr, hdferr
+  integer(HSIZE_T), parameter :: chunkSize = 1024_HSIZE_T**2/8_HSIZE_T
 
 !-------------------------------------------------------------------------------------------------
 ! creating a property list for transfer properties (is collective when reading in parallel)
@@ -1880,6 +1880,17 @@ subroutine initialize_write(dset_id, filespace_id, memspace_id, plist_id, &
   myStart(ubound(myStart)) = int(sum(writeSize(1:worldrank)),HSIZE_T)
   totalShape = [myShape(1:ubound(myShape,1)-1),int(sum(writeSize),HSIZE_T)]
 
+!--------------------------------------------------------------------------------------------------
+! compress (and chunk) larger datasets
+  call h5pcreate_f(H5P_DATASET_CREATE_F, dcpl, hdferr)
+  if(hdferr < 0) error stop 'HDF5 error'
+  if(product(totalShape) >= chunkSize*2_HSIZE_T) then
+    call h5pset_chunk_f(dcpl, size(totalShape), getChunks(totalShape,chunkSize), hdferr)
+    if(hdferr < 0) error stop 'HDF5 error'
+    call h5pset_deflate_f(dcpl, 6, hdferr)
+    if(hdferr < 0) error stop 'HDF5 error'
+  endif
+
 !--------------------------------------------------------------------------------------------------
 ! create dataspace in memory (local shape) and in file (global shape)
   call h5screate_simple_f(size(myShape), myShape, memspace_id, hdferr, myShape)
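Note: in the hunk above, chunking and gzip (deflate level 6) are attached to
the dataset creation property list dcpl only when the dataset spans at least
two chunks; smaller datasets keep the default contiguous, uncompressed layout.
Because Fortran is column-major and the HDF5 Fortran API reverses the
dimension order in the file, getChunks (defined at the end of the patch)
splits the last Fortran dimension, i.e. the same file dimension the Python
side splits first. A rough sketch of the layout arithmetic, assuming a
Fortran-order shape:

    from math import prod

    chunkSize = 1024**2//8        # elements per ~1 MiB chunk of real(8)
    totalShape = [3,3,200000]     # assumed shape: last dimension counts material points
    if prod(totalShape) >= chunkSize*2:
        # leading dimensions stay whole, trailing one is split
        chunks = totalShape[:-1]+[chunkSize//prod(totalShape[:-1])]
    print(chunks)                 # [3, 3, 14563]
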
@@ -1889,11 +1900,14 @@ subroutine initialize_write(dset_id, filespace_id, memspace_id, plist_id, &
 
 !--------------------------------------------------------------------------------------------------
 ! create dataset in the file and select a hyperslab from it (the portion of the current process)
-  call h5dcreate_f(loc_id, trim(datasetName), datatype, filespace_id, dset_id, hdferr)
+  call h5dcreate_f(loc_id, trim(datasetName), datatype, filespace_id, dset_id, hdferr, dcpl)
   if(hdferr < 0) error stop 'HDF5 error'
   call h5sselect_hyperslab_f(filespace_id, H5S_SELECT_SET_F, myStart, myShape, hdferr)
   if(hdferr < 0) error stop 'HDF5 error'
 
+  call h5pclose_f(dcpl , hdferr)
+  if(hdferr < 0) error stop 'HDF5 error'
+
 end subroutine initialize_write
 
 
@@ -1916,4 +1930,19 @@
 
 end subroutine finalize_write
 
+
+!--------------------------------------------------------------------------------------------------
+!> @brief determine chunk layout
+!--------------------------------------------------------------------------------------------------
+pure function getChunks(totalShape,chunkSize)
+
+  integer(HSIZE_T), dimension(:), intent(in) :: totalShape
+  integer(HSIZE_T), intent(in)               :: chunkSize
+  integer(HSIZE_T), dimension(size(totalShape)) :: getChunks
+
+  getChunks = [totalShape(1:size(totalShape)-1),&
+               chunkSize/product(totalShape(1:size(totalShape)-1))]
+
+end function getChunks
+
 end module HDF5_Utilities
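
Note: chunk layout and compression are recorded in the file itself, so the
effect of this patch can be verified after a run. A quick check with h5py,
assuming a hypothetical result file and dataset path:

    import h5py

    with h5py.File('my_job.hdf5','r') as f:                # hypothetical file name
        ds = f['increment_0/phase/Aluminum/mechanics/F']   # hypothetical dataset path
        print(ds.shape, ds.chunks, ds.compression)
        # e.g.: (200000, 3, 3) (14563, 3, 3) gzip

A dataset below the two-chunk threshold reports compression None instead.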