compress.

Datasets are chunked along the first dimension. The chunk size (1 MiB for 8-byte reals) is
probably not optimal.
Martin Diehl 2020-12-06 10:20:32 +01:00
parent 000de75617
commit 52e3fb50bc
2 changed files with 47 additions and 10 deletions
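In rough terms, the heuristic both files implement is: a chunk holds about 1 MiB of 8-byte values, and only the first dimension is split. A minimal sketch, assuming NumPy (chunk_layout is a made-up helper, not part of the commit):

    import numpy as np

    CHUNK_SIZE = 1024**2//8            # number of 8-byte entries in 1 MiB

    def chunk_layout(shape):
        """Split along the first dimension; keep trailing dimensions whole."""
        return (CHUNK_SIZE//np.prod(shape[1:], dtype=int),) + tuple(shape[1:])

    print(chunk_layout((40000, 3, 3)))  # (14563, 3, 3): 14563*9*8 B ~ 1 MiB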


@@ -1132,6 +1132,7 @@ class Result:
    Arguments parsed to func.
    """
    chunk_size = 1024**2//8                                        # 1 MiB of 8-byte entries
    num_threads = damask.environment.options['DAMASK_NUM_THREADS']
    pool = mp.Pool(int(num_threads) if num_threads is not None else None)
    lock = mp.Manager().Lock()
@@ -1155,7 +1156,14 @@ class Result:
            dataset.attrs['Overwritten'] = 'Yes' if h5py3 else \
                                           'Yes'.encode()
        else:
            if result[1]['data'].size >= chunk_size*2:
                shape = result[1]['data'].shape
                chunks = (chunk_size//np.prod(shape[1:]),)+shape[1:]
                dataset = f[result[0]].create_dataset(result[1]['label'],data=result[1]['data'],
                                                      maxshape=shape,chunks=chunks,compression='gzip')
            else:
                dataset = f[result[0]].create_dataset(result[1]['label'],data=result[1]['data'],
                                                      maxshape=result[1]['data'].shape)

        now = datetime.datetime.now().astimezone()
        dataset.attrs['Created'] = now.strftime('%Y-%m-%d %H:%M:%S%z') if h5py3 else \
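For reference, a self-contained h5py sketch of what the new branch produces; the file name and dataset label are placeholders, not part of the commit:

    import h5py
    import numpy as np

    chunk_size = 1024**2//8
    data = np.random.rand(40000, 3, 3)             # large enough to trigger chunking

    with h5py.File('example.hdf5', 'w') as f:
        if data.size >= chunk_size*2:
            chunks = (chunk_size//np.prod(data.shape[1:]),) + data.shape[1:]
            f.create_dataset('stress', data=data, maxshape=data.shape,
                             chunks=chunks, compression='gzip')
        else:
            f.create_dataset('stress', data=data, maxshape=data.shape)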


@@ -1850,10 +1850,10 @@ subroutine initialize_write(dset_id, filespace_id, memspace_id, plist_id, &
    totalShape                                                    !< shape of the dataset (all processes)
  integer(HID_T), intent(out) :: dset_id, filespace_id, memspace_id, plist_id
  integer, dimension(worldsize) :: writeSize                      !< contribution of all processes
  integer(HID_T) :: dcpl
  integer :: ierr, hdferr
  integer(HSIZE_T), parameter :: chunkSize = 1024_HSIZE_T**2/8_HSIZE_T !< 1 MiB of 8-byte entries

!-------------------------------------------------------------------------------------------------
! creating a property list for transfer properties (is collective when reading in parallel)
@@ -1880,6 +1880,17 @@ subroutine initialize_write(dset_id, filespace_id, memspace_id, plist_id, &
  myStart(ubound(myStart)) = int(sum(writeSize(1:worldrank)),HSIZE_T)
  totalShape = [myShape(1:ubound(myShape,1)-1),int(sum(writeSize),HSIZE_T)]

!--------------------------------------------------------------------------------------------------
! compress (and chunk) larger datasets
  call h5pcreate_f(H5P_DATASET_CREATE_F, dcpl, hdferr)
  if(hdferr < 0) error stop 'HDF5 error'
  if(product(totalShape) >= chunkSize*2_HSIZE_T) then
    call h5pset_chunk_f(dcpl, size(totalShape), getChunks(totalShape,chunkSize), hdferr)
    if(hdferr < 0) error stop 'HDF5 error'
    call h5pset_deflate_f(dcpl, 6, hdferr)
    if(hdferr < 0) error stop 'HDF5 error'
  endif

!--------------------------------------------------------------------------------------------------
! create dataspace in memory (local shape) and in file (global shape)
  call h5screate_simple_f(size(myShape), myShape, memspace_id, hdferr, myShape)
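One asymmetry worth noting: the Fortran side requests deflate level 6, while the Python side relies on h5py's default gzip level (4). If matching levels mattered, the Python call could pin the level via compression_opts; a minimal, hypothetical sketch with placeholder names:

    import h5py
    import numpy as np

    with h5py.File('example.hdf5', 'w') as f:
        data = np.zeros((40000, 3, 3))
        # compression_opts=6 matches the level the Fortran side passes to h5pset_deflate_f
        f.create_dataset('stress_lvl6', data=data, chunks=(1024**2//8//9, 3, 3),
                         compression='gzip', compression_opts=6)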
@@ -1889,11 +1900,14 @@ subroutine initialize_write(dset_id, filespace_id, memspace_id, plist_id, &
!--------------------------------------------------------------------------------------------------
! create dataset in the file and select a hyperslab from it (the portion of the current process)
  call h5dcreate_f(loc_id, trim(datasetName), datatype, filespace_id, dset_id, hdferr, dcpl)
  if(hdferr < 0) error stop 'HDF5 error'
  call h5sselect_hyperslab_f(filespace_id, H5S_SELECT_SET_F, myStart, myShape, hdferr)
  if(hdferr < 0) error stop 'HDF5 error'
  call h5pclose_f(dcpl, hdferr)
  if(hdferr < 0) error stop 'HDF5 error'

end subroutine initialize_write
@@ -1916,4 +1930,19 @@ subroutine finalize_write(plist_id, dset_id, filespace_id, memspace_id)
end subroutine finalize_write
!--------------------------------------------------------------------------------------------------
!> @brief determine chunk layout
!--------------------------------------------------------------------------------------------------
pure function getChunks(totalShape,chunkSize)

  integer(HSIZE_T), dimension(:), intent(in) :: totalShape
  integer(HSIZE_T), intent(in) :: chunkSize
  integer(HSIZE_T), dimension(size(totalShape)) :: getChunks

  getChunks = [totalShape(1:size(totalShape)-1),&
               chunkSize/product(totalShape(1:size(totalShape)-1))]

end function getChunks
end module HDF5_Utilities
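Since Fortran stores arrays with the first index varying fastest, totalShape here is the reverse of the shape h5py sees, so splitting the last Fortran dimension chunks the first dimension of the stored dataset, consistent with the Python side. A rough Python rendering of getChunks for cross-checking (assumes NumPy; get_chunks is a made-up name):

    import numpy as np

    def get_chunks(total_shape, chunk_size):
        # keep all leading (Fortran-fast) dimensions whole, split the last one
        return tuple(total_shape[:-1]) \
             + (chunk_size//np.prod(total_shape[:-1], dtype=int),)

    print(get_chunks((3, 3, 40000), 1024**2//8))   # (3, 3, 14563)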