compress.
Datasets are chunked along the first dimension. Chunk size (1MB for real) is probably not optimal
This commit is contained in:
parent
000de75617
commit
52e3fb50bc
|
@ -1132,6 +1132,7 @@ class Result:
|
|||
Arguments parsed to func.
|
||||
|
||||
"""
|
||||
chunk_size = 1024**2//8
|
||||
num_threads = damask.environment.options['DAMASK_NUM_THREADS']
|
||||
pool = mp.Pool(int(num_threads) if num_threads is not None else None)
|
||||
lock = mp.Manager().Lock()
|
||||
|
@ -1155,7 +1156,14 @@ class Result:
|
|||
dataset.attrs['Overwritten'] = 'Yes' if h5py3 else \
|
||||
'Yes'.encode()
|
||||
else:
|
||||
dataset = f[result[0]].create_dataset(result[1]['label'],data=result[1]['data'])
|
||||
if result[1]['data'].size >= chunk_size*2:
|
||||
shape = result[1]['data'].shape
|
||||
chunks = (chunk_size//np.prod(shape[1:]),)+shape[1:]
|
||||
dataset = f[result[0]].create_dataset(result[1]['label'],data=result[1]['data'],
|
||||
maxshape=shape,chunks=chunks,compression = 'gzip')
|
||||
else:
|
||||
dataset = f[result[0]].create_dataset(result[1]['label'],data=result[1]['data'],
|
||||
maxshape=result[1]['data'].shape)
|
||||
|
||||
now = datetime.datetime.now().astimezone()
|
||||
dataset.attrs['Created'] = now.strftime('%Y-%m-%d %H:%M:%S%z') if h5py3 else \
|
||||
|
|
|
@ -1850,10 +1850,10 @@ subroutine initialize_write(dset_id, filespace_id, memspace_id, plist_id, &
|
|||
totalShape !< shape of the dataset (all processes)
|
||||
integer(HID_T), intent(out) :: dset_id, filespace_id, memspace_id, plist_id
|
||||
|
||||
integer, dimension(worldsize) :: &
|
||||
writeSize !< contribution of all processes
|
||||
integer :: ierr
|
||||
integer :: hdferr
|
||||
integer, dimension(worldsize) :: writeSize !< contribution of all processes
|
||||
integer(HID_T) :: dcpl
|
||||
integer :: ierr, hdferr
|
||||
integer(HSIZE_T), parameter :: chunkSize = 1024_HSIZE_T**2/8_HSIZE_T
|
||||
|
||||
!-------------------------------------------------------------------------------------------------
|
||||
! creating a property list for transfer properties (is collective when reading in parallel)
|
||||
|
@ -1880,6 +1880,17 @@ subroutine initialize_write(dset_id, filespace_id, memspace_id, plist_id, &
|
|||
myStart(ubound(myStart)) = int(sum(writeSize(1:worldrank)),HSIZE_T)
|
||||
totalShape = [myShape(1:ubound(myShape,1)-1),int(sum(writeSize),HSIZE_T)]
|
||||
|
||||
!--------------------------------------------------------------------------------------------------
|
||||
! compress (and chunk) larger datasets
|
||||
call h5pcreate_f(H5P_DATASET_CREATE_F, dcpl, hdferr)
|
||||
if(hdferr < 0) error stop 'HDF5 error'
|
||||
if(product(totalShape) >= chunkSize*2_HSIZE_T) then
|
||||
call h5pset_chunk_f(dcpl, size(totalShape), getChunks(totalShape,chunkSize), hdferr)
|
||||
if(hdferr < 0) error stop 'HDF5 error'
|
||||
call h5pset_deflate_f(dcpl, 6, hdferr)
|
||||
if(hdferr < 0) error stop 'HDF5 error'
|
||||
endif
|
||||
|
||||
!--------------------------------------------------------------------------------------------------
|
||||
! create dataspace in memory (local shape) and in file (global shape)
|
||||
call h5screate_simple_f(size(myShape), myShape, memspace_id, hdferr, myShape)
|
||||
|
@ -1889,11 +1900,14 @@ subroutine initialize_write(dset_id, filespace_id, memspace_id, plist_id, &
|
|||
|
||||
!--------------------------------------------------------------------------------------------------
|
||||
! create dataset in the file and select a hyperslab from it (the portion of the current process)
|
||||
call h5dcreate_f(loc_id, trim(datasetName), datatype, filespace_id, dset_id, hdferr)
|
||||
call h5dcreate_f(loc_id, trim(datasetName), datatype, filespace_id, dset_id, hdferr, dcpl)
|
||||
if(hdferr < 0) error stop 'HDF5 error'
|
||||
call h5sselect_hyperslab_f(filespace_id, H5S_SELECT_SET_F, myStart, myShape, hdferr)
|
||||
if(hdferr < 0) error stop 'HDF5 error'
|
||||
|
||||
call h5pclose_f(dcpl , hdferr)
|
||||
if(hdferr < 0) error stop 'HDF5 error'
|
||||
|
||||
end subroutine initialize_write
|
||||
|
||||
|
||||
|
@ -1916,4 +1930,19 @@ subroutine finalize_write(plist_id, dset_id, filespace_id, memspace_id)
|
|||
|
||||
end subroutine finalize_write
|
||||
|
||||
|
||||
!--------------------------------------------------------------------------------------------------
|
||||
!> @brief determine chunk layout
|
||||
!--------------------------------------------------------------------------------------------------
|
||||
pure function getChunks(totalShape,chunkSize)
|
||||
|
||||
integer(HSIZE_T), dimension(:), intent(in) :: totalShape
|
||||
integer(HSIZE_T), intent(in) :: chunkSize
|
||||
integer(HSIZE_T), dimension(size(totalShape)) :: getChunks
|
||||
|
||||
getChunks = [totalShape(1:size(totalShape)-1),&
|
||||
chunkSize/product(totalShape(1:size(totalShape)-1))]
|
||||
|
||||
end function getChunks
|
||||
|
||||
end module HDF5_Utilities
|
||||
|
|
Loading…
Reference in New Issue