将时间序列数据存储在具有可扩展时间维度的 HDF5 中
Posted
技术标签:
【中文标题】将时间序列数据存储在具有可扩展时间维度的 HDF5 中【英文标题】:Storing time series data in HDF5 with extendable time dimension 【发布时间】:2020-02-21 15:52:09 【问题描述】:我想将时间序列数据存储在具有可扩展时间维度的 HDF5 中。我正在为向数据集写入(追加)额外数据部分的最后一步而苦苦挣扎。
我的代码中的主要数据结构是一个名为 beads 的 3D 数组。数组的第一个维度是笛卡尔坐标,第二个维度是珠子 ID,第三个维度是时间帧索引。我可以成功地将这个数组写入数据集。代码的第二部分为额外的一个时间帧创建数据,扩展现有数据集,并尝试将这一新的单帧数据写入(追加)到扩展后的数据集中。你能为我指出正确的方向吗?我怀疑 hyperslab 的选择有误。也许是元素的 offset 或 count 有问题?
示例代码:
PROGRAM H5_ARR_BEADS_EXT
! Writes NF frames of bead coordinates to dataset "Beads" in "data.h5",
! then extends the dataset and attempts to write one additional frame.
! NOTE(review): this is the program as posted in the question. The second
! H5Dwrite_f call (the only one that passes a file dataspace) is the one
! reported to fail with "selection + offset not within extent".
USE HDF5
IMPLICIT NONE
! Number of coordinates per bead (NX), beads (NB) and time frames (NF)
integer, parameter :: NX = 3, NB = 10, NF = 5
! Bead coordinates; allocated below with index order (coord, bead, frame)
real, allocatable :: beads(:,:,:)
! Per-bead scratch arrays, one per coordinate component
real, allocatable :: gxx(:), gxy(:), gxz(:)
! File handle and file name
integer(hid_t) :: fileID
character(len=8) :: fileName = "data.h5"
! File dataspace: rank 2, dimensions listed as (frame, bead)
! NOTE(review): this order is the reverse of the (coord, bead, frame)
! index order used for the beads array - confirm which order is intended.
integer :: spaceRank = 2
integer(size_t) :: spaceDims(2) = [NF, NB] ! frame, bead
integer(hsize_t) :: maxSpaceDims(2)
integer(hid_t) :: spaceID
! Memory dataspace describing the single frame written in the append step
integer :: memRank
integer(hsize_t) :: memDims(2)
integer(hid_t) :: memID
! Hyperslab start (offset) and extent (count) within the file dataspace
integer(hsize_t) :: offset(2), count(2)
! Array datatype: each dataset element is a 1D array of NX reals
! NOTE(review): arrTypeWriteID is declared but not used in this program.
integer :: arrTypeRank = 1
integer(size_t) :: arrTypeDims(1) = [NX]
integer(hid_t) :: arrTypeID, arrTypeWriteID
! Dataset handle, name, and the dims arrays passed to the write/extend calls
integer(size_t) :: dataSetDims(3) = [NF, NB, NX] ! frame, bead, coord
integer(hid_t) :: dataSetID
character(len=64) :: dataSetName
integer(hsize_t) :: dataSetExtDims(3)
! Dataset creation property list (used to enable chunking)
integer(hid_t) :: propID
integer(hsize_t) :: chunkDims(2)
integer :: info ! I/O status
integer :: b, f ! counters
! Allocate beads as (coord, bead, frame) = (NX, NB, NF) and the scratch
! arrays with NB elements each. The allocation status in info is not checked.
allocate(beads(dataSetDims(3),dataSetDims(2),dataSetDims(1)), &
gxx(dataSetDims(2)), gxy(dataSetDims(2)), gxz(dataSetDims(2)), &
stat=info)
! Initialise Fortran interface
call H5open_f(info)
! Create new file (with default properties); H5F_ACC_TRUNC_F is the access flag
call H5Fcreate_f(fileName, H5F_ACC_TRUNC_F, fileID, info)
!-----------------------------------------------------------------------
! Initialise data
! for each time frame
do f = 1, NF
! Draw fresh random values for every coordinate component
call random_number(gxx)
call random_number(gxy)
call random_number(gxz)
! for each bead: store its three coordinates and echo them
do b = 1, NB
beads(:,b,f) = [gxx(b), gxy(b), gxz(b)]
print *, "Bead(",b,",",f,"):", beads(:,b,f)
end do
end do
! Create the rank-2 file dataspace with current size spaceDims.
! Both dimensions are given an unlimited maximum size here; the
! commented-out line below would have limited the second one.
! maxSpaceDims = [H5S_UNLIMITED_F, spaceDims(2)]
maxSpaceDims = [H5S_UNLIMITED_F, H5S_UNLIMITED_F]
call H5Screate_simple_f(spaceRank, spaceDims, spaceID, info, maxSpaceDims)
! Enable chunking with a chunk size of one element in each dimension
! chunkDims = [int(1,8), spaceDims(2)]
chunkDims = [int(1,8), int(1,8)]
call H5Pcreate_f(H5P_DATASET_CREATE_F, propID, info)
call H5Pset_chunk_f(propID, spaceRank, chunkDims, info)
! Create 1D array datatype (coord) with NX elements of H5T_NATIVE_REAL
call H5Tarray_create_f(H5T_NATIVE_REAL, arrTypeRank, arrTypeDims, &
arrTypeID, info)
! Create dataset with chunking property
! NOTE(review): propID is never closed; no H5Pclose_f call appears in
! this program.
dataSetName = "Beads"
call H5Dcreate_f(fileID, dataSetName, arrTypeID, spaceID, dataSetID, &
info, propID)
! Write all NF frames into the file (whole dataset, no explicit dataspaces)
call H5Dwrite_f(dataSetID, arrTypeID, beads, dataSetDims, info)
!-----------------------------------------------------------------------
! Second part: build one additional frame and try to append it
deallocate(beads, stat=info)
! Re-allocate beads as (coord, bead, frame) with a single frame
allocate(beads(dataSetDims(3),dataSetDims(2),1), &
stat=info)
print *, ""
print *, "----- ----- -----"
print *, ""
! Empty data array
beads = 0.0
! Initialise new data portion
! Draw fresh random values for every coordinate component
call random_number(gxx)
call random_number(gxy)
call random_number(gxz)
! for each bead: store its three coordinates and echo them
do b = 1, NB
beads(:,b,1) = [gxx(b), gxy(b), gxz(b)]
print *, "Bead(",b,",",1,"):", beads(:,b,1)
end do
! Extend dataset by one frame
! NOTE(review): dataSetExtDims has three elements, but the dataset was
! created on a dataspace of rank spaceRank = 2 - confirm the intended size.
dataSetExtDims = [NF+1, NB, NX]
call H5Dset_extent_f(dataSetID, dataSetExtDims, info)
! Create 2D array memory space (frame, bead) for the single new frame
memRank = 2; memDims = [1, NB]
call H5Screate_simple_f(memRank, memDims, memID, info)
! Write to extended part of dataset
! Select hyperslab in dataspace: one frame starting at frame offset NF
! NOTE(review): spaceID is still the handle created before the call to
! H5Dset_extent_f, with size spaceDims = [NF, NB]; an offset of NF in the
! first dimension lies outside that size.
offset = [NF, 0]; count = [1, NB]
call H5Sselect_hyperslab_f(spaceID, H5S_SELECT_SET_F, offset, &
count, info)
! Write the new frame using the memory space and the selected file space
dataSetDims = [1, NB, NX]
call H5Dwrite_f(dataSetID, arrTypeID, beads(:,:,1), dataSetDims, info, &
memID, spaceID)
! Close dataset
call H5Dclose_f(dataSetID, info)
! Close datatype
call H5Tclose_f(arrTypeID, info)
! Close dataspace
call H5Sclose_f(spaceID, info)
! Close memory space
call H5Sclose_f(memID, info)
!-----------------------------------------------------------------------
! Close file
call h5fclose_f(fileID, info)
! Close Fortran interface
call h5close_f(info)
! Deallocate data arrays
deallocate(beads, gxx, gxy, gxz, stat=info)
END PROGRAM H5_ARR_BEADS_EXT
代码崩溃并显示以下错误消息:
HDF5-DIAG: Error detected in HDF5 (1.10.5) thread 0:
#000: ../../src/H5Dio.c line 322 in H5Dwrite(): could not get a validated dataspace from file_space_id
major: Invalid arguments to routine
minor: Bad value
#001: ../../src/H5S.c line 254 in H5S_get_validated_dataspace(): selection + offset not within extent
major: Dataspace
minor: Out of range
【问题讨论】:
您能否发布一个屏幕截图,说明数据集的外观(例如在 HDFView 中)并禁用导致程序崩溃的代码? 我查看了代码,数据和H5中索引的顺序似乎是相反的,这让我很困惑。我会研究 hyperslab 中索引的顺序,这看起来是错误的。 【参考方案1】:我已经发现了我的程序的问题:
各处的元素顺序都应该是:坐标 (NX)、珠子 (NB)、帧 (NF),例如 spaceDims = [NB, NF] 和 dataSetDims = [NX, NB, NF] 等。
在追加新数据之前,需要先关闭数据空间,然后再重新打开(获取)它。
在追加数据之前,必须扩展的是数据空间(即使用 spaceDims 的二维尺寸),而不是数据集数组(dataSetDims 的三维尺寸)。
请在下面找到更正的程序:
program h5_arr_beads_ext
  ! Stores a time series of bead coordinates in an HDF5 dataset whose time
  ! (frame) dimension can grow, then appends one more frame.
  !
  ! Layout:
  !   * in memory : beads(coord, bead, frame), default real
  !   * in file   : rank-2 dataset "Beads" with dimensions (bead, frame); each
  !                 element is an array datatype holding the NX coordinates
  !
  ! Steps:
  !   1. create the file and write the first NF frames;
  !   2. reopen the dataset, extend it to NF+1 frames, fetch the dataspace of
  !      the *extended* dataset, select the new frame as a hyperslab and
  !      write into it.
  !
  ! Every HDF5 and allocation status is checked; the program stops with a
  ! message on the first failure.
  use hdf5
  use, intrinsic :: iso_fortran_env, only: error_unit
  implicit none

  ! No. of coordinates, beads, time frames
  integer, parameter :: NX = 3, NB = 10, NF = 5

  ! File and dataset names
  character(len=*), parameter :: fileName = "data.h5"
  character(len=*), parameter :: dataSetName = "Beads"

  ! Ranks of the file dataspace, the memory dataspace and the array datatype
  integer, parameter :: spaceRank = 2
  integer, parameter :: memRank = 2
  integer, parameter :: arrTypeRank = 1

  ! Beads array: coord, bead, frame
  real, allocatable :: beads(:,:,:)

  ! HDF5 identifiers
  integer(hid_t) :: fileID      ! file
  integer(hid_t) :: spaceID     ! file dataspace
  integer(hid_t) :: memID       ! memory dataspace
  integer(hid_t) :: arrTypeID   ! array datatype (NX reals)
  integer(hid_t) :: dataSetID   ! dataset
  integer(hid_t) :: propID      ! dataset creation property list

  ! All extents are hsize_t, the kind the HDF5 Fortran API expects for
  ! dimension arguments.
  integer(hsize_t) :: spaceDims(2)      ! bead, frame
  integer(hsize_t) :: maxSpaceDims(2)
  integer(hsize_t) :: chunkDims(2)
  integer(hsize_t) :: memDims(2)
  integer(hsize_t) :: offset(2), count(2)
  integer(hsize_t) :: arrTypeDims(1)
  integer(hsize_t) :: dataSetDims(3)    ! coord, bead, frame

  integer :: info   ! status returned by HDF5 / allocate
  integer :: f      ! frame counter

  allocate(beads(NX, NB, NF), stat=info)
  call check(info, "allocate(beads)")

  ! Initialise Fortran interface
  call h5open_f(info)
  call check(info, "h5open_f")

  ! Create new file (with default properties)
  call h5fcreate_f(fileName, H5F_ACC_TRUNC_F, fileID, info)
  call check(info, "h5fcreate_f")

  !---------------------------------------------------------------------
  ! Part 1: write the first NF frames
  !---------------------------------------------------------------------
  do f = 1, NF
    call fill_frame(beads(:,:,f), f)
  end do

  ! Create 2D dataspace (bead, frame): the bead dimension is fixed, only
  ! the frame dimension is unlimited.
  spaceDims = [NB, NF]
  maxSpaceDims(1) = NB
  maxSpaceDims(2) = H5S_UNLIMITED_F
  call h5screate_simple_f(spaceRank, spaceDims, spaceID, info, maxSpaceDims)
  call check(info, "h5screate_simple_f (file space)")

  ! Enable chunking (required for an extendable dataset); one chunk holds
  ! all beads of a single frame, i.e. exactly the unit that gets appended.
  chunkDims = [NB, 1]
  call h5pcreate_f(H5P_DATASET_CREATE_F, propID, info)
  call check(info, "h5pcreate_f")
  call h5pset_chunk_f(propID, spaceRank, chunkDims, info)
  call check(info, "h5pset_chunk_f")

  ! Create 1D array datatype (coord). H5T_NATIVE_REAL matches the default
  ! real kind used for beads.
  arrTypeDims = [NX]
  call h5tarray_create_f(H5T_NATIVE_REAL, arrTypeRank, arrTypeDims, &
                         arrTypeID, info)
  call check(info, "h5tarray_create_f")

  ! Create dataset with chunking property
  call h5dcreate_f(fileID, dataSetName, arrTypeID, spaceID, dataSetID, &
                   info, propID)
  call check(info, "h5dcreate_f")

  ! The property list is only needed for dataset creation; release it.
  call h5pclose_f(propID, info)
  call check(info, "h5pclose_f")

  ! Write data into file
  dataSetDims = [NX, NB, NF]
  call h5dwrite_f(dataSetID, arrTypeID, beads, dataSetDims, info)
  call check(info, "h5dwrite_f (initial frames)")

  ! Close dataset and the dataspace used to create it. That dataspace
  ! describes the pre-extension size and must not be reused for the append.
  call h5dclose_f(dataSetID, info)
  call check(info, "h5dclose_f")
  call h5sclose_f(spaceID, info)
  call check(info, "h5sclose_f (file space)")

  !---------------------------------------------------------------------
  ! Part 2: append one frame
  !---------------------------------------------------------------------
  deallocate(beads, stat=info)
  call check(info, "deallocate(beads)")
  ! coord, bead, frame
  allocate(beads(NX, NB, 1), stat=info)
  call check(info, "allocate(beads)")

  print *, ""
  print *, "----- ----- -----"
  print *, ""

  ! Initialise new data portion
  call fill_frame(beads(:,:,1), 1)

  ! Get dataset
  call h5dopen_f(fileID, dataSetName, dataSetID, info)
  call check(info, "h5dopen_f")

  ! Extend the dataset by one frame (dimensions are those of the rank-2
  ! dataspace, not of the rank-3 Fortran array).
  spaceDims = [NB, NF + 1]
  call h5dset_extent_f(dataSetID, spaceDims, info)
  call check(info, "h5dset_extent_f")

  ! Create 2D memory space (bead, frame) describing the single new frame
  memDims = [NB, 1]
  call h5screate_simple_f(memRank, memDims, memID, info)
  call check(info, "h5screate_simple_f (memory space)")

  ! Get the dataspace of the extended dataset
  call h5dget_space_f(dataSetID, spaceID, info)
  call check(info, "h5dget_space_f")

  ! Select the new frame: offsets are zero-based, so frame NF+1 starts at NF
  offset = [0, NF]
  count = [NB, 1]
  call h5sselect_hyperslab_f(spaceID, H5S_SELECT_SET_F, offset, &
                             count, info)
  call check(info, "h5sselect_hyperslab_f")

  ! Write data
  dataSetDims = [NX, NB, 1]
  call h5dwrite_f(dataSetID, arrTypeID, beads(:,:,1), dataSetDims, info, &
                  memID, spaceID)
  call check(info, "h5dwrite_f (appended frame)")

  !---------------------------------------------------------------------
  ! Release resources
  !---------------------------------------------------------------------
  call h5dclose_f(dataSetID, info)
  call check(info, "h5dclose_f")
  call h5tclose_f(arrTypeID, info)
  call check(info, "h5tclose_f")
  call h5sclose_f(spaceID, info)
  call check(info, "h5sclose_f (file space)")
  call h5sclose_f(memID, info)
  call check(info, "h5sclose_f (memory space)")
  call h5fclose_f(fileID, info)
  call check(info, "h5fclose_f")
  call h5close_f(info)
  call check(info, "h5close_f")

  deallocate(beads, stat=info)
  call check(info, "deallocate(beads)")

contains

  ! Fill one frame with random coordinates and echo every bead.
  !   frame      : (coord, bead) slice to fill; first extent must be 3
  !   frameIndex : frame number, used only in the printed label
  subroutine fill_frame(frame, frameIndex)
    real, intent(out) :: frame(:,:)
    integer, intent(in) :: frameIndex
    real :: gxx(size(frame, 2)), gxy(size(frame, 2)), gxz(size(frame, 2))
    integer :: b

    call random_number(gxx)
    call random_number(gxy)
    call random_number(gxz)
    do b = 1, size(frame, 2)
      frame(:,b) = [gxx(b), gxy(b), gxz(b)]
      print *, "Bead(",b,",",frameIndex,"):", frame(:,b)
    end do
  end subroutine fill_frame

  ! Stop the program with a diagnostic when a call reports failure.
  !   status : status value returned by the call (0 means success)
  !   what   : short description of the call, used in the message
  subroutine check(status, what)
    integer, intent(in) :: status
    character(len=*), intent(in) :: what

    if (status /= 0) then
      write(error_unit, '(a,a,a,i0)') "Error: ", what, " failed, status = ", status
      stop 1
    end if
  end subroutine check

end program h5_arr_beads_ext
【讨论】:
【参考方案2】:在发布的代码中,用于创建数据集的数据空间(其标识符保存在 spaceID 中)被重新用于在文件中选择 hyperslab,以写入追加的帧。但是数据集自创建以来已经扩展了范围——用于创建数据集的数据空间不再与数据集扩展后的数据空间一致。
应当为 hyperslab 选择操作和后续写入使用合适的数据空间。增加数据集的范围后,可以通过对数据集调用 H5Dget_space_f 来获得合适的数据空间。
(索引也可能存在其他问题——但导致所报告错误的是数据空间的重复使用。)
【讨论】:
以上是关于将时间序列数据存储在具有可扩展时间维度的 HDF5 中的主要内容,如果未能解决你的问题,请参考以下文章