DDL in BNF for HDF5

1. Introduction

This document contains the data description language (DDL) for an HDF5 file. The description is in Backus-Naur Form.

2. Explanation of Symbols

This section contains a brief explanation of the symbols used in the DDL.
    ::=                      defined as
    <tname>                  a token with the name tname
    <a> | <b>                one of <a> or <b>
    <a>opt                   zero or one occurrence of <a>
    <a>*                     zero or more occurrences of <a>
    <a>+                     one or more occurrences of <a>
    [0-9]                    an element in the range between 0 and 9
    `['                      the token within the quotes (used for special characters)
    TBD                      To Be Decided

3. The DDL

<file> ::= HDF5 <file_name> { <file_super_block>opt 
                              <root_group>  }

<file_name> ::= <identifier>

<file_super_block> ::= SUPER_BLOCK { <super_block_content> <file_istore>  <file_driver>* }
<super_block_content> ::= SUPERBLOCK_VERSION <superblock_version> FREELIST_VERSION <freelist_version> 
                          SYMBOLTABLE_VERSION <symboltable_version> 
                          OBJECTHEADER_VERSION <objectheader_version>
                          USERBLOCK_SIZE <userblock_size> OFFSET_SIZE <offset_size> 
                          LENGTH_SIZE <length_size> BTREE_RANK <btree_rank> 
                          BTREE_LEAF <btree_leaf>
<superblock_version> ::= <integer> 
<freelist_version> ::= <integer> 
<objectheader_version> ::= <integer> 
<symboltable_version> ::= <integer> 
<userblock_size> ::= <integer> 
<offset_size> ::= <integer> 
<length_size> ::= <integer> 
<btree_rank> ::= <integer>
<btree_leaf> ::= <integer>
<file_driver> ::= <file_driver_identifier>
<file_driver_identifier> ::= FILE_DRIVER H5FD_CORE |
 FILE_DRIVER H5FD_DPSS |
 FILE_DRIVER H5FD_FAMILY |
 FILE_DRIVER H5FD_GASS |
 FILE_DRIVER H5FD_LOG |
 FILE_DRIVER H5FD_MPIO |
 FILE_DRIVER H5FD_MULTI |
 FILE_DRIVER H5FD_SEC2 |
 FILE_DRIVER H5FD_STDIO
<file_istore> ::= <integer> 

<root_group> ::= GROUP "/" {
                           <unamed_datatype>*
                           <object_id>opt
                           <group_comment>opt
                           <group_attribute>*
                           <group_member>*
                       }

<datatype> ::= <atomic_type> | <compound_type> | <variable_length_type> | <array_type>

<unamed_datatype> ::= DATATYPE <unamed_type_name> { <datatype> }

<unamed_type_name> ::= the name assigned to an unnamed type, in the form
                       #oid1:oid2, where oid1 and oid2 are the object ids
                       of the type

<atomic_type> ::= <integer>  | <float>  | <time>      | <string> |
                  <bitfield> | <opaque> | <reference> | <enum>

<integer> ::=  H5T_STD_I8BE     | H5T_STD_I8LE      |
               H5T_STD_I16BE    | H5T_STD_I16LE     |
               H5T_STD_I32BE    | H5T_STD_I32LE     |
               H5T_STD_I64BE    | H5T_STD_I64LE     |
               H5T_STD_U8BE     | H5T_STD_U8LE      |
               H5T_STD_U16BE    | H5T_STD_U16LE     |
               H5T_STD_U32BE    | H5T_STD_U32LE     |
               H5T_STD_U64BE    | H5T_STD_U64LE     |
               H5T_NATIVE_CHAR  | H5T_NATIVE_UCHAR  |
               H5T_NATIVE_SHORT | H5T_NATIVE_USHORT |
               H5T_NATIVE_INT   | H5T_NATIVE_UINT   |
               H5T_NATIVE_LONG  | H5T_NATIVE_ULONG  |
               H5T_NATIVE_LLONG | H5T_NATIVE_ULLONG

<float> ::= H5T_IEEE_F32BE   | H5T_IEEE_F32LE     |
            H5T_IEEE_F64BE   | H5T_IEEE_F64LE     |
            H5T_NATIVE_FLOAT |  H5T_NATIVE_DOUBLE |
            H5T_NATIVE_LDOUBLE

<time> ::= TBD

<string> ::= H5T_STRING { STRSIZE <strsize> ;
               STRPAD <strpad> ;
               CSET <cset> ;
               CTYPE <ctype> ; }  

<strsize> ::= <int_value>

<strpad> ::= H5T_STR_NULLTERM | H5T_STR_NULLPAD | H5T_STR_SPACEPAD

<cset> ::= H5T_CSET_ASCII

<ctype> ::= H5T_C_S1 | H5T_FORTRAN_S1

<bitfield> ::= TBD

<opaque> ::= H5T_OPAQUE { <identifier> }

<reference> ::= H5T_REFERENCE { <ref_type> }

<ref_type> ::= H5T_STD_REF_OBJECT | H5T_STD_REF_DSETREG

<compound_type> ::= H5T_COMPOUND { <member_type_def>+ }

<member_type_def> ::= <datatype> <field_name> ;

<field_name> ::= <identifier>

<variable_length_type> ::= H5T_VLEN { <datatype> }

<array_type> ::= H5T_ARRAY { <dim_sizes> <datatype> }

<dim_sizes> ::= `['<dimsize>`]' | `['<dimsize>`]'<dim_sizes>

<dimsize> ::= <int_value>

<attribute> ::= ATTRIBUTE <attr_name> { <dataset_type>    
                                        <dataset_space>
                                        <data>opt  } 

<attr_name> ::= <identifier>

<dataset_type> ::= DATATYPE <path_name> | <datatype>

<enum> ::= H5T_ENUM { <enum_base_type> <enum_def>+  }

<enum_base_type> ::= <integer>
// Currently enums can only hold integer type data, but they may be expanded
// in the future to hold any datatype

<enum_def> ::= <enum_symbol> <enum_val>;

<enum_symbol> ::= <identifier>

<enum_val> ::= <int_value>

<path_name> ::= <path_part>+

<path_part> ::= /<identifier>

<dataspace> ::= <scalar_space> | <simple_space> | <complex_space> | <null_space>

<scalar_space> ::= SCALAR
<null_space> ::= NULL //only to 1.7

<simple_space> ::= SIMPLE { <current_dims> / <max_dims> }

<complex_space> ::= COMPLEX { <complex_space_definition> }

<dataset_space> ::= DATASPACE <path_name> | <dataspace>

<current_dims> ::= <dims>

<max_dims> ::= `(' <max_dim_list> `)'

<max_dim_list> ::= <max_dim> | <max_dim>, <max_dim_list>

<max_dim> ::= <int_value> | H5S_UNLIMITED

<complex_space_definition> ::= TBD

<data> ::= DATA { <scalar_space_data> | <simple_space_data> | <complex_space_data> } | <subset>

<scalar_space_data> ::= <any_element>

<any_element> ::= <atomic_element> | <compound_element> | 
                  <variable_length_element> | <array_element>

<any_data_seq> ::= <any_element> | <any_element>, <any_data_seq>

<atomic_element> ::= <integer_data> | <float_data>    | <time_data>   |
                     <string_data>  | <bitfield_data> | <opaque_data> |
                     <enum_data>    | <reference_data>

<subset> ::= SUBSET { <start>;
                      <stride>;
                      <count>;
                      <block>;
                 DATA { <simple_space_data> }
             }

<start> ::= START (<coor_list>)

<stride> ::= STRIDE (<pos_list>)

<count> ::= COUNT (<coor_list>)

<block> ::= BLOCK (<coor_list>)

<coor_list> ::= <int_value>, <coor_list> | <int_value>

<integer_data> ::= <int_value>

<float_data> ::= a floating point number

<time_data> ::= TBD

<string_data> ::= a string
// A string is enclosed in double quotes. 
// If a string is displayed on more than one line, the string concatenation
// operator '//' is used.

<bitfield_data> ::= TBD

<opaque_data> ::= TBD

<enum_data> ::= <enum_symbol>

<reference_data> ::= <object_ref_data> | <data_region_data> | NULL

<object_ref_data> ::= <object_type> <object_num>

<object_type> ::= DATASET | GROUP | DATATYPE

<object_id> ::= OBJECTID { <object_num> }

<object_num> ::= <int_value>:<int_value> | <int_value>

<data_region_data> ::= H5T_STD_REF_DSETREG <object_num> { <data_region_data_list> }

<data_region_data_list> ::= <data_region_data_info>, <data_region_data_list> | <data_region_data_info>

<data_region_data_info> ::= <region_info> | <point_info>

<region_info> ::= (<region_vals>)

<region_vals> ::= <lower_bound>:<upper_bound>, <region_vals> | <lower_bound>:<upper_bound>

<lower_bound> ::= <int_value>

<upper_bound> ::= <int_value>

<point_info> ::= (<point_vals>)

<point_vals> ::= <int_value> | <int_value>, <point_vals>

<compound_element> ::= { <any_data_seq> }

<atomic_simple_data> ::= <atomic_element>, <atomic_simple_data> | <atomic_element>

<simple_space_data> ::= <any_data_seq>

<variable_length_element> ::= ( <any_data_seq> )

<array_element> ::= `[' <any_data_seq> `]'

<complex_space_data> ::= TBD

<named_datatype> ::= DATATYPE <type_name> { <datatype> }

<type_name> ::= <identifier>

<named_dataspace> ::= TBD

<hardlink> ::= HARDLINK <path_name> 

<group> ::= GROUP <group_name> { <hardlink> | <group_info> }

<group_comment> ::= COMMENT <string_data>
            
<group_name> ::= <identifier>

<group_info> ::= <object_id>opt <group_comment>opt <group_attribute>* <group_member>* 
            
<group_attribute> ::= <attribute> 

<group_member> ::= <named_datatype> | <named_dataspace> | <group> |
                   <dataset> | <softlink>

<dataset> ::= DATASET <dataset_name> { <hardlink> | <dataset_info> }

<dataset_info> ::= <dataset_name> <comment>opt <dataset_type>  <dataset_space> 
                   <storagelayout>opt
                   <filters>* <fill_value>opt <object_id>opt <dataset_attribute>* 
                              <data>opt
// Tokens must be in this order 

<dataset_name> ::= <identifier>
<comment>          ::= COMMENT <comment_string>
<comment_string>   ::= a string
<storagelayout> ::= STORAGE_LAYOUT { <contiguous_layout>  |
                            <chunked_layout>     |
                            <compact_layout>     |
                            <external_layout> }

<contiguous_layout> ::= CONTIGUOUS    // default

<chunked_layout> ::=  CHUNKED <chunk_dims> 
<chunk_dims> ::= <dims>
<compact_layout> ::=  COMPACT
<external_layout> ::=  EXTERNAL <external_info>
<external_info> ::= {OFFSET <offset> FILENAME <file_name> SIZE <file_size> }
<offset> ::= integer
<file_name> ::= string
<file_size> ::= integer //size in Kb
<filters> ::= FILTERS { 
                <shuffle_filter>    
                <fletcher32_filter>     
                <deflate_filter>     
                <szip_filter> 
                <user_defined_filter>
                }

<shuffle_filter> ::= SHUFFLE | PREPROCESSING SHUFFLE
<fletcher32_filter> ::= FLETCHER32 | CHECKSUM FLETCHER32
<deflate_filter> ::= DEFLATE <LEVEL <deflate_level>> |
                     COMPRESSION DEFLATE <LEVEL <deflate_level>>
<deflate_level> ::= [1-9]
<szip_filter> ::= SZIP <PIXELS PER BLOCK <pixels_per_block>, MODE <mode>, 
                  CODING <coding>, BYTE ORDER <byte_order>, HEADER <header>opt|
                  COMPRESSION SZIP <PIXELS PER BLOCK <pixels_per_block>, MODE <mode>, 
                  CODING <coding>, BYTE ORDER <byte_order>, HEADER <header>opt
<pixels_per_block> ::= 2|4|6|8|10|12|14|16|18|20|22|24|26|28|30|32
<mode> ::= HARDWARE | K13
<coding> ::= ENTROPY | NN
<byte_order> ::= LSB | MSB
<header> ::= raw
<user_defined_filter> ::= UNKNOWN_FILTER <filter_id> PARAMS {<filter_params>}
<filter_id> ::= integer
<filter_params> ::= comma separated numeric values
<fill_info> ::= FILLVALUE FILL_TIME <fill_time> ALLOC_TIME  <alloc_time> VALUE <fill_value>
<fill_time> ::= IFSET | ALLOC | NEVER
<alloc_time> ::= EARLY | INCR | LATE
<fill_value> ::= {<data>}
<dims> ::= (<dims_values>)

<dims_values> ::= <int_value> | <int_value>, <dims_values>

<compact_layout> ::= TBD           

<external_layout> ::= {EXTERNAL <external_file>+ }

<external_file> ::= (<file_name> <offset> <size>) 

<offset> ::= <int_value>

<size> ::= <int_value>

<compression> ::= COMPRESSION { TBD }  

<dataset_attribute> ::= <attribute> 

<softlink> ::= SOFTLINK <softlink_name> { LINKTARGET <target> }

<softlink_name> ::= <identifier>

<target> ::= <identifier>

<identifier> ::= a string
// character '/' should be used with care. 

<pos_list>  ::= <pos_int>, <pos_list> | <pos_int>

<int_value> ::= 0 | <pos_int>

<pos_int>   ::= [1-9][0-9]*

 

 
Last modified: Wednesday, May 12, 2004