BioHDF version 0.3 alpha
Scalable NGS Data Storage Based on HDF5

bioh5g_alignments.h

00001 /*****************************************************************************
00002  * Copyright by The HDF Group                                                *
00003  * All rights reserved.                                                      *
00004  *                                                                           *
00005  * This file is part of BioHDF.  The full BioHDF copyright notice, including *
00006  * terms governing use, modification, and redistribution, is contained in    *
00007  * the file COPYING.  COPYING can be found at the root of the source code    *
00008  * distribution tree.  If you do not have access to this file, you may       *
00009  * request a copy from help@hdfgroup.org.                                    *
00010  *****************************************************************************/
00011  
00020 #ifndef _BIOH5G_ALIGNMENTS_H
00021 #define _BIOH5G_ALIGNMENTS_H
00022 
00023 #include <stdio.h>
00024 
00025 #include "biohdf_api.h"
00026 #include "biohdf_error.h"
00027 #include "biohdf_file.h"
00028 #include "biohdf_utility.h"
00029 
00030 #include "bioh5g_api.h"
00031 #include "bioh5g_reads.h"
00032 
00033 
00034 
00035 /*****************************************************************************
00036  * Attribute Names                                                           *
00037  *****************************************************************************/
00038 
00039 
00040 
00042 #define BIOH5G_READS_PATH_ATTR "READS_PATH"  /* Attribute name string; presumably names the attribute that records the path of the associated reads collection - TODO confirm. */
00043 
00044 
00045 
00047 #define BIOH5G_INDEX_METHOD_ATTR "INDEX_METHOD"  /* Attribute name string; presumably names the attribute that records a bioh5g_alignments_index_method value - TODO confirm. */
00048 
00049 
00050 
00051 /*****************************************************************************
00052  * Type definitions                                                          *
00053  *****************************************************************************/
00054 
00055 
00056 
00058 typedef struct _bioh5g_alignments *bioh5g_alignments;  /* Handle type for an alignments collection: a pointer to struct _bioh5g_alignments, which is not defined in this header. Because the typedef hides a pointer, `const bioh5g_alignments x` makes the pointer const, not the object it points to. */
00059 
00060 
00061 
00063 typedef struct _bioh5g_alignments_creation_properties *bioh5g_alignments_creation_properties;  /* Handle type for collection-creation properties: a pointer to a struct that is not defined in this header. */
00064 
00065 
00066 
00068 typedef struct _bioh5g_alignments_iterator *bioh5g_alignments_iterator;  /* Handle type for an alignments iterator: a pointer to a struct that is not defined in this header. */
00069 
00070 
00071 
00072 /*****************************************************************************
00073  * Structs and enums                                                         *
00074  *****************************************************************************/
00075 
00076 
00077 
00079 typedef enum  /* Text format selector taken by the header-storage and string/stream output functions declared in this header. */
00080 {
00081     SAM_FORMAT      /* the only enumerator; has the value 0 as the first, uninitialised member */
00082 } bioh5g_alignments_format;
00083 
00084 
00085 
00087 typedef enum  /* Index method selector; this is the type of the `method` argument of BIOH5Gcreate_alignments_index(). */
00088 {
00089     UNINDEXED = 0,       /* presumably: no index has been built - TODO confirm */
00090     REF_POS_SECONDARY = 1  /* presumably: a secondary index keyed on reference and position - TODO confirm */
00092     /* REF_POS_PRIMARY = 2, */
00093     /* REF_POS_NCLIST_PRIMARY = 3, */
00094     /* REF_POS_NCLIST_PRIMARY = 4, */ /* NOTE(review): same identifier as value 3 on the previous line; one of the two names is probably a typo - resolve before enabling either. */
00095 
00096 } bioh5g_alignments_index_method;
00097 
00098 
00099 
00100 /*****************************************************************************
00101  * Data container                                                            *
00102  *****************************************************************************/
00103 
00104 
00105 
00110 typedef struct  /* Public, non-opaque record holding the data of one alignment. Ownership of the char* members is not visible in this header - verify before freeing or aliasing them. */
00111 {
00112     /* basic alignment data */
00113     biohdf_int64   read_index;   /* index of the aligned read; presumably an index into the associated reads collection - TODO confirm */
00114     char           *reference;   /* reference sequence identifier as a C string (presumably the SAM RNAME) */
00115     biohdf_int32   position;     /* position on the reference; whether 0- or 1-based is not visible here - verify */
00116     biohdf_int32   length;       /* alignment length; exact definition (read vs. reference span) not visible here - verify */
00118     /* SAM data */
00119     unsigned char  sam_mapq;     /* by name, the SAM MAPQ field */
00120     biohdf_int32   sam_flags;    /* by name, the SAM FLAG field. NOTE(review): signed here, but the sam_flags accessors and the flags filter in this header use biohdf_int32u. */
00121     char           *sam_cigar;   /* by name, the SAM CIGAR string */
00122     char           *sam_tags;    /* by name, the SAM optional fields; how multiple tags are packed into one string is not visible here */
00124     /* SAM template data */
00125     char           *sam_rnext;   /* by name, the SAM RNEXT field */
00126     biohdf_int32   sam_pnext;    /* by name, the SAM PNEXT field */
00127     biohdf_int32   sam_tlen;     /* by name, the SAM TLEN field */
00129 } bioh5g_alignment_data;
00130 
00131 
00132 
00133 /*****************************************************************************
00134  * Create, open, close                                                       *
00135  *****************************************************************************/
00136 
00137 
00138 
00154 BIOHDF_API biohdf_error  /* Presumably reports whether an alignments collection exists at `path` in `file` - TODO confirm against the implementation. */
00155 BIOH5Gcheck_alignments_presence(const biohdf_file file,
00156                                 const char *path,
00157                                 /*OUT*/ biohdf_bool *presence);  /* marked OUT: the answer is written here, separately from the biohdf_error return value */
00158 
00159 
00160 
00173 BIOHDF_API biohdf_error  /* Presumably creates a new alignments collection at `path` in `file` - TODO confirm. */
00174 BIOH5Gcreate_alignments_collection(const biohdf_file file,
00175                                    const bioh5g_alignments_creation_properties props,  /* same handle type that BIOH5Gcreate_alignments_properties() outputs; whether NULL is accepted is not visible here */
00176                                    const char *path,
00177                                    /*OUT*/ bioh5g_alignments *aligns);  /* marked OUT: receives the collection handle */
00178 
00179 
00180 
00190 BIOHDF_API biohdf_error  /* Presumably opens the existing alignments collection at `path` in `file` - TODO confirm. */
00191 BIOH5Gopen_alignments_collection(const biohdf_file file,
00192                                  const char *path,
00193                                  biohdf_open_mode mode,  /* biohdf_open_mode is declared outside this header; its values are not visible here */
00194                                  /*OUT*/ bioh5g_alignments *aligns);  /* marked OUT: receives the collection handle */
00195 
00196 
00197 
00206 BIOHDF_API biohdf_error  /* Presumably closes a collection handle obtained from the create/open functions - TODO confirm. */
00207 BIOH5Gclose_alignments_collection(/*IN-OUT*/ bioh5g_alignments *aligns);  /* takes the address of the handle (marked IN-OUT); presumably the handle is reset on return - verify */
00208 
00209 
00210 
00223 BIOHDF_API biohdf_error  /* Presumably returns the reads path associated with this collection (compare BIOH5G_READS_PATH_ATTR) - TODO confirm. */
00224 BIOH5Gget_reads_path(const bioh5g_alignments aligns,
00225                      /*OUT*/ char **reads_path);  /* marked OUT: receives a string pointer; who owns and frees it is not visible here - verify */
00226 
00227 
00228 
00229 /*****************************************************************************
00230  * Append and read data                                                      *
00231  *****************************************************************************/
00232 
00233 
00234 
00242 BIOHDF_API biohdf_error  /* Presumably reports how many alignments the collection holds - TODO confirm. */
00243 BIOH5Gget_alignments_count(const bioh5g_alignments aligns,
00244                            /*OUT*/ biohdf_int64 *count);  /* marked OUT: receives the count */
00245 
00246 
00247 
00255 BIOHDF_API biohdf_error  /* Presumably creates an iterator over `aligns`; the matching release function declared here is BIOH5Gdestroy_alignments_iterator(). */
00256 BIOH5Gcreate_alignments_iterator(const bioh5g_alignments aligns,
00257                                  /*OUT*/ bioh5g_alignments_iterator *iter);  /* marked OUT: receives the iterator handle */
00258 
00259 
00272 BIOHDF_API biohdf_error  /* Presumably restricts the iterator to alignments on `reference` within [start, end] - TODO confirm. */
00273 BIOH5Gadd_alignments_iterator_range_filter(bioh5g_alignments_iterator iter,
00274                                            const char *reference,
00275                                            biohdf_int32 start,  /* coordinate base and inclusiveness are not visible here - verify */
00276                                            biohdf_int32 end);  /* whether `end` is inclusive is not visible here - verify */
00277 
00278 
00279 
00293 BIOHDF_API biohdf_error  /* Presumably restricts the iterator by mapping quality - TODO confirm. */
00294 BIOH5Gadd_alignments_iterator_mapq_filter(bioh5g_alignments_iterator iter,
00295                                           unsigned char min_mapq);  /* same type as the sam_mapq member; presumably a lower bound, inclusiveness not visible here - verify */
00296 
00297 
00298 
00311 BIOHDF_API biohdf_error  /* Presumably restricts the iterator by SAM flag bits - TODO confirm. */
00312 BIOH5Gadd_alignments_iterator_flags_filter(bioh5g_alignments_iterator iter,
00313                                            biohdf_int32u mask);  /* whether matching bits select or exclude an alignment is not visible here - verify. Note the sam_flags struct member is biohdf_int32, not biohdf_int32u. */
00314 
00315 
00316 
00325 BIOHDF_API biohdf_error  /* Presumably releases an iterator obtained from BIOH5Gcreate_alignments_iterator() - TODO confirm. */
00326 BIOH5Gdestroy_alignments_iterator(/*IN-OUT*/ bioh5g_alignments_iterator *iter);  /* takes the address of the handle (marked IN-OUT); presumably the handle is reset on return - verify */
00327 
00328 
00329 
00337 BIOHDF_API biohdf_error  /* Presumably appends one alignment record to the collection (this section is titled "Append and read data") - TODO confirm. */
00338 BIOH5Gadd_alignment(const bioh5g_alignments aligns,
00339                     const bioh5g_alignment_data *data);  /* input record; declared pointer-to-const, so the callee does not modify the struct through this pointer */
00340 
00341 
00342 
00352 BIOHDF_API biohdf_error  /* Presumably reports the index of the record most recently added with BIOH5Gadd_alignment() - TODO confirm. */
00353 BIOH5Gget_index_of_last_added_alignment(const bioh5g_alignments aligns,
00354                                         /*OUT*/ biohdf_int64 *index);  /* marked OUT: receives the index; the result when nothing has been added is not visible here */
00355 
00356 
00357 
00366 BIOHDF_API biohdf_error  /* Presumably advances the iterator and returns the next alignment; how end-of-iteration is signalled is not visible here - verify. */
00367 BIOH5Gget_next_alignment(bioh5g_alignments_iterator iter,
00368                          /*OUT*/ biohdf_int64 *index,  /* marked OUT: presumably the index of the returned alignment within the collection */
00369                          /*OUT*/ bioh5g_alignment_data **data);  /* marked OUT: receives a record pointer; presumably released with BIOH5Gfree_alignment_data() - confirm */
00370 
00371 
00372 
00381 BIOHDF_API biohdf_error  /* Presumably fetches the alignment stored at `index` in the collection - TODO confirm. */
00382 BIOH5Gget_alignment(const bioh5g_alignments aligns,
00383                     biohdf_int64 index,  /* valid range is not visible here - verify */
00384                     /*OUT*/ bioh5g_alignment_data **data);  /* marked OUT: receives a record pointer; presumably released with BIOH5Gfree_alignment_data() - confirm */
00385 
00386 
00387 
00396 BIOHDF_API biohdf_error  /* Presumably frees an alignment record; whether the char* members are freed as well is not visible here - verify. */
00397 BIOH5Gfree_alignment_data(/*IN-OUT*/ bioh5g_alignment_data **data);  /* takes the address of the record pointer (marked IN-OUT); presumably the pointer is reset on return - verify */
00398 
00399 
00400 
00401 /*****************************************************************************
00402  * Alignment hit index functionality                                         *
00403  *****************************************************************************/
00404 
00405 
00406 
00417 BIOHDF_API biohdf_error  /* Presumably builds an index over the collection using the selected method - TODO confirm. */
00418 BIOH5Gcreate_alignments_index(bioh5g_alignments aligns,
00419                               bioh5g_alignments_index_method method,  /* one of the enumerators of bioh5g_alignments_index_method declared in this header */
00420                               biohdf_index_creation_properties props);  /* type declared outside this header; whether NULL is accepted is not visible here */
00421 
00422 
00423 
00424 /*****************************************************************************
00425  * External file header storage                                              *
00426  *****************************************************************************/
00427 
00428 
00439 BIOHDF_API biohdf_error  /* Presumably stores the header text of an external alignment file alongside the collection (section title: "External file header storage") - TODO confirm. */
00440 BIOH5Gstore_alignment_file_header(const bioh5g_alignments aligns,
00441                                   bioh5g_alignments_format format,  /* SAM_FORMAT is the only value declared in this header */
00442                                   const char *header);  /* header text as a C string */
00443 
00444 
00445 
00458 BIOHDF_API biohdf_error  /* Presumably retrieves the header text saved by BIOH5Gstore_alignment_file_header() - TODO confirm. */
00459 BIOH5Gget_alignment_file_header(const bioh5g_alignments aligns,
00460                                 /*OUT*/ bioh5g_alignments_format *format,  /* marked OUT: receives the stored format */
00461                                 /*OUT*/ char **header);  /* marked OUT: receives a string pointer; who owns and frees it is not visible here - verify */
00462 
00463 
00464 
00465 /*****************************************************************************
00466  * Data formats                                                              *
00467  *****************************************************************************/
00468 
00469 
00483 BIOHDF_API biohdf_error  /* Presumably renders one alignment plus its read as text in the requested format - TODO confirm. */
00484 BIOH5Gcreate_alignment_string(const bioh5g_alignment_data *alignment,
00485                               const bioh5g_read_data *read,  /* bioh5g_read_data is declared outside this header (bioh5g_reads.h is included above) */
00486                               bioh5g_alignments_format format,
00487                               /*OUT*/ char **alignment_string);  /* marked OUT: receives a string pointer; who owns and frees it is not visible here - verify */
00488 
00489 
00490 
00507 BIOHDF_API biohdf_error  /* Presumably writes the text form of one alignment plus its read to `stream` - TODO confirm. */
00508 BIOH5Gwrite_alignment_to_stream(const bioh5g_alignment_data *alignment,
00509                                 const bioh5g_read_data *read,
00510                                 bioh5g_alignments_format format,
00511                                 FILE *stream);  /* stdio stream supplied by the caller; this parameter is why the header includes <stdio.h> */
00512 
00513 
00514 
00515 
00516 /*****************************************************************************
00517  * Accessor functions (needed for higher-language interoperation)            *
00518  *****************************************************************************/
00519 
00525 BIOHDF_API biohdf_error  /* Presumably allocates a bioh5g_alignment_data record for callers that cannot build the struct themselves (section title: "needed for higher-language interoperation") - TODO confirm. */
00526 BIOH5Gcreate_alignment_data(/*OUT*/ bioh5g_alignment_data **data);  /* marked OUT: receives the record pointer; initial member values are not visible here */
00527 
00528 
00529 
00530 BIOHDF_API biohdf_error  /* Getter matching, by name, the read_index member of bioh5g_alignment_data. */
00531 BIOH5Gget_alignment_read_index(bioh5g_alignment_data *data,
00532                                /*OUT*/ biohdf_int64 *read_index);  /* marked OUT: receives the value */
00533 
00534 
00535 
00536 BIOHDF_API biohdf_error  /* Setter matching, by name, the read_index member of bioh5g_alignment_data. */
00537 BIOH5Gset_alignment_read_index(bioh5g_alignment_data *data,
00538                                biohdf_int64 read_index);  /* new value, same type as the struct member */
00539 
00540 
00541 
00542 BIOHDF_API biohdf_error  /* Getter matching, by name, the reference member of bioh5g_alignment_data. */
00543 BIOH5Gget_alignment_reference(bioh5g_alignment_data *data,
00544                               /*OUT*/ char **reference);  /* marked OUT: whether this yields a copy or the stored pointer is not visible here - verify before freeing */
00545 
00546 
00547 
00548 BIOHDF_API biohdf_error  /* Setter matching, by name, the reference member of bioh5g_alignment_data. */
00549 BIOH5Gset_alignment_reference(bioh5g_alignment_data *data,
00550                               char *reference);  /* not const-qualified; whether the string is copied or the pointer is stored is not visible here - verify */
00551 
00552 
00553 
00554 BIOHDF_API biohdf_error  /* Getter matching, by name, the position member of bioh5g_alignment_data. */
00555 BIOH5Gget_alignment_position(bioh5g_alignment_data *data,
00556                              /*OUT*/ biohdf_int32 *position);  /* marked OUT: receives the value */
00557 
00558 
00559 
00560 BIOHDF_API biohdf_error  /* Setter matching, by name, the position member of bioh5g_alignment_data. */
00561 BIOH5Gset_alignment_position(bioh5g_alignment_data *data,
00562                              biohdf_int32 position);  /* new value, same type as the struct member */
00563 
00564 
00565 
00566 BIOHDF_API biohdf_error  /* Getter matching, by name, the length member of bioh5g_alignment_data. */
00567 BIOH5Gget_alignment_length(bioh5g_alignment_data *data,
00568                            /*OUT*/ biohdf_int32 *length);  /* marked OUT: receives the value */
00569 
00570 
00571 
00572 BIOHDF_API biohdf_error  /* Setter matching, by name, the length member of bioh5g_alignment_data. */
00573 BIOH5Gset_alignment_length(bioh5g_alignment_data *data,
00574                            biohdf_int32 length);  /* new value, same type as the struct member */
00575 
00576 
00577 
00578 BIOHDF_API biohdf_error  /* Getter matching, by name, the sam_mapq member of bioh5g_alignment_data. */
00579 BIOH5Gget_alignment_sam_mapq(bioh5g_alignment_data *data,
00580                              /*OUT*/ unsigned char *sam_mapq);  /* marked OUT: receives the value */
00581 
00582 
00583 
00584 BIOHDF_API biohdf_error  /* Setter matching, by name, the sam_mapq member of bioh5g_alignment_data. */
00585 BIOH5Gset_alignment_sam_mapq(bioh5g_alignment_data *data,
00586                              unsigned char sam_mapq);  /* new value, same type as the struct member */
00587 
00588 
00589 
00590 BIOHDF_API biohdf_error  /* Getter matching, by name, the sam_flags member of bioh5g_alignment_data. */
00591 BIOH5Gget_alignment_sam_flags(bioh5g_alignment_data *data,
00592                               /*OUT*/ biohdf_int32u *sam_flags);  /* marked OUT. NOTE(review): biohdf_int32u here, while the struct member is declared biohdf_int32 - confirm the intended signedness. */
00593 
00594 
00595 
00596 BIOHDF_API biohdf_error  /* Setter matching, by name, the sam_flags member of bioh5g_alignment_data. */
00597 BIOH5Gset_alignment_sam_flags(bioh5g_alignment_data *data,
00598                               biohdf_int32u sam_flags);  /* NOTE(review): biohdf_int32u here, while the struct member is declared biohdf_int32 - confirm the intended signedness. */
00599 
00600 
00601 
00602 BIOHDF_API biohdf_error  /* Getter matching, by name, the sam_cigar member of bioh5g_alignment_data. */
00603 BIOH5Gget_alignment_sam_cigar(bioh5g_alignment_data *data,
00604                               /*OUT*/ char **sam_cigar);  /* marked OUT: whether this yields a copy or the stored pointer is not visible here - verify before freeing */
00605 
00606 
00607 
00608 BIOHDF_API biohdf_error  /* Setter matching, by name, the sam_cigar member of bioh5g_alignment_data. */
00609 BIOH5Gset_alignment_sam_cigar(bioh5g_alignment_data *data,
00610                               char *sam_cigar);  /* not const-qualified; whether the string is copied or the pointer is stored is not visible here - verify */
00611 
00612 
00613 
00614 BIOHDF_API biohdf_error  /* Getter matching, by name, the sam_tags member of bioh5g_alignment_data. */
00615 BIOH5Gget_alignment_sam_tags(bioh5g_alignment_data *data,
00616                              /*OUT*/ char **sam_tags);  /* marked OUT: whether this yields a copy or the stored pointer is not visible here - verify before freeing */
00617 
00618 
00619 
00620 BIOHDF_API biohdf_error  /* Setter matching, by name, the sam_tags member of bioh5g_alignment_data. */
00621 BIOH5Gset_alignment_sam_tags(bioh5g_alignment_data *data,
00622                              char *sam_tags);  /* not const-qualified; whether the string is copied or the pointer is stored is not visible here - verify */
00623 
00624 
00625 
00626 BIOHDF_API biohdf_error  /* Getter matching, by name, the sam_rnext member of bioh5g_alignment_data. */
00627 BIOH5Gget_alignment_sam_rnext(bioh5g_alignment_data *data,
00628                               /*OUT*/ char **sam_rnext);  /* marked OUT: whether this yields a copy or the stored pointer is not visible here - verify before freeing */
00629 
00630 
00631 
00632 BIOHDF_API biohdf_error  /* Setter matching, by name, the sam_rnext member of bioh5g_alignment_data. */
00633 BIOH5Gset_alignment_sam_rnext(bioh5g_alignment_data *data,
00634                               char *sam_rnext);  /* not const-qualified; whether the string is copied or the pointer is stored is not visible here - verify */
00635 
00636 
00637 
00638 BIOHDF_API biohdf_error  /* Getter matching, by name, the sam_pnext member of bioh5g_alignment_data. */
00639 BIOH5Gget_alignment_sam_pnext(bioh5g_alignment_data *data,
00640                               /*OUT*/ biohdf_int32 *sam_pnext);  /* marked OUT: receives the value */
00641 
00642 
00643 
00644 BIOHDF_API biohdf_error  /* Setter matching, by name, the sam_pnext member of bioh5g_alignment_data. */
00645 BIOH5Gset_alignment_sam_pnext(bioh5g_alignment_data *data,
00646                               biohdf_int32 sam_pnext);  /* new value, same type as the struct member */
00647 
00648 
00649 
00650 BIOHDF_API biohdf_error  /* Getter matching, by name, the sam_tlen member of bioh5g_alignment_data. */
00651 BIOH5Gget_alignment_sam_tlen(bioh5g_alignment_data *data,
00652                              /*OUT*/ biohdf_int32 *sam_tlen);  /* marked OUT: receives the value */
00653 
00654 
00655 
00656 BIOHDF_API biohdf_error  /* Setter matching, by name, the sam_tlen member of bioh5g_alignment_data. */
00657 BIOH5Gset_alignment_sam_tlen(bioh5g_alignment_data *data,
00658                              biohdf_int32 sam_tlen);  /* new value, same type as the struct member */
00659 
00662 /*****************************************************************************
00663  * Alignments properties - create, destroy, access                           *
00664  *****************************************************************************/
00665 
00671 BIOHDF_API biohdf_error  /* Presumably creates a creation-properties object; the handle type is what BIOH5Gcreate_alignments_collection() takes as `props`. Default property values are not visible here. */
00672 BIOH5Gcreate_alignments_properties(/*OUT*/ bioh5g_alignments_creation_properties *props);  /* marked OUT: receives the properties handle */
00673 
00674 
00675 
00676 BIOHDF_API biohdf_error  /* Presumably releases a properties object obtained from BIOH5Gcreate_alignments_properties() - TODO confirm. */
00677 BIOH5Gdestroy_alignments_properties(/*IN-OUT*/ bioh5g_alignments_creation_properties *props);  /* the handle must be read in order to be destroyed, so this is an input as well; the close/destroy/free functions in this header mark the same kind of parameter IN-OUT */
00678 
00679 
00680 
00681 BIOHDF_API biohdf_error  /* Presumably sets the reads path to record for a collection created with these properties (compare BIOH5Gget_reads_path) - TODO confirm. */
00682 BIOH5Gset_alignments_properties_reads_path(bioh5g_alignments_creation_properties props,
00683                                            char *reads_path);  /* not const-qualified; whether the string is copied or the pointer is stored is not visible here - verify */
00684 
00685 
00686 
00687 BIOHDF_API biohdf_error  /* Presumably selects how reference-name strings are stored - TODO confirm. */
00688 BIOH5Gset_alignments_properties_refs_scheme(bioh5g_alignments_creation_properties props,
00689                                             biohdf_string_storage_scheme scheme);  /* type declared outside this header; its values are not visible here */
00690 
00691 
00692 
00693 BIOHDF_API biohdf_error  /* Presumably selects how tag strings are stored - TODO confirm. */
00694 BIOH5Gset_alignments_properties_tags_scheme(bioh5g_alignments_creation_properties props,
00695                                                  biohdf_string_storage_scheme scheme);  /* type declared outside this header; its values are not visible here */
00696 
00697 
00698 
00699 BIOHDF_API biohdf_error  /* Presumably selects how CIGAR strings are stored - TODO confirm. */
00700 BIOH5Gset_alignments_properties_cigar_scheme(bioh5g_alignments_creation_properties props,
00701                                              biohdf_string_storage_scheme scheme);  /* type declared outside this header; its values are not visible here */
00702 
00703 
00704 
00705 BIOHDF_API biohdf_error  /* Presumably sets a storage length for reference-name strings - TODO confirm. */
00706 BIOH5Gset_alignments_properties_refs_length(bioh5g_alignments_creation_properties props,
00707                                             size_t length);  /* units, and whether a terminating NUL is counted, are not visible here - verify */
00708 
00709 
00710 
00711 BIOHDF_API biohdf_error  /* Presumably sets a storage length for tag strings - TODO confirm. */
00712 BIOH5Gset_alignments_properties_tags_length(bioh5g_alignments_creation_properties props,
00713                                             size_t length);  /* units, and whether a terminating NUL is counted, are not visible here - verify */
00714 
00715 
00716 
00717 BIOHDF_API biohdf_error  /* Presumably sets a storage length for CIGAR strings - TODO confirm. */
00718 BIOH5Gset_alignments_properties_cigar_length(bioh5g_alignments_creation_properties props,
00719                                              size_t length);  /* units, and whether a terminating NUL is counted, are not visible here - verify */
00720 
00721 
00722 
00723 BIOHDF_API biohdf_error  /* Presumably sets the storage chunk size used for the collection - TODO confirm. */
00724 BIOH5Gset_alignments_properties_chunk_size(bioh5g_alignments_creation_properties props,
00725                                            biohdf_int64 chunk_size);  /* units (records vs. bytes) and valid range are not visible here - verify */
00726 
00727 
00728 
00729 BIOHDF_API biohdf_error  /* Presumably sets the compression level used for the collection - TODO confirm. */
00730 BIOH5Gset_alignments_properties_compression_level(bioh5g_alignments_creation_properties props,
00731                                                   compression_level level);  /* compression_level is declared outside this header; its values are not visible here */
00732 
00733 
00737 #endif
 All Data Structures Variables