|
BioHDF version 0.3 alpha
Scalable NGS Data Storage Based on HDF5
|
00001 /***************************************************************************** 00002 * Copyright by The HDF Group * 00003 * All rights reserved. * 00004 * * 00005 * This file is part of BioHDF. The full BioHDF copyright notice, including * 00006 * terms governing use, modification, and redistribution, is contained in * 00007 * the file COPYING. COPYING can be found at the root of the source code * 00008 * distribution tree. If you do not have access to this file, you may * 00009 * request a copy from help@hdfgroup.org. * 00010 *****************************************************************************/ 00011 00020 #ifndef _BIOH5G_ALIGNMENTS_H 00021 #define _BIOH5G_ALIGNMENTS_H 00022 00023 #include <stdio.h> 00024 00025 #include "biohdf_api.h" 00026 #include "biohdf_error.h" 00027 #include "biohdf_file.h" 00028 #include "biohdf_utility.h" 00029 00030 #include "bioh5g_api.h" 00031 #include "bioh5g_reads.h" 00032 00033 00034 00035 /***************************************************************************** 00036 * Attribute Names * 00037 *****************************************************************************/ 00038 00039 00040 00042 #define BIOH5G_READS_PATH_ATTR "READS_PATH" 00043 00044 00045 00047 #define BIOH5G_INDEX_METHOD_ATTR "INDEX_METHOD" 00048 00049 00050 00051 /***************************************************************************** 00052 * Type definitions * 00053 *****************************************************************************/ 00054 00055 00056 00058 typedef struct _bioh5g_alignments *bioh5g_alignments; 00059 00060 00061 00063 typedef struct _bioh5g_alignments_creation_properties *bioh5g_alignments_creation_properties; 00064 00065 00066 00068 typedef struct _bioh5g_alignments_iterator *bioh5g_alignments_iterator; 00069 00070 00071 00072 /***************************************************************************** 00073 * Structs and enums * 00074 *****************************************************************************/ 00075 00076 00077 00079 typedef enum 00080 { 00081 SAM_FORMAT 00082 } bioh5g_alignments_format; 00083 00084 00085 00087 typedef enum 00088 { 00089 UNINDEXED = 0, 00090 REF_POS_SECONDARY = 1 00092 /* REF_POS_PRIMARY = 2, */ 00093 /* REF_POS_NCLIST_PRIMARY = 3, */ 00094 /* REF_POS_NCLIST_PRIMARY = 4, */ 00095 00096 } bioh5g_alignments_index_method; 00097 00098 00099 00100 /***************************************************************************** 00101 * Data container * 00102 *****************************************************************************/ 00103 00104 00105 00110 typedef struct 00111 { 00112 /* basic alignment data */ 00113 biohdf_int64 read_index; 00114 char *reference; 00115 biohdf_int32 position; 00116 biohdf_int32 length; 00118 /* SAM data */ 00119 unsigned char sam_mapq; 00120 biohdf_int32 sam_flags; 00121 char *sam_cigar; 00122 char *sam_tags; 00124 /* SAM template data */ 00125 char *sam_rnext; 00126 biohdf_int32 sam_pnext; 00127 biohdf_int32 sam_tlen; 00129 } bioh5g_alignment_data; 00130 00131 00132 00133 /***************************************************************************** 00134 * Create, open, close * 00135 *****************************************************************************/ 00136 00137 00138 00154 BIOHDF_API biohdf_error 00155 BIOH5Gcheck_alignments_presence(const biohdf_file file, 00156 const char *path, 00157 /*OUT*/ biohdf_bool *presence); 00158 00159 00160 00173 BIOHDF_API biohdf_error 00174 BIOH5Gcreate_alignments_collection(const biohdf_file file, 00175 const bioh5g_alignments_creation_properties props, 00176 const char *path, 00177 /*OUT*/ bioh5g_alignments *aligns); 00178 00179 00180 00190 BIOHDF_API biohdf_error 00191 BIOH5Gopen_alignments_collection(const biohdf_file file, 00192 const char *path, 00193 biohdf_open_mode mode, 00194 /*OUT*/ bioh5g_alignments *aligns); 00195 00196 00197 00206 BIOHDF_API biohdf_error 00207 BIOH5Gclose_alignments_collection(/*IN-OUT*/ bioh5g_alignments *aligns); 00208 00209 00210 00223 BIOHDF_API biohdf_error 00224 BIOH5Gget_reads_path(const bioh5g_alignments aligns, 00225 /*OUT*/ char **reads_path); 00226 00227 00228 00229 /***************************************************************************** 00230 * Append and read data * 00231 *****************************************************************************/ 00232 00233 00234 00242 BIOHDF_API biohdf_error 00243 BIOH5Gget_alignments_count(const bioh5g_alignments aligns, 00244 /*OUT*/ biohdf_int64 *count); 00245 00246 00247 00255 BIOHDF_API biohdf_error 00256 BIOH5Gcreate_alignments_iterator(const bioh5g_alignments aligns, 00257 /*OUT*/ bioh5g_alignments_iterator *iter); 00258 00259 00272 BIOHDF_API biohdf_error 00273 BIOH5Gadd_alignments_iterator_range_filter(bioh5g_alignments_iterator iter, 00274 const char *reference, 00275 biohdf_int32 start, 00276 biohdf_int32 end); 00277 00278 00279 00293 BIOHDF_API biohdf_error 00294 BIOH5Gadd_alignments_iterator_mapq_filter(bioh5g_alignments_iterator iter, 00295 unsigned char min_mapq); 00296 00297 00298 00311 BIOHDF_API biohdf_error 00312 BIOH5Gadd_alignments_iterator_flags_filter(bioh5g_alignments_iterator iter, 00313 biohdf_int32u mask); 00314 00315 00316 00325 BIOHDF_API biohdf_error 00326 BIOH5Gdestroy_alignments_iterator(/*IN-OUT*/ bioh5g_alignments_iterator *iter); 00327 00328 00329 00337 BIOHDF_API biohdf_error 00338 BIOH5Gadd_alignment(const bioh5g_alignments aligns, 00339 const bioh5g_alignment_data *data); 00340 00341 00342 00352 BIOHDF_API biohdf_error 00353 BIOH5Gget_index_of_last_added_alignment(const bioh5g_alignments aligns, 00354 /*OUT*/ biohdf_int64 *index); 00355 00356 00357 00366 BIOHDF_API biohdf_error 00367 BIOH5Gget_next_alignment(bioh5g_alignments_iterator iter, 00368 /*OUT*/ biohdf_int64 *index, 00369 /*OUT*/ bioh5g_alignment_data **data); 00370 00371 00372 00381 BIOHDF_API biohdf_error 00382 BIOH5Gget_alignment(const bioh5g_alignments aligns, 00383 biohdf_int64 index, 00384 /*OUT*/ bioh5g_alignment_data **data); 00385 00386 00387 00396 BIOHDF_API biohdf_error 00397 BIOH5Gfree_alignment_data(/*IN-OUT*/ bioh5g_alignment_data **data); 00398 00399 00400 00401 /***************************************************************************** 00402 * Alignment hit index functionality * 00403 *****************************************************************************/ 00404 00405 00406 00417 BIOHDF_API biohdf_error 00418 BIOH5Gcreate_alignments_index(bioh5g_alignments aligns, 00419 bioh5g_alignments_index_method method, 00420 biohdf_index_creation_properties props); 00421 00422 00423 00424 /***************************************************************************** 00425 * External file header storage * 00426 *****************************************************************************/ 00427 00428 00439 BIOHDF_API biohdf_error 00440 BIOH5Gstore_alignment_file_header(const bioh5g_alignments aligns, 00441 bioh5g_alignments_format format, 00442 const char *header); 00443 00444 00445 00458 BIOHDF_API biohdf_error 00459 BIOH5Gget_alignment_file_header(const bioh5g_alignments aligns, 00460 /*OUT*/ bioh5g_alignments_format *format, 00461 /*OUT*/ char **header); 00462 00463 00464 00465 /***************************************************************************** 00466 * Data formats * 00467 *****************************************************************************/ 00468 00469 00483 BIOHDF_API biohdf_error 00484 BIOH5Gcreate_alignment_string(const bioh5g_alignment_data *alignment, 00485 const bioh5g_read_data *read, 00486 bioh5g_alignments_format format, 00487 /*OUT*/ char **alignment_string); 00488 00489 00490 00507 BIOHDF_API biohdf_error 00508 BIOH5Gwrite_alignment_to_stream(const bioh5g_alignment_data *alignment, 00509 const bioh5g_read_data *read, 00510 bioh5g_alignments_format format, 00511 FILE *stream); 00512 00513 00514 00515 00516 /***************************************************************************** 00517 * Accessor functions (needed for higher-language interoperation) * 00518 *****************************************************************************/ 00519 00525 BIOHDF_API biohdf_error 00526 BIOH5Gcreate_alignment_data(/*OUT*/ bioh5g_alignment_data **data); 00527 00528 00529 00530 BIOHDF_API biohdf_error 00531 BIOH5Gget_alignment_read_index(bioh5g_alignment_data *data, 00532 /*OUT*/ biohdf_int64 *read_index); 00533 00534 00535 00536 BIOHDF_API biohdf_error 00537 BIOH5Gset_alignment_read_index(bioh5g_alignment_data *data, 00538 biohdf_int64 read_index); 00539 00540 00541 00542 BIOHDF_API biohdf_error 00543 BIOH5Gget_alignment_reference(bioh5g_alignment_data *data, 00544 /*OUT*/ char **reference); 00545 00546 00547 00548 BIOHDF_API biohdf_error 00549 BIOH5Gset_alignment_reference(bioh5g_alignment_data *data, 00550 char *reference); 00551 00552 00553 00554 BIOHDF_API biohdf_error 00555 BIOH5Gget_alignment_position(bioh5g_alignment_data *data, 00556 /*OUT*/ biohdf_int32 *position); 00557 00558 00559 00560 BIOHDF_API biohdf_error 00561 BIOH5Gset_alignment_position(bioh5g_alignment_data *data, 00562 biohdf_int32 position); 00563 00564 00565 00566 BIOHDF_API biohdf_error 00567 BIOH5Gget_alignment_length(bioh5g_alignment_data *data, 00568 /*OUT*/ biohdf_int32 *length); 00569 00570 00571 00572 BIOHDF_API biohdf_error 00573 BIOH5Gset_alignment_length(bioh5g_alignment_data *data, 00574 biohdf_int32 length); 00575 00576 00577 00578 BIOHDF_API biohdf_error 00579 BIOH5Gget_alignment_sam_mapq(bioh5g_alignment_data *data, 00580 /*OUT*/ unsigned char *sam_mapq); 00581 00582 00583 00584 BIOHDF_API biohdf_error 00585 BIOH5Gset_alignment_sam_mapq(bioh5g_alignment_data *data, 00586 unsigned char sam_mapq); 00587 00588 00589 00590 BIOHDF_API biohdf_error 00591 BIOH5Gget_alignment_sam_flags(bioh5g_alignment_data *data, 00592 /*OUT*/ biohdf_int32u *sam_flags); 00593 00594 00595 00596 BIOHDF_API biohdf_error 00597 BIOH5Gset_alignment_sam_flags(bioh5g_alignment_data *data, 00598 biohdf_int32u sam_flags); 00599 00600 00601 00602 BIOHDF_API biohdf_error 00603 BIOH5Gget_alignment_sam_cigar(bioh5g_alignment_data *data, 00604 /*OUT*/ char **sam_cigar); 00605 00606 00607 00608 BIOHDF_API biohdf_error 00609 BIOH5Gset_alignment_sam_cigar(bioh5g_alignment_data *data, 00610 char *sam_cigar); 00611 00612 00613 00614 BIOHDF_API biohdf_error 00615 BIOH5Gget_alignment_sam_tags(bioh5g_alignment_data *data, 00616 /*OUT*/ char **sam_tags); 00617 00618 00619 00620 BIOHDF_API biohdf_error 00621 BIOH5Gset_alignment_sam_tags(bioh5g_alignment_data *data, 00622 char *sam_tags); 00623 00624 00625 00626 BIOHDF_API biohdf_error 00627 BIOH5Gget_alignment_sam_rnext(bioh5g_alignment_data *data, 00628 /*OUT*/ char **sam_rnext); 00629 00630 00631 00632 BIOHDF_API biohdf_error 00633 BIOH5Gset_alignment_sam_rnext(bioh5g_alignment_data *data, 00634 char *sam_rnext); 00635 00636 00637 00638 BIOHDF_API biohdf_error 00639 BIOH5Gget_alignment_sam_pnext(bioh5g_alignment_data *data, 00640 /*OUT*/ biohdf_int32 *sam_pnext); 00641 00642 00643 00644 BIOHDF_API biohdf_error 00645 BIOH5Gset_alignment_sam_pnext(bioh5g_alignment_data *data, 00646 biohdf_int32 sam_pnext); 00647 00648 00649 00650 BIOHDF_API biohdf_error 00651 BIOH5Gget_alignment_sam_tlen(bioh5g_alignment_data *data, 00652 /*OUT*/ biohdf_int32 *sam_tlen); 00653 00654 00655 00656 BIOHDF_API biohdf_error 00657 BIOH5Gset_alignment_sam_tlen(bioh5g_alignment_data *data, 00658 biohdf_int32 sam_tlen); 00659 00662 /***************************************************************************** 00663 * Alignments properties - create, destroy, access * 00664 *****************************************************************************/ 00665 00671 BIOHDF_API biohdf_error 00672 BIOH5Gcreate_alignments_properties(/*OUT*/ bioh5g_alignments_creation_properties *props); 00673 00674 00675 00676 BIOHDF_API biohdf_error 00677 BIOH5Gdestroy_alignments_properties(/*OUT*/ bioh5g_alignments_creation_properties *props); 00678 00679 00680 00681 BIOHDF_API biohdf_error 00682 BIOH5Gset_alignments_properties_reads_path(bioh5g_alignments_creation_properties props, 00683 char *reads_path); 00684 00685 00686 00687 BIOHDF_API biohdf_error 00688 BIOH5Gset_alignments_properties_refs_scheme(bioh5g_alignments_creation_properties props, 00689 biohdf_string_storage_scheme scheme); 00690 00691 00692 00693 BIOHDF_API biohdf_error 00694 BIOH5Gset_alignments_properties_tags_scheme(bioh5g_alignments_creation_properties props, 00695 biohdf_string_storage_scheme scheme); 00696 00697 00698 00699 BIOHDF_API biohdf_error 00700 BIOH5Gset_alignments_properties_cigar_scheme(bioh5g_alignments_creation_properties props, 00701 biohdf_string_storage_scheme scheme); 00702 00703 00704 00705 BIOHDF_API biohdf_error 00706 BIOH5Gset_alignments_properties_refs_length(bioh5g_alignments_creation_properties props, 00707 size_t length); 00708 00709 00710 00711 BIOHDF_API biohdf_error 00712 BIOH5Gset_alignments_properties_tags_length(bioh5g_alignments_creation_properties props, 00713 size_t length); 00714 00715 00716 00717 BIOHDF_API biohdf_error 00718 BIOH5Gset_alignments_properties_cigar_length(bioh5g_alignments_creation_properties props, 00719 size_t length); 00720 00721 00722 00723 BIOHDF_API biohdf_error 00724 BIOH5Gset_alignments_properties_chunk_size(bioh5g_alignments_creation_properties props, 00725 biohdf_int64 chunk_size); 00726 00727 00728 00729 BIOHDF_API biohdf_error 00730 BIOH5Gset_alignments_properties_compression_level(bioh5g_alignments_creation_properties props, 00731 compression_level level); 00732 00733 00737 #endif
1.7.3