Browse Source

improving docs...

remotes/georgeg/no_streams
lomereiter 9 years ago
parent
commit
24474ac73b
  1. 28
      bio/bam/pileup.d
  2. 62
      bio/bam/reference.d
  3. 64
      bio/bam/tagvalue.d

28
bio/bam/pileup.d

@ -17,39 +17,37 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/// This module is used for iterating over columns of alignment.
/// $(BR)
/// The function makePileup is called on
/// $(P This module is used for iterating over columns of alignment.)
/// $(P The function makePileup is called on
/// a range of coordinate-sorted reads mapped to the same reference.
/// It returns an input range of columns.
/// $(BR)
/// This returned range can then be iterated with $(D foreach).
/// It returns an input range of columns.)
/// $(P This returned range can then be iterated with $(D foreach).
/// First column is located at the same position on the reference,
/// as the first base of the first read.
/// Each $(D popFront) operation advances current position on the
/// reference. Currently, there's no option to skip non-covered regions.
/// $(BR)
/// Each column keeps set of reads that overlap corresponding position
/// Each $(D popFront) operation advances current position on the
/// reference. Currently, there's no option to skip non-covered regions.)
/// $(P Each column keeps set of reads that overlap corresponding position
/// on the reference.
/// If reads contain MD tags, and makePileup was asked
/// to use them, reference base at the column is also available.
/// to use them, reference base at the column is also available.)
/// $(BR)
/// Each read preserves all standard read properties
/// but also keeps column-related information, namely:
/// $(UL
/// but also keeps column-related information, namely
/// <ul>
/// $(LI number of bases consumed from the read sequence so far)
/// $(LI current CIGAR operation and offset in it)
/// $(LI all CIGAR operations before and after current one))
/// $(LI all CIGAR operations before and after current one)</ul>
/// $(BR)
/// It is clear from the above that current CIGAR operation cannot be an insertion.
/// The following are suggested ways to check for them:
/// $(UL
/// <ul>
/// $(LI $(D cigar_after.length > 0 &&
/// cigar_operation_offset == cigar_operation.length - 1 &&
/// cigar_after[0].type == 'I'))
/// $(LI $(D cigar_before.length > 0 &&
/// cigar_operation_offset == 0 &&
/// cigar_before[$ - 1].type == 'I')))
/// cigar_before[$ - 1].type == 'I'))</ul>
/// $(BR)
/// Examples:
/// ---------------------------------------------------------

62
bio/bam/reference.d

@ -17,6 +17,23 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/// $(P Each BAM file contains reads aligned to different reference sequences.)
/// $(P These sequences have unique identifiers in BAM file, starting from 0.
/// Unmapped reads are associated with id = -1.)
/// $(P If BAI file is available, fast region queries are available, that is,
/// getting all reads that overlap given region. This is achieved via $(D opSlice) method.)
///
/// Example:
/// -----------------------------
/// import bio.bam.reader, std.stdio;
/// ...
/// auto bam = new BamReader("file.bam");
/// auto refseq = bam["chr17"];
/// writeln(refseq.name, " - length ", refseq.length);
/// foreach (read; refseq[1234 .. 5678])
/// if (read.cigar.length > 1)
/// writeln(read.name, " ", read.cigarString());
/// -----------------------------
module bio.bam.reference;
public import bio.bam.referenceinfo;
@ -29,12 +46,10 @@ import std.stream;
import std.exception;
import std.array;
/**
Represents reference sequence.
*/
///
struct ReferenceSequence {
/// Name of reference sequence as in BAM file
/// Name
string name() @property const {
return _info.name;
}
@ -49,14 +64,16 @@ struct ReferenceSequence {
return _ref_id;
}
/// Get alignments overlapping [start, end)
/// Get alignments overlapping [start, end) region.
/// $(BR)
/// Coordinates are 0-based.
auto opSlice(uint start, uint end) {
enforce(start < end, "start must be less than end");
enforce(_manager !is null, "random access is not available");
return _manager.getReads(_ref_id, start, end);
}
/// Get all alignments
/// Get all alignments for this reference
auto opSlice() {
return opSlice(0, length);
}
@ -66,20 +83,10 @@ struct ReferenceSequence {
return opSlice().front.dup;
}
/// First position on the reference overlapped by reads (0-based)
/// Returns -1 if set of reads is empty.
int firstPosition() {
auto reads = opSlice();
if (reads.empty) {
return -1;
}
return reads.front.position;
}
/// Virtual offset at which reads aligned to this reference start.
/// Virtual offset at which reads, aligned to this reference, start in BAM file.
/// If there are no reads aligned to this reference, returns virtual
/// offset of the EOF block if it's presented, or the end of file.
VirtualOffset startVirtualOffset() {
bio.core.bgzf.virtualoffset.VirtualOffset startVirtualOffset() {
auto reads = opSlice();
if (reads.empty) {
return _manager.eofVirtualOffset();
@ -87,10 +94,10 @@ struct ReferenceSequence {
return reads.front.start_virtual_offset;
}
/// Virtual offset before which reads aligned to this reference stop.
/// Virtual offset before which reads, aligned to this reference, stop.
/// If there are no reads aligned to this reference, returns virtual
/// offset of the EOF block if it's presented, or the end of file.
VirtualOffset endVirtualOffset() {
bio.core.bgzf.virtualoffset.VirtualOffset endVirtualOffset() {
if (opSlice().empty) {
return _manager.eofVirtualOffset();
@ -124,8 +131,21 @@ struct ReferenceSequence {
return result;
}
/// First position on the reference overlapped by reads (0-based)
/// $(BR)
/// Returns -1 if set of reads is empty.
int firstPosition() {
auto reads = opSlice();
if (reads.empty) {
return -1;
}
return reads.front.position;
}
/// Last position on the reference overlapped by reads (0-based)
/// $(BR)
/// Returns -1 if set of reads is empty.
int lastPosition() {
// The key idea is
// 1) use last offset from linear index

64
bio/bam/tagvalue.d

@ -17,6 +17,31 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/// BAM records may carry arbitrary information in tags.
/// $(BR)
/// $(D Value) type provides convenient way to work with this information.
///
/// Example:
/// --------------------------------
/// import bio.bam.reader, bio.bam.tagvalue;
/// ...
/// auto bam = new BamReader("file.bam");
/// Value v = bam.reads.front["MD"];
/// assert(v.is_string);
/// v = 5;
/// assert(v.is_signed); // because 5 is of type int which is signed
/// assert(v == "5"); // converted to string and then compared
/// v = "abc";
/// assert(v.is_string);
/// v = [1, 2, 3]; // integer and float arrays are supported
/// assert(v.is_numeric_array);
/// v = [1.5f, 2.3f, 17.0f]; // double[] arrays must be converted to float[]
/// assert(v.is_numeric_array);
/// v = 5.6;
/// assert(v.is_float);
/// v = -17;
/// assert(v.is_signed);
/// ----------------------------------
module bio.bam.tagvalue;
public import std.conv;
@ -63,7 +88,7 @@ alias TypeTuple!(CharToType!('c', byte),
CharToType!('I', uint),
CharToType!('f', float)) ArrayElementTagValueTypes;
/**
/*
Useful in TagStorage implementations, for skipping elements
Params:
@ -86,7 +111,7 @@ uint charToSizeof(char c) {
mixin(charToSizeofHelper());
}
/**
/*
Pair of type and its ubyte identifier.
(Currently, ubyte is enough, but that might change in the future.)
@ -165,6 +190,7 @@ private template GetType(U) {
/// assert(v.tag == GetTypeId!string);
/// -----------------------------------
template GetTypeId(T) {
///
enum GetTypeId = TypeIdMap[staticIndexOf!(T, staticMap!(GetType, TypeIdMap))].Id;
}
@ -281,16 +307,10 @@ string injectOpCast() {
Tagged union, allows to store
8/16/32-bit integers, floats, chars, strings,
and arrays of integers/floats.
Currently, opCast is very restrictive and requires that
the requested type is exactly the same as stored in Value
(otherwise, ConvException is thrown). That means that
you can't cast Value to string when it contains integer,
although it's possible to convert integer to string.
*/
struct Value {
/**
/*
Notice that having union first allows to do simple casts,
without using opCast(). That's a bit hackish but
allows for better speed.
@ -325,16 +345,19 @@ struct Value {
mixin(injectOpAssign());
mixin(injectOpCast());
///
final void opAssign(Value v) {
bam_typeid = v.bam_typeid;
_tag = v._tag;
u = v.u;
}
/// ditto
final void opAssign(typeof(null) n) {
_tag = GetTypeId!(typeof(null));
}
///
final bool opEquals(T)(const T val) {
try {
return to!T(this) == val;
@ -343,10 +366,12 @@ struct Value {
}
}
///
string toString() const {
return opCast!string();
}
///
this(T)(T value) {
opAssign(value);
}
@ -364,27 +389,40 @@ struct Value {
}
}
/// Holds $(D null). Represents non-existing tag. Such values can't be stored in tags.
bool is_nothing() @property const { return _tag == GetTypeId!(typeof(null)); }
/// char
bool is_character() @property const { return _tag == GetTypeId!char; }
/// float
bool is_float() @property const { return _tag == GetTypeId!float; }
/// ubyte[]/byte[]/ushort[]/short[]/uint[]/int[]/float[]
bool is_numeric_array() @property const { return (_tag & 0b11) == 0b01; }
/// ubyte[]/byte[]/ushort[]/short[]/uint[]/int[]
bool is_array_of_integers() @property const { return (_tag & 0b111) == 0b001; }
/// float[]
bool is_array_of_floats() @property const { return (_tag & 0b111) == 0b101; }
/// ubyte/byte/ushort/short/uint/int
bool is_integer() @property const { return (_tag & 0b1111) == 0; }
/// true if the value is unsigned integer
/// ubyte/ushort/uint
bool is_unsigned() @property const { return (_tag & 0b11111) == 0; }
/// true if the value is signed integer
/// byte/short/int
bool is_signed() @property const { return (_tag & 0b11111) == 0b10000; }
/// true if the value represents 'Z' or 'H' tag
/// 'Z' or 'H' tag
bool is_string() @property const { return (_tag & 0b11) == 0b11; }
/// true if the value represents 'H' tag
/// 'H' tag
bool is_hexadecimal_string() @property const { return (_tag & 0b111) == 0b111; }
/// Serializes value in MessagePack format
public void toMsgpack(Packer)(ref Packer packer) const {
switch (_tag) {
case GetTypeId!byte: packer.pack(*cast(byte*)(&u)); break;

Loading…
Cancel
Save