Browse Source

added another example on pileup

* PileupColumn.reads now returns SortedRange
* new method PileupColumn.reads_starting_here
* made BamRead.raw_sequence_data private
remotes/georgeg/no_streams
lomereiter 9 years ago
parent
commit
168052e154
  1. 26
      bio/bam/pileup.d
  2. 75
      bio/bam/read.d
  3. 4
      bio/bam/serialization/json.d
  4. 4
      bio/bam/serialization/sam.d
  5. 28
      examples/example7.d

26
bio/bam/pileup.d

@ -168,12 +168,16 @@ struct PileupRead(Read=EagerBamRead) {
}
}
static assert(isBamRead!(PileupRead!BamRead));
static assert(isBamRead!(PileupRead!EagerBamRead));
/// Represents a single pileup column
struct PileupColumn(R) {
private {
ulong _position;
int _ref_id = -1;
R _reads;
size_t _n_starting_here;
}
/// Reference base. 'N' if not available.
@ -198,9 +202,18 @@ struct PileupColumn(R) {
return _position;
}
/// Reads overlapping the position
/// Reads overlapping the position, sorted by coordinate
auto reads() @property {
return _reads[];
return assumeSorted!compareCoordinates(_reads[]);
}
/// Reads that have leftmost mapped position at this column
auto reads_starting_here() @property {
debug {
import std.stdio;
writeln(_n_starting_here);
}
return _reads[$ - _n_starting_here .. $];
}
/// Shortcut for map!(read => read.current_base)(reads)
@ -281,6 +294,9 @@ class PileupRange(R, alias TColumn=PileupColumn) {
}
_read_buf.shrinkTo(survived);
// unless range is empty, this value is
_column._n_starting_here = 0; // updated either in initNewReference()
// or in the loop below
if (!_reads.empty) {
if (_reads.front.ref_id != _column._ref_id &&
@ -288,6 +304,7 @@ class PileupRange(R, alias TColumn=PileupColumn) {
{
initNewReference();
} else {
size_t n = 0;
while (!_reads.empty &&
_reads.front.position == pos &&
_reads.front.ref_id == _column._ref_id)
@ -295,7 +312,9 @@ class PileupRange(R, alias TColumn=PileupColumn) {
auto read = _reads.front;
add(read);
_reads.popFront();
++n;
}
_column._n_starting_here = n;
}
}
@ -307,6 +326,7 @@ class PileupRange(R, alias TColumn=PileupColumn) {
_column._position = read.position;
_column._ref_id = read.ref_id;
auto n = 1;
add(read);
_reads.popFront();
@ -315,12 +335,14 @@ class PileupRange(R, alias TColumn=PileupColumn) {
read = _reads.front;
if (read.position == _column._position) {
add(read);
++n;
_reads.popFront();
} else {
break;
}
}
_column._n_starting_here = n;
_column._reads = _read_buf.data;
}
}

75
bio/bam/read.d

@ -280,8 +280,7 @@ struct BamRead {
return cast(string)str;
}
/// Sequence data
@property const(ubyte)[] raw_sequence_data() const nothrow {
private @property const(ubyte)[] raw_sequence_data() const nothrow {
return _chunk[_seq_offset .. _seq_offset + (_l_seq + 1) / 2];
}
@ -1149,15 +1148,64 @@ struct EagerBamRead {
}
}
static assert(is(EagerBamRead : BamRead));
/// Checks if $(D T) behaves like $(D BamRead)
template isBamRead(T)
{
static if (is(Unqual!T : BamRead))
enum isBamRead = true;
else
enum isBamRead = __traits(compiles,
{
T t; bool p;
p = t.ref_id == 1; p = t.position == 2; p = t.bin.id == 3;
p = t.mapping_quality == 4; p = t.flag == 5; p = t.sequence_length == 6;
p = t.next_ref_id == 7; p = t.next_pos == 8; p = t.template_length == 9;
p = t.is_paired; p = t.proper_pair; p = t.is_unmapped;
p = t.mate_is_unmapped; p = t.mate_is_reverse_strand; p = t.is_first_of_pair;
p = t.is_second_of_pair; p = t.is_secondary_alignment; p = t.failed_quality_control;
p = t.is_duplicate; p = t.strand == '+'; p = t.name == "";
p = t.cigar[0].type == 'M'; p = t.basesCovered() > 42; p = t.cigarString() == "";
p = t.sequence[0] == 'A'; p = t.base_qualities[0] == 0;
});
}
/// Comparison function for 'queryname' sorting order
/// (return whether first read is 'less' than second)
bool compareReadNames(R1, R2)(const auto ref R1 a1, const auto ref R2 a2) {
///
/// This function can be called on:
/// * two reads
/// * read and string in any order
bool compareReadNames(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isBamRead!R1 && isBamRead!R2)
{
return a1.name < a2.name;
}
/// ditto
bool compareReadNames(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isBamRead!R1 && isSomeString!R2)
{
return a1.name < a2;
}
/// ditto
bool compareReadNames(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isSomeString!R1 && isBamRead!R2)
{
return a1 < a2.name;
}
/// Comparison function for 'coordinate' sorting order
/// (return whether first read is 'less' than second)
bool compareCoordinates(R1, R2)(const auto ref R1 a1, const auto ref R2 a2) {
/// (returns whether first read is 'less' than second)
///
/// This function can be called on:
/// * two reads (in this case, reference IDs are also taken into account)
/// * read and integer in any order
bool compareCoordinates(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isBamRead!R1 && isBamRead!R2)
{
if (a1.ref_id == -1) return false; // unmapped reads should be last
if (a2.ref_id == -1) return true;
if (a1.ref_id < a2.ref_id) return true;
@ -1165,3 +1213,20 @@ bool compareCoordinates(R1, R2)(const auto ref R1 a1, const auto ref R2 a2) {
if (a1.position < a2.position) return true;
return false;
}
/// ditto
bool compareCoordinates(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isBamRead!R1 && isIntegral!R2)
{
return a1.position < a2;
}
/// ditto
bool compareCoordinates(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isIntegral!R1 && isBamRead!R2)
{
return a1 < a2.position;
}
static assert(isTwoWayCompatible!(compareReadNames, BamRead, string));
static assert(isTwoWayCompatible!(compareCoordinates, BamRead, int));

4
bio/bam/serialization/json.d

@ -258,10 +258,10 @@ void jsonSerialize(S)(BamRead alignment, ReferenceSequenceInfo[] info, ref S str
putinteger(stream, alignment.template_length);
putstring(stream, `,"seq":"`);
if (alignment.raw_sequence_data.length == 0) {
if (alignment.sequence_length == 0) {
putstring(stream, `*","qual":`);
} else {
foreach(char c; alignment.sequence()) {
foreach(char c; alignment.sequence) {
putcharacter(stream, c);
}
putstring(stream, `","qual":`);

4
bio/bam/serialization/sam.d

@ -205,10 +205,10 @@ void serialize(S, R)(auto ref R alignment, ReferenceSequenceInfo[] info, auto re
putinteger(stream, alignment.template_length);
putcharacter(stream, '\t');
if (alignment.raw_sequence_data.length == 0) {
if (alignment.sequence_length == 0) {
putstring(stream, "*\t");
} else {
foreach(char c; alignment.sequence()) {
foreach(char c; alignment.sequence) {
putcharacter(stream, c);
}
putcharacter(stream, '\t');

28
examples/example7.d

@ -0,0 +1,28 @@
// run example: rdmd -I.. example7.d
import bio.bam.reader;
import bio.bam.pileup;
import bio.bam.read : compareCoordinates;
import std.range, std.algorithm, std.datetime, std.stdio, std.array;
void main() {
auto bam = new BamReader("../test/data/illu_20_chunk.bam");
auto pileup = makePileup(bam.reads, true);
// Reads in every pileup column are sorted by coordinate.
// Therefore the following holds:
assert(equal(
pileup.map!(column => column.reads.equalRange(column.position))()
.joiner(),
bam.reads));
// There is also easier and faster way to get reads starting at the column:
pileup = makePileup(bam.reads, true); // (initialize pileup engine again)
assert(equal(
pileup.map!(column => column.reads_starting_here)()
.joiner(),
bam.reads));
}
Loading…
Cancel
Save