Browse Source

fixes; example of iterating several datasets

remotes/georgeg/bam_output_redesign
lomereiter 9 years ago
parent
commit
b3f2466d46
  1. 3
      bio/bam/bgzf/block.d
  2. 17
      bio/bam/read.d
  3. 2
      bio/bam/readrange.d
  4. 35
      examples/example5.d
  5. BIN
      test/data/illu_20_chunk.bam
  6. BIN
      test/data/ion_20_chunk.bam

3
bio/bam/bgzf/block.d

@ -92,7 +92,8 @@ struct DecompressedBgzfBlock {
/// Function for BGZF block decompression.
/// Reuses buffer allocated for storing compressed data,
/// i.e. after execution
/// i.e. after execution buffer of the passed $(D block)
/// is overwritten with uncompressed data.
DecompressedBgzfBlock decompressBgzfBlock(BgzfBlock block) {
if (block.input_size == 0) {

17
bio/bam/read.d

@ -1135,3 +1135,20 @@ struct EagerBamRead {
return EagerBamRead(read.dup);
}
}
/// Comparison function for 'queryname' sorting order
/// (return whether first read is 'less' than second)
bool compareReadNames(R1, R2)(const auto ref R1 a1, const auto ref R2 a2) {
return a1.name < a2.name;
}
/// Comparison function for 'coordinate' sorting order
/// (return whether first read is 'less' than second)
bool compareCoordinates(R1, R2)(const auto ref R1 a1, const auto ref R2 a2) {
if (a1.ref_id == -1) return false; // unmapped reads should be last
if (a2.ref_id == -1) return true;
if (a1.ref_id < a2.ref_id) return true;
if (a1.ref_id > a2.ref_id) return false;
if (a1.position < a2.position) return true;
return false;
}

2
bio/bam/readrange.d

@ -64,7 +64,7 @@ mixin template withoutOffsets() {
/**
Returns: current bamRead
*/
BamRead front() @property {
ref BamRead front() @property {
return _current_record;
}

35
examples/example5.d

@ -0,0 +1,35 @@
// run example: rdmd -I.. example5.d
import bio.bam.reader;
import bio.bam.read : compareCoordinates;
import bio.bam.pileuprange;
import std.algorithm, std.conv, std.stdio;
void main() {
auto bam1 = new BamReader("../test/data/illu_20_chunk.bam");
auto bam2 = new BamReader("../test/data/ion_20_chunk.bam");
// this is how to iterate over several datasets simultaneously
// (internally, nWayUnion maintains a binary heap)
auto reads = nWayUnion!compareCoordinates([bam1.reads, bam2.reads]);
auto pileup = makePileup(reads, // ANY range of reads is acceptable
true, // use MD tags
32_000_083,
32_000_089);
foreach (column; pileup) {
writeln("Column position: ", column.position);
writeln(" Ref.base: ", column.reference_base); // extracted from MD tags
writeln(" Coverage: ", column.coverage);
writeln(" ", column.reads // bases from Illumina dataset
.filter!(read => to!string(read["RG"]).startsWith("ERR"))()
.map!(read => read.current_base)(),
" ", column.reads // bases from IonTorrent dataset
.filter!(read => to!string(read["RG"]).startsWith("66A0Q"))()
.map!(read => read.current_base)());
}
}

BIN
test/data/illu_20_chunk.bam

BIN
test/data/ion_20_chunk.bam

Loading…
Cancel
Save