
7 changed files with 309 additions and 0 deletions
@ -0,0 +1,57 @@ |
|||
/* |
|||
To run this example from this directory: |
|||
rdmd -I.. calculate_gc_content_from_reference.d |
|||
|
|||
// compile this example with
|
|||
|
|||
// debug version
|
|||
dmd -i -I.. calculate_gc_content_from_reference.d |
|||
|
|||
// optimised version
|
|||
dmd -i -O -release -inline -boundscheck=off -I.. calculate_gc_content_from_reference.d |
|||
|
|||
*/ |
|||
import bio.std.hts.bam.reader; |
|||
import bio.std.hts.bam.md.reconstruct : dna; |
|||
|
|||
import std.stdio; |
|||
import std.datetime.stopwatch: benchmark, StopWatch; |
|||
import std.range; |
|||
import std.array; |
|||
|
|||
void main() { |
|||
auto bam = new BamReader("../test/data/b7_295_chunk.bam"); |
|||
|
|||
// the sequence starts at first mapped base of first read
|
|||
auto reference = dna(bam.reads); |
|||
|
|||
int n_bases = 0, gc = 0; |
|||
|
|||
foreach (base; reference) { |
|||
if (base == 'N') continue; // happens if coverage is zero
|
|||
if (base == 'G' || base == 'C') gc += 1; |
|||
n_bases += 1; |
|||
} |
|||
|
|||
writeln("total bases: ", n_bases); |
|||
writeln(" GC%: ", cast(float)gc / n_bases); |
|||
writeln(" sequence: ", reference); |
|||
writeln(" #reads: ", walkLength(bam.reads)); |
|||
|
|||
auto reads = array(bam.reads); // no I/O during measurements
|
|||
|
|||
StopWatch sw; // for a range of reads, minimum number of MD tags is parsed
|
|||
|
|||
sw.start(); |
|||
walkLength(dna(reads)); |
|||
sw.stop(); |
|||
|
|||
writeln("extracting reference from range of reads: ", sw.peek.total!"usecs", "μs"); |
|||
|
|||
sw.reset(); |
|||
sw.start(); |
|||
foreach (read; reads) walkLength(dna(read)); |
|||
sw.stop(); |
|||
|
|||
writeln("extracting reference from each read: ", sw.peek.total!"usecs", "μs"); |
|||
} |
@ -0,0 +1,41 @@ |
|||
|
|||
import bio.std.hts.bam.read; |
|||
import bio.std.hts.bam.referenceinfo; |
|||
import bio.std.hts.sam.header; |
|||
import bio.std.hts.bam.reader; |
|||
import bio.std.hts.bam.writer; |
|||
import undead.stream; |
|||
import std.stdio; |
|||
import bio.std.hts.bam.cigar; |
|||
|
|||
void main() { |
|||
auto header = new SamHeader(); // First, create SAM header
|
|||
RgLine rg; // and fill it with information.
|
|||
rg.identifier = "RG007"; |
|||
rg.platform = "ILLUMINA"; // Of course, you can modify header
|
|||
header.read_groups.add(rg); // provided by BamReader object.
|
|||
|
|||
auto reference = ReferenceSequenceInfo("contig123", 4321); |
|||
auto stream = new MemoryStream(); |
|||
auto writer = new BamWriter(stream); |
|||
writer.writeSamHeader(header); // start writing BAM from header
|
|||
writer.writeReferenceSequenceInfo([reference]); // and reference sequence info
|
|||
|
|||
auto read = BamRead("readName001", "ACTGATGAAC", |
|||
[CigarOperation(7, 'M'), CigarOperation(1, 'I'), CigarOperation(2, 'S')]); |
|||
read.base_qualities = [38, 34, 33, 35, 28, 39, 25, 19, 21, 17]; |
|||
read.ref_id = 0; |
|||
read.mapping_quality = 46; |
|||
read.is_unmapped = false; |
|||
read["RG"] = "RG007"; |
|||
// ... set many more fields, flags, and tags
|
|||
read.position = 2345; // 0-based, in SAM output you will see 2346
|
|||
read.strand = '-'; // same as read.is_reverse_strand = true
|
|||
writer.writeRecord(read); // BGZF blocks are seamlessly compressed in parallel
|
|||
writer.flush(); // in practice, one would call finish() method
|
|||
stream.seekSet(0); // but here we will read from the stream
|
|||
|
|||
auto reader = new BamReader(stream); |
|||
write(reader.header.text); // serialized header already contains newline
|
|||
writefln("%j", reader.reads.front); // prints record in JSON format
|
|||
} |
@ -0,0 +1,68 @@ |
|||
/* Run this example from this directory by typing this from the command line: |
|||
|
|||
// run with rdmd
|
|||
rdmd -I.. iterate_tags.d |
|||
|
|||
// A debug compile with dmd
|
|||
dmd -i -I.. iterate_tags.d |
|||
|
|||
// optimised compile
|
|||
dmd -i -O -release -inline -boundscheck=off -I.. iterate_tags.d |
|||
|
|||
*/ |
|||
|
|||
|
|||
import bio.std.hts.bam.reader; |
|||
import bio.std.hts.bam.tagvalue; |
|||
import std.stdio; |
|||
import std.datetime.stopwatch : benchmark,StopWatch; |
|||
import std.conv : to; |
|||
|
|||
void main() { |
|||
|
|||
// add a file handling exception in the example
|
|||
|
|||
auto bam = new BamReader("../test/data/b7_295_chunk.bam"); |
|||
// auto bam = new BamReader("/Users/george/HH_bam_files/H_3801_01_03.final.bam");
|
|||
|
|||
auto read = bam.reads.front; // take first read
|
|||
|
|||
// iterating all tags
|
|||
foreach (tag, value; read) |
|||
writeln(tag, ": ", value); |
|||
|
|||
// taking value of tag
|
|||
Value v = read["XS"]; |
|||
|
|||
// Usually, it will be converted to some type right away.
|
|||
auto v2 = to!int(v); |
|||
|
|||
// It is not necessary to know exact value type as in BAM.
|
|||
// If it can be converted to specified type, that's fine.
|
|||
// Otherwise, an exception will be thrown.
|
|||
auto v3 = to!long(v); |
|||
auto v4 = to!string(v); |
|||
auto v5 = to!float(v); |
|||
|
|||
// With strings and arrays there is an unsafe but faster way...
|
|||
v = read["FZ"]; |
|||
|
|||
StopWatch sw; |
|||
|
|||
// even with -O -release -inline this is slow
|
|||
sw.start; |
|||
auto fz1 = to!(ushort[])(v); |
|||
sw.stop(); |
|||
|
|||
writeln(" safe conversion: ", sw.peek.total!"usecs", "μs"); |
|||
sw.reset(); |
|||
|
|||
// this works because v starts in memory with a union
|
|||
sw.start(); |
|||
auto fz2 = *(cast(ushort[]*)(&v)); |
|||
sw.stop(); |
|||
|
|||
writeln("unsafe conversion: ", sw.peek.total!"usecs", "μs"); |
|||
|
|||
assert(fz1 == fz2); |
|||
} |
@ -0,0 +1,31 @@ |
|||
|
|||
import bio.std.hts.bam.reader; |
|||
import bio.std.hts.bam.pileup; |
|||
import bio.std.hts.bam.read : compareCoordinates; |
|||
|
|||
import std.range; |
|||
import std.algorithm; |
|||
import std.datetime; |
|||
import std.stdio; |
|||
import std.array; |
|||
|
|||
void main() { |
|||
|
|||
auto bam = new BamReader("../test/data/illu_20_chunk.bam"); |
|||
|
|||
auto pileup = makePileup(bam.reads, true); |
|||
|
|||
// Reads in every pileup column are sorted by coordinate.
|
|||
// Therefore the following holds:
|
|||
assert(equal( |
|||
pileup.map!(column => column.reads.equalRange(column.position))() |
|||
.joiner(), |
|||
bam.reads)); |
|||
|
|||
// There is also easier and faster way to get reads starting at the column:
|
|||
pileup = makePileup(bam.reads, true); // (initialize pileup engine again)
|
|||
assert(equal( |
|||
pileup.map!(column => column.reads_starting_here)() |
|||
.joiner(), |
|||
bam.reads)); |
|||
} |
@ -0,0 +1,35 @@ |
|||
|
|||
import bio.std.hts.bam.reader; |
|||
import bio.std.hts.bam.baseinfo; |
|||
|
|||
import std.stdio; |
|||
import std.range : take, drop; |
|||
import std.algorithm : find; |
|||
|
|||
void main() { |
|||
|
|||
auto bam = new BamReader("../test/data/b7_295_chunk.bam"); |
|||
|
|||
// get read group information by name
|
|||
auto rg = bam.header.read_groups["9IKNG"]; |
|||
|
|||
auto read = find!(r => r.name == "9IKNG:00592:01791")(bam.reads).front; |
|||
|
|||
// fetch information about flow calls from FZ & ZF tags
|
|||
// and also reference base from MD tag
|
|||
auto bases = basesWith!("FZ", "MD")(read, arg!"flowOrder"(rg.flow_order), |
|||
arg!"keySequence"(rg.key_sequence)); |
|||
|
|||
// end of read contains a few indel errors
|
|||
foreach (baseinfo; bases.drop(350).take(32)) { |
|||
writefln("%s\t%s\tflow: %3d\tintensity: %.2f\t\tref. pos.: %6d\tCIGAR op.: %s", |
|||
baseinfo.reference_base, // from MD tag
|
|||
baseinfo.base, |
|||
baseinfo.flow_call.flow_index, // from FZ tag
|
|||
baseinfo.flow_call.intensity, // also from FZ tag
|
|||
baseinfo.position, |
|||
baseinfo.cigar_operation); |
|||
// notice that because the read is on reverse strand,
|
|||
// reference position decreases during the iteration
|
|||
} |
|||
} |
@ -0,0 +1,44 @@ |
|||
/* |
|||
To run this example from this directory: |
|||
rdmd -I.. read_bam_file.d |
|||
|
|||
// compile this example with
|
|||
|
|||
// debug version
|
|||
dmd -i -I.. read_bam_file.d |
|||
|
|||
// optimised version
|
|||
dmd -i -O -release -inline -boundscheck=off -I.. read_bam_file.d |
|||
|
|||
*/ |
|||
|
|||
import bio.std.hts.bam.reader; |
|||
import bio.std.hts.bam.pileup; |
|||
import std.stdio; |
|||
|
|||
void main() { |
|||
|
|||
auto bam = new BamReader("../test/data/ex1_header.bam"); |
|||
auto reads = bam["chr2"][150 .. 160]; // region chr2:149-158
|
|||
auto pileup = makePileup(reads, |
|||
false, // reads don't contain MD tags
|
|||
155, 158); // specify [start, end) interval
|
|||
|
|||
foreach (column; pileup) { |
|||
writeln("Reference position: ", column.position); |
|||
writeln(" Coverage: ", column.coverage); |
|||
writeln(" Reads:"); |
|||
|
|||
foreach (read; column.reads) { |
|||
writefln("%30s\t%s\t%.2d\t%s\t%2s/%2s\t%2s/%2s\t%10s\t%s %s", |
|||
read.name, |
|||
read.current_base, |
|||
read.current_base_quality, |
|||
read.cigar_operation, |
|||
read.cigar_operation_offset + 1, read.cigar_operation.length, |
|||
read.query_offset + 1, read.sequence.length, |
|||
read.cigarString(), |
|||
read.cigar_before, read.cigar_after); |
|||
} |
|||
} |
|||
} |
@ -0,0 +1,33 @@ |
|||
|
|||
import bio.std.hts.bam.multireader; |
|||
import bio.std.hts.bam.read : compareCoordinates; |
|||
import bio.std.hts.bam.pileup; |
|||
|
|||
import std.algorithm; |
|||
import std.conv; |
|||
import std.stdio; |
|||
|
|||
void main() { |
|||
// multiple BAM files can be traversed simultaneously (if they can be merged)
|
|||
auto bam = new MultiBamReader(["../test/data/illu_20_chunk.bam", |
|||
"../test/data/ion_20_chunk.bam"]); |
|||
|
|||
auto pileup = makePileup(bam.reads, // ANY range of reads is acceptable
|
|||
true, // use MD tags
|
|||
32_000_083, |
|||
32_000_089); |
|||
|
|||
foreach (column; pileup) { |
|||
writeln("Column position: ", column.position); |
|||
writeln(" Ref.base: ", column.reference_base); // extracted from MD tags
|
|||
writeln(" Coverage: ", column.coverage); |
|||
|
|||
writeln(" ", column.reads // bases from Illumina dataset
|
|||
.filter!(read => to!string(read["RG"]).startsWith("ERR"))() |
|||
.map!(read => read.current_base)(), |
|||
|
|||
" ", column.reads // bases from IonTorrent dataset
|
|||
.filter!(read => to!string(read["RG"]).startsWith("66A0Q"))() |
|||
.map!(read => read.current_base)()); |
|||
} |
|||
} |
Loading…
Reference in new issue