Browse Source

fix the examples to use the new namespaces

remotes/georgeg/master
George Githinji 2 years ago
parent
commit
0d7f5b11e9
7 changed files with 309 additions and 0 deletions
  1. +57
    -0
      examples/calculate_gc_content_from_reference.d
  2. +41
    -0
      examples/create_bam_from_scratch.d
  3. +68
    -0
      examples/iterate_tags.d
  4. +31
    -0
      examples/make_pileup.d
  5. +35
    -0
      examples/print_base_info.d
  6. +44
    -0
      examples/read_bam_file.d
  7. +33
    -0
      examples/transverse_multiple_bam_files.d

+ 57
- 0
examples/calculate_gc_content_from_reference.d View File

@ -0,0 +1,57 @@
/*
To run this example from this directory:
rdmd -I.. calculate_gc_content_from_reference.d
// compile this example with
// debug version
dmd -i -I.. calculate_gc_content_from_reference.d
// optimised version
dmd -i -O -release -inline -boundscheck=off -I.. calculate_gc_content_from_reference.d
*/
import bio.std.hts.bam.reader;
import bio.std.hts.bam.md.reconstruct : dna;
import std.stdio;
import std.datetime.stopwatch: benchmark, StopWatch;
import std.range;
import std.array;
void main() {
auto bam = new BamReader("../test/data/b7_295_chunk.bam");
// the sequence starts at first mapped base of first read
auto reference = dna(bam.reads);
int n_bases = 0, gc = 0;
foreach (base; reference) {
if (base == 'N') continue; // happens if coverage is zero
if (base == 'G' || base == 'C') gc += 1;
n_bases += 1;
}
writeln("total bases: ", n_bases);
writeln(" GC%: ", cast(float)gc / n_bases);
writeln(" sequence: ", reference);
writeln(" #reads: ", walkLength(bam.reads));
auto reads = array(bam.reads); // no I/O during measurements
StopWatch sw; // for a range of reads, minimum number of MD tags is parsed
sw.start();
walkLength(dna(reads));
sw.stop();
writeln("extracting reference from range of reads: ", sw.peek.total!"usecs", "μs");
sw.reset();
sw.start();
foreach (read; reads) walkLength(dna(read));
sw.stop();
writeln("extracting reference from each read: ", sw.peek.total!"usecs", "μs");
}

+ 41
- 0
examples/create_bam_from_scratch.d View File

@ -0,0 +1,41 @@
import bio.std.hts.bam.read;
import bio.std.hts.bam.referenceinfo;
import bio.std.hts.sam.header;
import bio.std.hts.bam.reader;
import bio.std.hts.bam.writer;
import undead.stream;
import std.stdio;
import bio.std.hts.bam.cigar;
void main() {
auto header = new SamHeader(); // First, create SAM header
RgLine rg; // and fill it with information.
rg.identifier = "RG007";
rg.platform = "ILLUMINA"; // Of course, you can modify header
header.read_groups.add(rg); // provided by BamReader object.
auto reference = ReferenceSequenceInfo("contig123", 4321);
auto stream = new MemoryStream();
auto writer = new BamWriter(stream);
writer.writeSamHeader(header); // start writing BAM from header
writer.writeReferenceSequenceInfo([reference]); // and reference sequence info
auto read = BamRead("readName001", "ACTGATGAAC",
[CigarOperation(7, 'M'), CigarOperation(1, 'I'), CigarOperation(2, 'S')]);
read.base_qualities = [38, 34, 33, 35, 28, 39, 25, 19, 21, 17];
read.ref_id = 0;
read.mapping_quality = 46;
read.is_unmapped = false;
read["RG"] = "RG007";
// ... set many more fields, flags, and tags
read.position = 2345; // 0-based, in SAM output you will see 2346
read.strand = '-'; // same as read.is_reverse_strand = true
writer.writeRecord(read); // BGZF blocks are seamlessly compressed in parallel
writer.flush(); // in practice, one would call finish() method
stream.seekSet(0); // but here we will read from the stream
auto reader = new BamReader(stream);
write(reader.header.text); // serialized header already contains newline
writefln("%j", reader.reads.front); // prints record in JSON format
}

+ 68
- 0
examples/iterate_tags.d View File

@ -0,0 +1,68 @@
/* Run this example from this directory by typing this from the command line:
// run with rdmd
rdmd -I.. iterate_tags.d
// A debug compile with dmd
dmd -i -I.. iterate_tags.d
// optimised compile
dmd -i -O -release -inline -boundscheck=off -I.. iterate_tags.d
*/
import bio.std.hts.bam.reader;
import bio.std.hts.bam.tagvalue;
import std.stdio;
import std.datetime.stopwatch : benchmark,StopWatch;
import std.conv : to;
void main() {
// add a file handling exception in the example
auto bam = new BamReader("../test/data/b7_295_chunk.bam");
// auto bam = new BamReader("/Users/george/HH_bam_files/H_3801_01_03.final.bam");
auto read = bam.reads.front; // take first read
// iterating all tags
foreach (tag, value; read)
writeln(tag, ": ", value);
// taking value of tag
Value v = read["XS"];
// Usually, it will be converted to some type right away.
auto v2 = to!int(v);
// It is not necessary to know exact value type as in BAM.
// If it can be converted to specified type, that's fine.
// Otherwise, an exception will be thrown.
auto v3 = to!long(v);
auto v4 = to!string(v);
auto v5 = to!float(v);
// With strings and arrays there is an unsafe but faster way...
v = read["FZ"];
StopWatch sw;
// even with -O -release -inline this is slow
sw.start;
auto fz1 = to!(ushort[])(v);
sw.stop();
writeln(" safe conversion: ", sw.peek.total!"usecs", "μs");
sw.reset();
// this works because v starts in memory with a union
sw.start();
auto fz2 = *(cast(ushort[]*)(&v));
sw.stop();
writeln("unsafe conversion: ", sw.peek.total!"usecs", "μs");
assert(fz1 == fz2);
}

+ 31
- 0
examples/make_pileup.d View File

@ -0,0 +1,31 @@
import bio.std.hts.bam.reader;
import bio.std.hts.bam.pileup;
import bio.std.hts.bam.read : compareCoordinates;
import std.range;
import std.algorithm;
import std.datetime;
import std.stdio;
import std.array;
void main() {
auto bam = new BamReader("../test/data/illu_20_chunk.bam");
auto pileup = makePileup(bam.reads, true);
// Reads in every pileup column are sorted by coordinate.
// Therefore the following holds:
assert(equal(
pileup.map!(column => column.reads.equalRange(column.position))()
.joiner(),
bam.reads));
// There is also easier and faster way to get reads starting at the column:
pileup = makePileup(bam.reads, true); // (initialize pileup engine again)
assert(equal(
pileup.map!(column => column.reads_starting_here)()
.joiner(),
bam.reads));
}

+ 35
- 0
examples/print_base_info.d View File

@ -0,0 +1,35 @@
import bio.std.hts.bam.reader;
import bio.std.hts.bam.baseinfo;
import std.stdio;
import std.range : take, drop;
import std.algorithm : find;
void main() {
auto bam = new BamReader("../test/data/b7_295_chunk.bam");
// get read group information by name
auto rg = bam.header.read_groups["9IKNG"];
auto read = find!(r => r.name == "9IKNG:00592:01791")(bam.reads).front;
// fetch information about flow calls from FZ & ZF tags
// and also reference base from MD tag
auto bases = basesWith!("FZ", "MD")(read, arg!"flowOrder"(rg.flow_order),
arg!"keySequence"(rg.key_sequence));
// end of read contains a few indel errors
foreach (baseinfo; bases.drop(350).take(32)) {
writefln("%s\t%s\tflow: %3d\tintensity: %.2f\t\tref. pos.: %6d\tCIGAR op.: %s",
baseinfo.reference_base, // from MD tag
baseinfo.base,
baseinfo.flow_call.flow_index, // from FZ tag
baseinfo.flow_call.intensity, // also from FZ tag
baseinfo.position,
baseinfo.cigar_operation);
// notice that because the read is on reverse strand,
// reference position decreases during the iteration
}
}

+ 44
- 0
examples/read_bam_file.d View File

@ -0,0 +1,44 @@
/*
To run this example from this directory:
rdmd -I.. read_bam_file.d
// compile this example with
// debug version
dmd -i -I.. read_bam_file.d
// optimised version
dmd -i -O -release -inline -boundscheck=off -I.. read_bam_file.d
*/
import bio.std.hts.bam.reader;
import bio.std.hts.bam.pileup;
import std.stdio;
void main() {
auto bam = new BamReader("../test/data/ex1_header.bam");
auto reads = bam["chr2"][150 .. 160]; // region chr2:149-158
auto pileup = makePileup(reads,
false, // reads don't contain MD tags
155, 158); // specify [start, end) interval
foreach (column; pileup) {
writeln("Reference position: ", column.position);
writeln(" Coverage: ", column.coverage);
writeln(" Reads:");
foreach (read; column.reads) {
writefln("%30s\t%s\t%.2d\t%s\t%2s/%2s\t%2s/%2s\t%10s\t%s %s",
read.name,
read.current_base,
read.current_base_quality,
read.cigar_operation,
read.cigar_operation_offset + 1, read.cigar_operation.length,
read.query_offset + 1, read.sequence.length,
read.cigarString(),
read.cigar_before, read.cigar_after);
}
}
}

+ 33
- 0
examples/transverse_multiple_bam_files.d View File

@ -0,0 +1,33 @@
import bio.std.hts.bam.multireader;
import bio.std.hts.bam.read : compareCoordinates;
import bio.std.hts.bam.pileup;
import std.algorithm;
import std.conv;
import std.stdio;
void main() {
// multiple BAM files can be traversed simultaneously (if they can be merged)
auto bam = new MultiBamReader(["../test/data/illu_20_chunk.bam",
"../test/data/ion_20_chunk.bam"]);
auto pileup = makePileup(bam.reads, // ANY range of reads is acceptable
true, // use MD tags
32_000_083,
32_000_089);
foreach (column; pileup) {
writeln("Column position: ", column.position);
writeln(" Ref.base: ", column.reference_base); // extracted from MD tags
writeln(" Coverage: ", column.coverage);
writeln(" ", column.reads // bases from Illumina dataset
.filter!(read => to!string(read["RG"]).startsWith("ERR"))()
.map!(read => read.current_base)(),
" ", column.reads // bases from IonTorrent dataset
.filter!(read => to!string(read["RG"]).startsWith("66A0Q"))()
.map!(read => read.current_base)());
}
}

Loading…
Cancel
Save