Browse Source

changes in basesWith behaviour

remotes/georgeg/bam_output_redesign
lomereiter 10 years ago
parent
commit
ec3dc0986b
  1. 16
      bio/bam/baseinfo.d
  2. 42
      bio/bam/fz/flowcall.d
  3. 14
      test/unittests.d

16
bio/bam/baseinfo.d

@ -138,7 +138,7 @@ struct PerBaseInfo(R, Tags...) {
Result front() @property {
Result r = void;
r.base = _rev ? _seq.back : _seq.front;
r.base = _rev ? _seq.back.complement : _seq.front;
foreach (t; Extensions)
populate!t(r);
return r;
@ -219,20 +219,26 @@ template FZbaseInfo(R) {
void setup(Args...)(const ref R read, Args args)
{
string flow_order;
string key_sequence;
debug {
_read_name = read.read_name.idup;
}
enum argExists = staticIndexOf!(MixinArg!(string, "FZ"), Args);
static assert(argExists != -1, `Flow order must be provided via arg!"FZ"`);
enum flowOrderExists = staticIndexOf!(MixinArg!(string, "flowOrder"), Args);
enum keySequenceExists = staticIndexOf!(MixinArg!(string, "keySequence"), Args);
static assert(flowOrderExists != -1, `Flow order must be provided via arg!"flowOrder"`);
static assert(keySequenceExists != -1, `Flow order must be provided via arg!"keySequence"`);
foreach (arg; args) {
static if(is(typeof(arg) == MixinArg!(string, "FZ")))
static if(is(typeof(arg) == MixinArg!(string, "flowOrder")))
flow_order = arg;
static if(is(typeof(arg) == MixinArg!(string, "keySequence")))
key_sequence = arg;
}
_flow_calls = readFlowCalls(read, flow_order);
_flow_calls = readFlowCalls(read, flow_order, key_sequence);
_at = 0;
}

42
bio/bam/fz/flowcall.d

@ -126,6 +126,8 @@ struct ReadFlowCallRange(S)
size_t _current_flow_index;
ushort _current_offset;
ushort _overlap;
bool _empty;
// consumes next homopolymer from the sequence,
@ -155,11 +157,12 @@ struct ReadFlowCallRange(S)
}
}
this(S seq, ushort[] intensities, string flow_order, int zf) {
this(S seq, ushort[] intensities, string flow_order, ushort first_base_overlap, int zf) {
_sequence = seq;
_intensities = intensities;
_flow_order = flow_order;
_zf = zf;
_overlap = first_base_overlap;
if (_sequence.empty) {
_empty = true;
@ -173,14 +176,16 @@ struct ReadFlowCallRange(S)
}
ReadFlowCall front() @property const {
return ReadFlowCall(_intensities[_current_flow_index], _current_offset,
_current_length, _current_base, _current_flow_index + _zf);
auto intensity = cast(ushort)(_intensities[_current_flow_index] - _overlap);
return ReadFlowCall(intensity, _current_offset, _current_length,
_current_base, _current_flow_index + _zf);
}
void popFront() {
_current_offset += _current_length;
++_current_flow_index;
_overlap = 0; // after first base it is always zero
_doSetup();
}
@ -193,9 +198,10 @@ struct ReadFlowCallRange(S)
}
}
ReadFlowCallRange!S readFlowCallRange(S)(S seq, ushort[] intensities, string flow_order, int zf)
private ReadFlowCallRange!S readFlowCallRange(S)(S seq, ushort[] intensities,
string flow_order, ushort overlap, int zf)
{
return ReadFlowCallRange!S(seq, intensities, flow_order, zf);
return ReadFlowCallRange!S(seq, intensities, flow_order, overlap, zf);
}
@ -204,16 +210,7 @@ ReadFlowCallRange!S readFlowCallRange(S)(S seq, ushort[] intensities, string flo
/// Tag name is an optional argument because it is not standard and will likely
/// be changed in the future (there was a proposal on samtools mailing list
/// to introduce standard FB tag).
ForwardRange!ReadFlowCall readFlowCalls(R)(R read, string flow_order, string tag="ZF") {
static auto readFlowCall(T)(T tup) {
auto base = tup[0][0][0];
auto called_length = tup[0][1];
auto flow_intensity = tup[1];
auto offset = tup[2];
return ReadFlowCall(flow_intensity, cast(ushort)offset, cast(ushort)called_length, base);
}
ForwardRange!ReadFlowCall readFlowCalls(R)(R read, string flow_order, string key_sequence, string tag="ZF") {
auto zf = cast(int)read[tag];
Value fz_value = read["FZ"];
@ -221,11 +218,22 @@ ForwardRange!ReadFlowCall readFlowCalls(R)(R read, string flow_order, string tag
flow_order = flow_order[zf .. $];
auto intensities = fz[zf .. $];
// key sequence is required because its last base can overlap with first called base
ushort overlap = 0;
Base5 base = read.is_reverse_strand ? read.sequence.back.complement : read.sequence.front;
foreach_reverse (c; key_sequence) {
if (c != base)
break;
overlap += 100;
}
if (!read.is_reverse_strand) {
auto seq = read.sequence;
return inputRangeObject(readFlowCallRange(seq, intensities, flow_order, zf));
return inputRangeObject(readFlowCallRange(seq, intensities, flow_order, overlap, zf));
} else {
auto seq = retro(map!"a.complement"(read.sequence));
return inputRangeObject(readFlowCallRange(seq, intensities, flow_order, zf));
return inputRangeObject(readFlowCallRange(seq, intensities, flow_order, overlap, zf));
}
}

14
test/unittests.d

@ -331,12 +331,15 @@ unittest {
{
fn = buildPath(dirName(__FILE__), "data", "mg1655_chunk.bam");
bf = new BamReader(fn);
auto flow_order = bf.header.read_groups.values.front.flow_order;
auto rg = bf.header.read_groups.values.front;
auto flow_order = rg.flow_order;
auto key_sequence = rg.key_sequence;
auto reads = array(bf.reads);
auto read = reads[1];
assert(!read.is_reverse_strand);
auto basesFZ = basesWith!"FZ"(read, arg!"FZ"(flow_order));
auto basesFZ = basesWith!"FZ"(read, arg!"flowOrder"(flow_order),
arg!"keySequence"(key_sequence));
assert(equal(basesFZ.save, read.sequence));
assert(equal(take(map!"a.flow_call.intensity_value"(basesFZ.save), 92),
[219, 219, 194, 194, 92, 107, 83, 198, 198, 78,
@ -362,10 +365,11 @@ unittest {
if (r.is_unmapped) continue;
if (r.cigar.length == 0) continue;
if (r.is_reverse_strand) {
basesFZ = basesWith!"FZ"(r, arg!"FZ"(flow_order));
assert(equal(basesFZ.save, retro(r.sequence)));
basesFZ = basesWith!"FZ"(r, arg!"flowOrder"(flow_order), arg!"keySequence"(key_sequence));
// if reverse strand, bases are also reverse complemented
assert(equal(basesFZ.save, map!"a.complement"(retro(r.sequence))));
} else {
basesFZ = basesWith!"FZ"(r, arg!"FZ"(flow_order));
basesFZ = basesWith!"FZ"(r, arg!"flowOrder"(flow_order), arg!"keySequence"(key_sequence));
assert(equal(basesFZ.save, r.sequence));
}
}

Loading…
Cancel
Save