Browse Source

support for ZM tag

remotes/georgeg/no_streams
lomereiter 9 years ago
parent
commit
9a79ba6eee
  1. 5
      bio/bam/baseinfo.d
  2. 125
      bio/bam/iontorrent/flowcall.d
  3. 2
      bio/bam/iontorrent/flowindex.d

5
bio/bam/baseinfo.d

@ -24,7 +24,7 @@ import bio.core.sequence;
import bio.bam.read;
import bio.bam.tagvalue;
import bio.bam.fz.flowcall;
import bio.bam.iontorrent.flowcall;
import bio.bam.md.core;
import std.range;
@ -555,6 +555,9 @@ template FZbaseInfo(R, Options...) {
}
}
/// Retrieving flow signal intensities from ZM tags is also available.
alias FZbaseInfo ZMbaseInfo;
/// Provides additional properties
/// * position
/// * cigar_operation

125
bio/bam/fz/flowcall.d → bio/bam/iontorrent/flowcall.d

@ -17,10 +17,10 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
module bio.bam.fz.flowcall;
module bio.bam.iontorrent.flowcall;
import bio.bam.tagvalue;
import bio.bam.fz.flowindex;
import bio.bam.iontorrent.flowindex;
import bio.core.base;
import bio.core.utils.range;
@ -29,12 +29,51 @@ import std.array;
import std.typecons;
import std.range;
import std.algorithm;
import std.exception;
/// Tag where flow signal intensities are stored
enum FlowGramTag : ubyte {
FZ,
ZM
}
/// Scale of intensity values
float multiplier(FlowGramTag tag) {
return tag == FlowGramTag.FZ ? 100.0 : 256.0;
}
/// Flow base call
struct FlowCall {
private {
ushort _signal_intensity;
Base _base;
short _signal_intensity;
static assert(Base.ValueSetSize <= 16 && FlowGramTag.max < 16,
"implementation of FlowCall should be changed?");
ubyte _storage; // tag in upper 4 bits, base in lower 4 bits
Base _base() @property const {
return Base.fromInternalCode(_storage & 0xF);
}
void _base(Base b) @property {
_storage &= 0xF0;
_storage |= b.internal_code;
}
FlowGramTag _tag() @property const {
return cast(FlowGramTag)(_storage >> 4);
}
void _tag(FlowGramTag tag) @property {
_storage &= 0xF;
_storage |= (cast(ubyte)tag << 4);
}
this(short signal_intensity, Base b, FlowGramTag tag) {
_signal_intensity = signal_intensity;
_storage = cast(ubyte)(b.internal_code | (tag << 4));
}
}
/// Nucleotide
@ -44,12 +83,13 @@ struct FlowCall {
/// Signal intensity, normalized to homopolymer lengths
float intensity() @property const {
return _signal_intensity / 100.0;
return _signal_intensity / multiplier(_tag);
}
/// round(intensity * 100.0)
/// More efficient, because this is how intensities are stored in FZ tag.
ushort intensity_value() @property const {
/// round(intensity * Multiplier) where Multiplier is 100.0 for FZ tag,
/// and 256.0 for ZM tag.
/// More efficient, because this is how intensities are stored in FZ/ZM tag.
short intensity_value() @property const {
return _signal_intensity;
}
}
@ -57,21 +97,29 @@ struct FlowCall {
/// Flow call associated with a read
struct ReadFlowCall {
private {
Base _base;
ushort _signal_intensity;
FlowCall _fc;
ushort _offset;
ushort _called_len;
ushort _flow_index;
this(Base b, short signal_intensity, ushort offset,
ushort called, ushort flow_index, FlowGramTag tag)
{
_fc = FlowCall(signal_intensity, b, tag);
_offset = offset;
_called_len = called;
_flow_index = flow_index;
}
}
/// Called nucleotide
Base base() @property const {
return _base;
return _fc._base;
}
/// Set base to its complement
void complement() {
_base = _base.complement;
_fc._base = _fc._base.complement;
}
/// Called homopolymer length
@ -87,13 +135,14 @@ struct ReadFlowCall {
/// Signal intensity, normalized to homopolymer lengths
float intensity() @property const {
return _signal_intensity / 100.0;
return _fc.intensity;
}
/// round(intensity * 100.0)
/// More efficient, because this is how intensities are stored in FZ tag.
ushort intensity_value() @property const {
return _signal_intensity;
/// round(intensity * Multiplier) where Multiplier is 100.0 for FZ tags,
/// and 256.0 for ZM tags.
/// More efficient, because this is how intensities are stored in FZ/ZM tag.
short intensity_value() @property const {
return _fc._signal_intensity;
}
/// Flow index (0-based)
@ -103,13 +152,13 @@ struct ReadFlowCall {
}
/// Get flow calls from signal intensities and flow order.
auto flowCalls(ushort[] intensities, string flow_order) {
auto flowCalls(short[] intensities, string flow_order, FlowGramTag tag) {
static FlowCall flowCall(T)(T call) {
return FlowCall(call[0], Base(call[1]));
return FlowCall(call[0], Base(call[1]), call[2]);
}
return map!flowCall(zip(intensities, flow_order));
return map!flowCall(zip(intensities, flow_order, repeat(tag)));
}
struct ReadFlowCallRange(S)
@ -117,7 +166,7 @@ struct ReadFlowCallRange(S)
{
private {
string _flow_order = void;
ushort[] _intensities = void;
short[] _intensities = void;
bool _rev = void;
S _sequence = void;
@ -128,6 +177,8 @@ struct ReadFlowCallRange(S)
ushort _current_offset;
ushort _overlap = void;
FlowGramTag _tag = void;
bool _empty = false;
@ -169,8 +220,8 @@ struct ReadFlowCallRange(S)
}
}
this(S seq, ushort[] intensities, bool reverse_strand,
string flow_order, ushort first_base_overlap, int zf)
this(S seq, short[] intensities, bool reverse_strand,
string flow_order, ushort first_base_overlap, int zf, FlowGramTag tag)
{
_sequence = seq;
_intensities = intensities;
@ -178,6 +229,7 @@ struct ReadFlowCallRange(S)
_flow_order = flow_order;
_zf = zf;
_overlap = first_base_overlap;
_tag = tag;
if (_sequence.empty) {
_empty = true;
@ -193,11 +245,10 @@ struct ReadFlowCallRange(S)
ReadFlowCall front() @property const {
auto intensity = cast(ushort)(_intensities[_current_flow_index] - _overlap);
ReadFlowCall rfc = void;
rfc._signal_intensity = intensity;
rfc._fc = FlowCall(intensity, _current_base, _tag);
rfc._offset = _current_offset;
rfc._called_len = _current_length;
rfc._flow_index = cast(ushort)(_current_flow_index + _zf);
rfc._base = _current_base;
return rfc;
}
@ -218,10 +269,11 @@ struct ReadFlowCallRange(S)
}
}
private ReadFlowCallRange!S readFlowCallRange(S)(S seq, ushort[] intensities, bool rev,
string flow_order, ushort overlap, int zf)
private ReadFlowCallRange!S readFlowCallRange(S)(S seq, short[] intensities, bool rev,
string flow_order, ushort overlap, int zf,
FlowGramTag tag)
{
return ReadFlowCallRange!S(seq, intensities, rev, flow_order, overlap, zf);
return ReadFlowCallRange!S(seq, intensities, rev, flow_order, overlap, zf, tag);
}
@ -233,11 +285,18 @@ private ReadFlowCallRange!S readFlowCallRange(S)(S seq, ushort[] intensities, bo
auto readFlowCalls(R)(R read, string flow_order, string key_sequence, string tag="ZF") {
auto zf = cast(int)read[tag];
Value fz_value = read["FZ"];
ushort[] fz = *cast(ushort[]*)(&fz_value);
auto fz_value = read["FZ"];
auto zm_value = read["ZM"];
enforce(!(fz_value.is_nothing && zm_value.is_nothing),
"Neither FZ nor ZM tag is presented in a mapped read");
auto fg_tag = fz_value.is_nothing ? FlowGramTag.ZM : FlowGramTag.FZ;
short[] flow_int = *cast(short[]*)(fg_tag == FlowGramTag.ZM ? &zm_value : &fz_value);
flow_order = flow_order[zf .. $];
auto intensities = fz[zf .. $];
auto intensities = flow_int[zf .. $];
// key sequence is required because its last base can overlap with first called base
ushort overlap = 0;
@ -246,9 +305,9 @@ auto readFlowCalls(R)(R read, string flow_order, string key_sequence, string tag
foreach_reverse (c; key_sequence) {
if (c != base)
break;
overlap += 100;
overlap += cast(int)(multiplier(fg_tag));
}
return readFlowCallRange(read.sequence, intensities, read.is_reverse_strand,
flow_order, overlap, zf);
flow_order, overlap, zf, fg_tag);
}

2
bio/bam/fz/flowindex.d → bio/bam/iontorrent/flowindex.d

@ -17,7 +17,7 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
module bio.bam.fz.flowindex;
module bio.bam.iontorrent.flowindex;
import std.range;
Loading…
Cancel
Save