You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

599 lines
17 KiB

/*
This file is part of BioD.
Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com>
BioD is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
BioD is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
module bio.bam.baseinfo;
import bio.core.base;
import bio.core.sequence;
import bio.bam.read;
import bio.bam.tagvalue;
import bio.bam.fz.flowcall;
import bio.bam.md.core;
import std.range;
import std.conv;
import std.traits;
import std.typetuple;
///
enum Option
{
cigarExtra /// adds 'cigar_before' and 'cigar_after' properties to each base
}
///
struct MixinArg(T, string Tag) {
T value;
alias value this;
alias Tag TagName;
}
/// Wrapper for arguments to $(D basesWith) function (see below).
/// Required to distinguish to which tag each parameter refers.
MixinArg!(T, Tag) arg(string Tag, T)(T value) {
return MixinArg!(T, Tag)(value);
}
template staticFilter(alias P, T...)
{
static if (T.length == 0)
alias TypeTuple!() staticFilter;
else static if (P!(T[0]))
alias TypeTuple!(T[0], staticFilter!(P, T[1..$])) staticFilter;
else
alias staticFilter!(P, T[1..$]) staticFilter;
}
template isTag(alias argument)
{
enum isTag = is(typeof(argument) == string);
}
template isOption(alias argument)
{
enum isOption = is(typeof(argument) == Option);
}
struct PerBaseInfo(R, TagsAndOptions...) {
alias staticFilter!(isTag, TagsAndOptions) Tags;
alias staticFilter!(isOption, TagsAndOptions) Options;
private alias TypeTuple!("CIGAR", Tags) Extensions;
// /////////////////////////////////////////////////////////////////////////
//
// Each 'extension' is a template with name TAGbaseInfo, containing
// a couple of mixin templates:
//
// * resultProperties
// These are additional properties provided by the template
//
// * rangeMethods
// These describe how to proceed to the next base.
// The following methods must be implemented:
//
// - void setup(Args...)(const ref R read, Args args);
// Gets called during range construction. All constructor
// arguments are forwarded, and it's this function which
// is responsible for getting required parameters for this
// particular template.
//
// - void populate(Result)(ref Result r);
// Populates fields of the result declared in resultProperties.
// Should run in O(1), just copying a few variables.
// Current base of the result is updated before the call.
//
// - void update(const ref R read);
// Encapsulates logic of moving to the next base and updating
// mixin variables correspondingly.
//
// - void copy(Range)(const ref Range source, ref Range target);
// Gets called during $(D source.save). Therefore, any ranges
// used in mixin templates must be saved as well at that time.
//
// /////////////////////////////////////////////////////////////////////////
private static string getResultProperties(Exts...)() {
char[] result;
foreach (ext; Exts)
result ~= "mixin " ~ ext ~ "baseInfo!(R, Options).resultProperties;".dup;
return cast(string)result;
}
static struct Result {
/// Actual read base, with strand taken into account.
Base base;
alias base this;
string opCast(T)() if (is(T == string))
{
return to!string(base);
}
bool opEquals(T)(T base) const
if (is(Unqual!T == Base))
{
return this.base == base;
}
bool opEquals(T)(T result) const
if (is(Unqual!T == Result))
{
return this == result;
}
bool opEquals(T)(T base) const
if (is(Unqual!T == char) || is(Unqual!T == dchar))
{
return this.base == base;
}
mixin(getResultProperties!Extensions());
}
private static string getRangeMethods(Exts...)() {
char[] result;
foreach (ext; Exts)
result ~= "mixin " ~ ext ~ "baseInfo!(R, Options).rangeMethods " ~ ext ~ ";".dup;
return cast(string)result;
}
mixin(getRangeMethods!Extensions());
private void setup(string tag, Args...)(R read, Args args) {
mixin(tag ~ ".setup(read, args);");
}
private void populate(string tag)(ref Result r) {
mixin(tag ~ ".populate(r);");
}
private void update(string tag)() {
mixin(tag ~ ".update(_read);");
}
private void copy(string tag)(ref typeof(this) other) {
mixin(tag ~ ".copy(this, other);");
}
this(Args...)(R read, Args args) {
_read = read;
_rev = read.is_reverse_strand;
_seq = reversableRange!complementBase(read.sequence, _rev);
foreach (t; Extensions) {
setup!t(read, args);
}
}
bool empty() @property {
return _seq.empty;
}
Result front() @property {
Result r = void;
r.base = _seq.front;
foreach (t; Extensions)
populate!t(r);
return r;
}
void popFront() {
moveToNextBase();
}
PerBaseInfo save() @property {
PerBaseInfo r = void;
r._read = _read.dup;
r._seq = _seq.save;
r._rev = _rev;
foreach (t; Extensions)
copy!t(r);
return r;
}
private void moveToNextBase() {
foreach (t; Extensions) {
update!t();
}
_seq.popFront();
}
/// Returns true if the read is reverse strand,
/// and false otherwise.
bool reverse_strand() @property const {
return _rev;
}
private {
bool _rev = void;
R _read = void;
ReversableRange!(complementBase, typeof(_read.sequence)) _seq = void;
}
}
///
/// Collect per-base information from available tags.
/// Use $(D arg!TagName) to pass a parameter related to a particular tag.
///
/// Example:
///
/// basesWith!"FZ"(arg!"flowOrder"(flow_order), arg!"keySequence"(key_sequence));
///
template basesWith(TagsAndOptions...) {
auto basesWith(R, Args...)(R read, Args args) {
return PerBaseInfo!(R, TagsAndOptions)(read, args);
}
}
/// Provides additional properties
/// * reference_base
/// * md_operation
template MDbaseInfo(R, Options...) {
mixin template resultProperties() {
/// If current CIGAR operation is reference consuming,
/// returns reference base at this position, otherwise
/// returns '-'.
///
/// If read is on '-' strand, the result will be
/// complementary base.
char reference_base() @property {
return _ref_base;
}
/// Current MD operation
MdOperation md_operation() @property {
return _md_op;
}
private {
char _ref_base = void;
MdOperation _md_op = void;
}
}
mixin template rangeMethods() {
private {
ReversableRange!(reverseMdOp, MdOperationRange) _md_ops = void;
uint _match; // remaining length of current match operation
MdOperation _md_front = void;
}
void setup(Args...)(const ref R read, Args args)
{
auto md = read["MD"];
auto md_str = *(cast(string*)&md);
_md_ops = reversableRange!reverseMdOp(mdOperations(md_str),
read.is_reverse_strand);
while (!_md_ops.empty)
{
_md_front = _md_ops.front;
_md_ops.popFront();
if (!_md_front.is_deletion) {
if (_md_front.is_match) {
_match = _md_front.match;
}
break;
}
}
}
void populate(Result)(ref Result result)
{
if (!current_cigar_operation.is_reference_consuming)
{
result._ref_base = '-';
return;
}
MdOperation op = _md_front;
if (op.is_mismatch)
result._ref_base = op.mismatch.asCharacter;
else if (op.is_match) {
result._ref_base = result.base.asCharacter;
}
else assert(0);
result._md_op = op;
}
void update(const ref R read)
{
if (!current_cigar_operation.is_reference_consuming)
return;
if (_md_front.is_mismatch)
{
if (_md_ops.empty)
return;
_md_front = _md_ops.front;
_md_ops.popFront();
}
else if (_md_front.is_match)
{
--_match;
if (_match == 0 && !_md_ops.empty) {
_md_front = _md_ops.front;
_md_ops.popFront();
}
}
else assert(0);
while (_md_front.is_deletion) {
if (_md_ops.empty)
return;
_md_front = _md_ops.front;
_md_ops.popFront();
}
if (_match == 0 && _md_front.is_match)
_match = _md_front.match;
}
void copy(Range)(ref Range source, ref Range target)
{
target.MD._md_ops = source.MD._md_ops.save;
target.MD._md_front = source.MD._md_front;
}
}
}
/// Provides additional property $(D flow_call).
template FZbaseInfo(R, Options...) {
mixin template resultProperties() {
/// Current flow call
ReadFlowCall flow_call() @property const {
return _flow_call;
}
private {
ReadFlowCall _flow_call;
}
}
mixin template rangeMethods() {
private {
ReadFlowCallRange!(BamRead.SequenceResult) _flow_calls = void;
ReadFlowCall _current_flow_call = void;
ushort _at = void;
debug {
string _read_name;
}
}
void setup(Args...)(const ref R read, Args args)
{
string flow_order = void;
string key_sequence = void;
debug {
_read_name = read.name.idup;
}
enum flowOrderExists = staticIndexOf!(MixinArg!(string, "flowOrder"), Args);
enum keySequenceExists = staticIndexOf!(MixinArg!(string, "keySequence"), Args);
static assert(flowOrderExists != -1, `Flow order must be provided via arg!"flowOrder"`);
static assert(keySequenceExists != -1, `Flow order must be provided via arg!"keySequence"`);
foreach (arg; args) {
static if(is(typeof(arg) == MixinArg!(string, "flowOrder")))
flow_order = arg;
static if(is(typeof(arg) == MixinArg!(string, "keySequence")))
key_sequence = arg;
}
_at = 0;
_flow_calls = readFlowCalls(read, flow_order, key_sequence);
if (!_flow_calls.empty) {
_current_flow_call = _flow_calls.front;
}
}
void populate(Result)(ref Result result) {
result._flow_call = _current_flow_call;
debug {
if (result.base != result._flow_call.base) {
import std.stdio;
stderr.writeln("invalid flow call at ", _read_name, ": ", result.position);
}
}
}
void update(const ref R read)
{
++_at;
if (_at == _current_flow_call.length) {
_flow_calls.popFront();
if (!_flow_calls.empty) {
_current_flow_call = _flow_calls.front;
_at = 0;
}
}
}
void copy(Range)(ref Range source, ref Range target) {
target.FZ._flow_calls = source._flow_calls.save();
target.FZ._at = source.FZ._at;
target.FZ._current_flow_call = source._current_flow_call;
debug {
target._read_name = _read_name;
}
}
}
}
/// Provides additional properties
/// * position
/// * cigar_operation
/// * cigar_operation_offset
template CIGARbaseInfo(R, Options...) {
mixin template resultProperties() {
enum CigarExtraProperties = staticIndexOf!(Option.cigarExtra, Options) != -1;
static if (CigarExtraProperties)
{
/// Current CIGAR operation
CigarOperation cigar_operation() @property {
return _cigar[_operation_index];
}
/// CIGAR operations before current one
auto cigar_before() @property {
return _cigar[0 .. _operation_index];
}
/// CIGAR operations after current one
auto cigar_after() @property {
return _cigar[_operation_index + 1 .. _cigar.length];
}
}
else
{
/// Current CIGAR operation
CigarOperation cigar_operation() @property const {
return _current_cigar_op;
}
}
/// Position of the corresponding base on the reference.
/// If current CIGAR operation is not one of 'M', '=', 'X',
/// returns the position of the previous mapped base.
uint position() @property const {
return _reference_position;
}
/// Offset in current CIGAR operation, starting from 0.
uint cigar_operation_offset() @property const {
return _cigar_operation_offset;
}
private {
int _operation_index = void;
uint _reference_position = void;
uint _cigar_operation_offset = void;
static if (CigarExtraProperties)
{
ReversableRange!(identity, const(CigarOperation)[]) _cigar = void;
}
else
{
CigarOperation _current_cigar_op;
}
}
}
mixin template rangeMethods() {
enum CigarExtraProperties = staticIndexOf!(Option.cigarExtra, Options) != -1;
private {
CigarOperation _current_cigar_op = void;
int _index = void;
uint _at = void;
uint _ref_pos = void;
ReversableRange!(identity, const(CigarOperation)[]) _cigar = void;
}
/// Current CIGAR operation, available to all extensions
const(CigarOperation) current_cigar_operation() @property const {
return _current_cigar_op;
}
void setup(Args...)(const ref R read, Args)
{
_cigar = reversableRange(read.cigar, read.is_reverse_strand);
_index = -1;
_ref_pos = reverse_strand ? (read.position + read.basesCovered() - 1)
: read.position;
_moveToNextCigarOperator();
assert(_index >= 0);
}
void populate(Result)(ref Result result) {
result._reference_position = _ref_pos;
result._cigar_operation_offset = _at;
static if (CigarExtraProperties)
{
result._cigar = _cigar;
result._operation_index = _index;
}
else
{
result._current_cigar_op = _current_cigar_op;
}
}
void update(const ref R read)
{
++_at;
if (_current_cigar_op.is_reference_consuming) {
_ref_pos += reverse_strand ? -1 : 1;
}
if (_at == _current_cigar_op.length) {
_moveToNextCigarOperator();
}
}
void copy(Range)(const ref Range source, ref Range target) {
target.CIGAR._cigar = source.CIGAR._cigar;
target.CIGAR._index = source.CIGAR._index;
target.CIGAR._current_cigar_op = source.CIGAR._current_cigar_op;
target.CIGAR._at = source.CIGAR._at;
target.CIGAR._ref_pos = source.CIGAR._ref_pos;
}
private void _moveToNextCigarOperator() {
_at = 0;
for (++_index; _index < _cigar.length; ++_index)
{
_current_cigar_op = _cigar[_index];
if (_current_cigar_op.is_query_consuming)
break;
if (_current_cigar_op.is_reference_consuming)
{
if (reverse_strand)
_ref_pos -= _current_cigar_op.length;
else
_ref_pos += _current_cigar_op.length;
}
}
}
}
}