Browse Source

Merge pull request #25 from pjotrp/master

Add compareCoordinatesAndStrand to fix sorting test in depth
bio2
Artem Tarasov 5 years ago
committed by GitHub
parent
commit
da892d9e31
  1. 26
      bio/bam/multireader.d
  2. 274
      bio/bam/read.d

26
bio/bam/multireader.d

@ -8,10 +8,10 @@
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@ -66,7 +66,7 @@ bool compare(T)(auto ref T r1, auto ref T r2) {
sorting_order = r1[1].merged_header.sorting_order;
if (sorting_order == SortingOrder.coordinate)
return compareCoordinates(r1[0], r2[0]);
return compareCoordinatesAndStrand(r1[0], r2[0]);
else if (sorting_order == SortingOrder.queryname)
return compareReadNames(r1[0], r2[0]);
else
@ -86,10 +86,10 @@ auto readRange(BamReader reader, SamHeaderMerger merger, FileId index) {
return zip(reader.reads.multiBamReads(index), repeat(merger), repeat(index));
}
// (BamReader, SamHeaderMerger, FileId, int, uint, uint) ->
// (BamReader, SamHeaderMerger, FileId, int, uint, uint) ->
// [(MultiBamRead, SamHeaderMerger, FileId)]
auto readRange(BamReader reader, SamHeaderMerger merger, FileId index,
int ref_id, uint start, uint end)
int ref_id, uint start, uint end)
{
int old_ref_id = ref_id;
if (merger !is null)
@ -139,12 +139,12 @@ auto readRangesWithProgress
.map!(t => readRangeWithProgress(t[0], t[1], t[2], f, u(t[2])))();
}
// ([BamReader], SamHeaderMerger, int, uint, uint) ->
// ([BamReader], SamHeaderMerger, int, uint, uint) ->
// [[(MultiBamRead, SamHeaderMerger, FileId)]]
auto readRanges(BamReader[] readers, SamHeaderMerger merger,
int ref_id, uint start, uint end)
auto readRanges(BamReader[] readers, SamHeaderMerger merger,
int ref_id, uint start, uint end)
{
return readers.zip(repeat(merger), iota(readers.length),
return readers.zip(repeat(merger), iota(readers.length),
repeat(ref_id), repeat(start), repeat(end))
.map!(t => readRange(t[0], t[1], t[2], t[3], t[4], t[5]))();
}
@ -160,8 +160,8 @@ auto readRanges(BamReader[] readers, SamHeaderMerger merger, BamRegion[] regions
// tweaks RG and PG tags, and reference sequence ID
// [[(BamRead, SamHeaderMerger, size_t)]] -> [[MultiBamRead]]
auto adjustTags(R)(R reads_with_aux_info, TaskPool pool, size_t bufsize)
if (isInputRange!R)
auto adjustTags(R)(R reads_with_aux_info, TaskPool pool, size_t bufsize)
if (isInputRange!R)
{
alias R2 = typeof(pool.map!adjustTagsInRange(reads_with_aux_info.front, 1));
R2[] result;
@ -189,7 +189,7 @@ auto adjustTagsInRange(R)(R read_with_aux_info) if (!isInputRange!R) {
auto new_ref_id = to!int(ref_id_map[file_id][old_ref_id]);
if (new_ref_id != old_ref_id)
read.ref_id = new_ref_id;
}
}
auto program = read["PG"];
if (!program.is_nothing) {
@ -216,7 +216,7 @@ auto adjustTagsInRange(R)(R read_with_aux_info) if (!isInputRange!R) {
///
class MultiBamReader {
///
this(BamReader[] readers) {
_readers = readers;

274
bio/bam/read.d

@ -8,10 +8,10 @@
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@ -31,7 +31,7 @@
/// ...
/// assert(!read.is_unmapped); // check flag
/// assert(read.ref_id != -1); // access field
///
///
/// int edit_distance = to!int(read["NM"]); // access tag
/// read["NM"] = 0; // modify tag
/// read["NM"] = null; // remove tag
@ -82,14 +82,14 @@ struct CigarOperation {
/*
WARNING!
It is very essential that the size of
It is very essential that the size of
this struct is EXACTLY equal to uint.sizeof!
The reason is to avoid copying of arrays during alignment parsing.
Namely, when some_pointer points to raw cigar data,
we can just do a cast. This allows to access those data
directly, not doing any memory allocations.
directly, not doing any memory allocations.
*/
private uint raw; // raw data from BAM
@ -121,7 +121,7 @@ struct CigarOperation {
uint length() @property const nothrow {
return raw >> 4;
}
/// CIGAR operation as one of MIDNSHP=X.
/// Absent or invalid operation is represented by '?'
char type() @property const nothrow {
@ -153,7 +153,7 @@ struct CigarOperation {
return ((raw & 0xF) >> 1) == 2; // 4 or 5
}
private void toSam(Sink)(auto ref Sink sink) const
private void toSam(Sink)(auto ref Sink sink) const
if (isSomeSink!Sink)
{
sink.write(length);
@ -170,7 +170,7 @@ struct CigarOperation {
struct ExtendedCigarRange(CigarOpRange, MdOpRange) {
static assert(isInputRange!CigarOpRange && is(Unqual!(ElementType!CigarOpRange) == CigarOperation));
static assert(isInputRange!MdOpRange && is(Unqual!(ElementType!MdOpRange) == MdOperation));
private {
CigarOpRange _cigar;
MdOpRange _md_ops;
@ -199,7 +199,7 @@ struct ExtendedCigarRange(CigarOpRange, MdOpRange) {
import std.stdio;
writeln(_front_cigar_op, " - ", _front_md_op);
}
if (_front_cigar_op.type != 'M')
return _front_cigar_op;
@ -208,7 +208,7 @@ struct ExtendedCigarRange(CigarOpRange, MdOpRange) {
uint len = min(_front_md_op.match, _front_cigar_op.length);
return CigarOperation(len, '=');
}
assert(_front_md_op.is_mismatch);
return CigarOperation(min(_n_mismatches, _front_cigar_op.length), 'X');
}
@ -237,7 +237,7 @@ struct ExtendedCigarRange(CigarOpRange, MdOpRange) {
auto len = _front_cigar_op.length;
if (_n_mismatches > 0) {
enforce(_front_md_op.is_mismatch);
if (len > _n_mismatches) {
_front_cigar_op = CigarOperation(len - _n_mismatches, 'M');
_n_mismatches = 0;
@ -252,7 +252,7 @@ struct ExtendedCigarRange(CigarOpRange, MdOpRange) {
} else {
enforce(_front_md_op.is_match);
auto n_matches = _front_md_op.match;
if (len > n_matches) {
_front_cigar_op = CigarOperation(len - n_matches, 'M');
fetchNextMdOp();
@ -265,14 +265,14 @@ struct ExtendedCigarRange(CigarOpRange, MdOpRange) {
}
}
}
private {
void fetchNextCigarOp() {
if (_cigar.empty) {
_empty = true;
return;
}
_front_cigar_op = _cigar.front;
_cigar.popFront();
}
@ -282,7 +282,7 @@ struct ExtendedCigarRange(CigarOpRange, MdOpRange) {
return;
_n_mismatches = 0;
_front_md_op = _md_ops.front;
_md_ops.popFront();
@ -301,7 +301,7 @@ auto makeExtendedCigar(CigarOpRange, MdOpRange)(CigarOpRange cigar, MdOpRange md
return ExtendedCigarRange!(CigarOpRange, MdOpRange)(cigar, md_ops);
}
/**
/**
BAM record representation.
*/
struct BamRead {
@ -377,42 +377,42 @@ struct BamRead {
/// Next segment in the template unmapped
@property bool mate_is_unmapped() const nothrow { return cast(bool)(flag & 0x8); }
/// ditto
@property void mate_is_unmapped(bool b) { _setFlag( 3, b); }
@property void mate_is_unmapped(bool b) { _setFlag( 3, b); }
/// Sequence being reverse complemented
@property bool is_reverse_strand() const nothrow { return cast(bool)(flag & 0x10); }
/// ditto
@property void is_reverse_strand(bool b) { _setFlag( 4, b); }
@property void is_reverse_strand(bool b) { _setFlag( 4, b); }
/// Sequence of the next segment in the template being reversed
@property bool mate_is_reverse_strand() const nothrow { return cast(bool)(flag & 0x20); }
/// ditto
@property void mate_is_reverse_strand(bool b) { _setFlag( 5, b); }
@property void mate_is_reverse_strand(bool b) { _setFlag( 5, b); }
/// The first segment in the template
@property bool is_first_of_pair() const nothrow { return cast(bool)(flag & 0x40); }
/// ditto
@property void is_first_of_pair(bool b) { _setFlag( 6, b); }
@property void is_first_of_pair(bool b) { _setFlag( 6, b); }
/// The last segment in the template
@property bool is_second_of_pair() const nothrow { return cast(bool)(flag & 0x80); }
/// ditto
@property void is_second_of_pair(bool b) { _setFlag( 7, b); }
@property void is_second_of_pair(bool b) { _setFlag( 7, b); }
/// Secondary alignment
@property bool is_secondary_alignment() const nothrow { return cast(bool)(flag & 0x100); }
/// ditto
@property void is_secondary_alignment(bool b) { _setFlag( 8, b); }
@property void is_secondary_alignment(bool b) { _setFlag( 8, b); }
/// Not passing quality controls
@property bool failed_quality_control() const nothrow { return cast(bool)(flag & 0x200); }
/// ditto
@property void failed_quality_control(bool b) { _setFlag( 9, b); }
@property void failed_quality_control(bool b) { _setFlag( 9, b); }
/// PCR or optical duplicate
@property bool is_duplicate() const nothrow { return cast(bool)(flag & 0x400); }
/// ditto
@property void is_duplicate(bool b) { _setFlag(10, b); }
@property void is_duplicate(bool b) { _setFlag(10, b); }
/// Supplementary alignment
@property bool is_supplementary() const nothrow { return cast(bool)(flag & 0x800); }
@ -438,10 +438,10 @@ struct BamRead {
/// ditto
@property void name(string new_name) {
enforce(new_name.length >= 1 && new_name.length <= 255,
enforce(new_name.length >= 1 && new_name.length <= 255,
"name length must be in 1-255 range");
_dup();
bio.bam.utils.array.replaceSlice(_chunk,
bio.bam.utils.array.replaceSlice(_chunk,
_chunk[_read_name_offset .. _read_name_offset + _l_read_name - 1],
cast(ubyte[])new_name);
_l_read_name = cast(ubyte)(new_name.length + 1);
@ -449,7 +449,7 @@ struct BamRead {
/// List of CIGAR operations
@property const(CigarOperation)[] cigar() const nothrow {
return cast(const(CigarOperation)[])(_chunk[_cigar_offset .. _cigar_offset +
return cast(const(CigarOperation)[])(_chunk[_cigar_offset .. _cigar_offset +
_n_cigar_op * CigarOperation.sizeof]);
}
@ -534,32 +534,32 @@ struct BamRead {
return opIndex(_len - 1);
}
/*
/*
I have no fucking idea why this tiny piece of code
does NOT get inlined by stupid DMD compiler.
Therefore I use string mixin instead.
Therefore I use string mixin instead.
(hell yeah! Back to the 90s! C macros rulez!)
private size_t _getActualPosition(size_t index) const
{
if (_use_first_4_bits) {
// [0 1] [2 3] [4 5] [6 7] ...
// |
// V
// 0 1 2 3
// |
// V
// 0 1 2 3
return index >> 1;
} else {
// [. 0] [1 2] [3 4] [5 6] ...
// |
// V
// 0 1 2 3
// |
// V
// 0 1 2 3
return (index >> 1) + (index & 1);
}
}*/
}*/
private static string _getActualPosition(string index) {
return "((" ~ index ~") >> 1) + " ~
return "((" ~ index ~") >> 1) + " ~
"(_use_first_4_bits ? 0 : ((" ~ index ~ ") & 1))";
}
@ -574,15 +574,15 @@ struct BamRead {
///
@property SequenceResult save() const {
return SequenceResult(_data[mixin(_getActualPosition("_index")) .. $],
_len - _index,
return SequenceResult(_data[mixin(_getActualPosition("_index")) .. $],
_len - _index,
_useFirst4Bits(_index));
}
///
SequenceResult opSlice(size_t i, size_t j) const {
return SequenceResult(_data[mixin(_getActualPosition("_index + i")) .. $],
j - i,
return SequenceResult(_data[mixin(_getActualPosition("_index + i")) .. $],
j - i,
_useFirst4Bits(_index + i));
}
@ -668,7 +668,7 @@ struct BamRead {
static assert(isRandomAccessRange!(ReturnType!sequence));
/// Sets query sequence. Sets all base qualities to 255 (i.e. unknown).
@property void sequence(string seq)
@property void sequence(string seq)
{
_dup();
@ -683,8 +683,8 @@ struct BamRead {
replacement[i] |= cast(ubyte)(Base(seq[2 * i + 1]).internal_code);
}
bio.bam.utils.array.replaceSlice(_chunk,
_chunk[_seq_offset .. _tags_offset],
bio.bam.utils.array.replaceSlice(_chunk,
_chunk[_seq_offset .. _tags_offset],
replacement);
_l_seq = cast(int)seq.length;
@ -715,7 +715,7 @@ struct BamRead {
// 3) the code will be too complicated, whereas there're
// not so many users of big-endian systems
//
// In summa, BAM is little-endian format, so big-endian
// In summa, BAM is little-endian format, so big-endian
// users will suffer anyway, it's unavoidable.
_chunk = chunk;
@ -727,33 +727,33 @@ struct BamRead {
// Dealing with tags is the responsibility of TagStorage.
fixTagStorageByteOrder();
}
}
}
// Doesn't touch tags, only fields.
// Doesn't touch tags, only fields.
// @@@TODO: NEEDS TESTING@@@
private void switchChunkEndianness() {
// First 8 fields are 32-bit integers:
//
// 0) refID int
// 1) pos int
// 2) bin_mq_nl uint
// 3) flag_nc uint
// 4) l_seq int
// 5) next_refID int
// 6) next_pos int
// 7) tlen int
// First 8 fields are 32-bit integers:
//
// 0) refID int
// 1) pos int
// 2) bin_mq_nl uint
// 3) flag_nc uint
// 4) l_seq int
// 5) next_refID int
// 6) next_pos int
// 7) tlen int
// ----------------------------------------------------
// (after them name follows which is string)
//
// (after them name follows which is string)
//
switchEndianness(_chunk.ptr, 8 * uint.sizeof);
// Then we need to switch endianness of CIGAR data:
switchEndianness(_chunk.ptr + _cigar_offset,
switchEndianness(_chunk.ptr + _cigar_offset,
_n_cigar_op * uint.sizeof);
}
private size_t calculateChunkSize(string read_name,
string sequence,
private size_t calculateChunkSize(string read_name,
string sequence,
in CigarOperation[] cigar)
{
return 8 * int.sizeof
@ -776,7 +776,7 @@ struct BamRead {
if (this._chunk is null) {
this._chunk = new ubyte[calculateChunkSize(read_name, sequence, cigar)];
}
this._refID = -1; // set default values
this._pos = -1; // according to SAM/BAM
this._mapq = 255; // specification
@ -817,7 +817,7 @@ struct BamRead {
return result;
}
/// Compare two alignments, including tags
/// Compare two alignments, including tags
/// (the tags must follow in the same order for equality).
bool opEquals(BamRead other) const pure nothrow {
// don't forget about _is_slice trick
@ -835,7 +835,7 @@ struct BamRead {
@property size_t size_in_bytes() const {
return int.sizeof + _chunk.length;
}
package void write(BamWriter writer) {
writer.writeInteger(cast(int)(_chunk.length));
@ -858,7 +858,7 @@ struct BamRead {
/// Packs message in the following format:
/// $(BR)
/// MsgPack array with elements
/// $(OL
/// $(OL
/// $(LI name - string)
/// $(LI flag - ushort)
/// $(LI reference sequence id - int)
@ -916,9 +916,9 @@ struct BamRead {
throw new FormatException("unknown format specifier");
}
}
/// ditto
void toSam(Sink)(auto ref Sink sink) const
void toSam(Sink)(auto ref Sink sink) const
if (isSomeSink!Sink)
{
sink.write(name);
@ -1025,10 +1025,10 @@ struct BamRead {
} else {
sink.writeJson(_reader.reference_sequences[mate_ref_id].name);
}
sink.write(`,"pnext":`); sink.write(mate_position + 1);
sink.write(`,"tlen":`); sink.write(template_length);
sink.write(`,"seq":"`);
if (sequence_length == 0)
sink.write('*');
@ -1041,7 +1041,7 @@ struct BamRead {
sink.writeJson(base_qualities);
sink.write(`,"tags":{`);
bool not_first = false;
foreach (k, v; this) {
if (not_first)
@ -1087,8 +1087,8 @@ struct BamRead {
void raw_data(ubyte[] data) @property {
_chunk = data;
}
package ubyte[] _chunk; // holds all the data,
package ubyte[] _chunk; // holds all the data,
// the access is organized via properties
// (see below)
@ -1130,36 +1130,36 @@ private:
// Official field names from SAM/BAM specification.
// For internal use only
@property int _refID() const nothrow {
return *(cast( int*)(_chunk.ptr + int.sizeof * 0));
@property int _refID() const nothrow {
return *(cast( int*)(_chunk.ptr + int.sizeof * 0));
}
@property int _pos() const nothrow {
return *(cast( int*)(_chunk.ptr + int.sizeof * 1));
@property int _pos() const nothrow {
return *(cast( int*)(_chunk.ptr + int.sizeof * 1));
}
@property uint _bin_mq_nl() const nothrow pure @system {
return *(cast(uint*)(_chunk.ptr + int.sizeof * 2));
@property uint _bin_mq_nl() const nothrow pure @system {
return *(cast(uint*)(_chunk.ptr + int.sizeof * 2));
}
@property uint _flag_nc() const nothrow {
return *(cast(uint*)(_chunk.ptr + int.sizeof * 3));
@property uint _flag_nc() const nothrow {
return *(cast(uint*)(_chunk.ptr + int.sizeof * 3));
}
@property int _l_seq() const nothrow {
return *(cast( int*)(_chunk.ptr + int.sizeof * 4));
@property int _l_seq() const nothrow {
return *(cast( int*)(_chunk.ptr + int.sizeof * 4));
}
@property int _next_refID() const nothrow {
return *(cast( int*)(_chunk.ptr + int.sizeof * 5));
return *(cast( int*)(_chunk.ptr + int.sizeof * 5));
}
@property int _next_pos() const nothrow {
return *(cast( int*)(_chunk.ptr + int.sizeof * 6));
@property int _next_pos() const nothrow {
return *(cast( int*)(_chunk.ptr + int.sizeof * 6));
}
@property int _tlen() const nothrow {
return *(cast( int*)(_chunk.ptr + int.sizeof * 7));
return *(cast( int*)(_chunk.ptr + int.sizeof * 7));
}
// Setters, also only for internal use
@ -1176,29 +1176,29 @@ private:
//
// The layout of bin_mq_nl and flag_nc is as follows
// (upper bits -------> lower bits):
//
//
// bin_mq_nl [ { bin (16b) } { mapping quality (8b) } { read name length (8b) } ]
//
// flag_nc [ { flag (16b) } { n_cigar_op (16b) } ]
//
@property ushort _bin() const nothrow {
return _bin_mq_nl >> 16;
@property ushort _bin() const nothrow {
return _bin_mq_nl >> 16;
}
@property ubyte _mapq() const nothrow {
return (_bin_mq_nl >> 8) & 0xFF;
@property ubyte _mapq() const nothrow {
return (_bin_mq_nl >> 8) & 0xFF;
}
@property ubyte _l_read_name() const nothrow pure {
return _bin_mq_nl & 0xFF;
@property ubyte _l_read_name() const nothrow pure {
return _bin_mq_nl & 0xFF;
}
@property ushort _flag() const nothrow {
return _flag_nc >> 16;
@property ushort _flag() const nothrow {
return _flag_nc >> 16;
}
@property ushort _n_cigar_op() const nothrow {
return _flag_nc & 0xFFFF;
@property ushort _n_cigar_op() const nothrow {
return _flag_nc & 0xFFFF;
}
// Setters for those properties
@property void _bin(ushort n) { _bin_mq_nl = (_bin_mq_nl & 0xFFFF) | (n << 16); }
@property void _bin(ushort n) { _bin_mq_nl = (_bin_mq_nl & 0xFFFF) | (n << 16); }
@property void _mapq(ubyte n) { _bin_mq_nl = (_bin_mq_nl & ~0xFF00) | (n << 8); }
@property void _l_read_name(ubyte n) { _bin_mq_nl = (_bin_mq_nl & ~0xFF ) | n; }
@property void _flag(ushort n) { _flag_nc = (_flag_nc & 0xFFFF) | (n << 16); }
@ -1207,24 +1207,24 @@ private:
// Offsets of various arrays in bytes.
// Currently, are computed each time, so if speed will be an issue,
// they can be made fields instead of properties.
@property size_t _read_name_offset() const nothrow pure {
return 8 * int.sizeof;
@property size_t _read_name_offset() const nothrow pure {
return 8 * int.sizeof;
}
@property size_t _cigar_offset() const nothrow pure {
return _read_name_offset + _l_read_name * char.sizeof;
@property size_t _cigar_offset() const nothrow pure {
return _read_name_offset + _l_read_name * char.sizeof;
}
@property size_t _seq_offset() const nothrow {
return _cigar_offset + _n_cigar_op * uint.sizeof;
@property size_t _seq_offset() const nothrow {
return _cigar_offset + _n_cigar_op * uint.sizeof;
}
@property size_t _qual_offset() const nothrow {
@property size_t _qual_offset() const nothrow {
return _seq_offset + (_l_seq + 1) / 2;
}
// Offset of auxiliary data
@property size_t _tags_offset() const nothrow {
@property size_t _tags_offset() const nothrow {
return _qual_offset + _l_seq;
}
@ -1257,7 +1257,7 @@ public:
}
/// Lazy tag storage.
/// Lazy tag storage.
///
/// Provides hash-like access and opportunity to iterate
/// storage like an associative array.
@ -1298,7 +1298,7 @@ mixin template TagStorage() {
auto __tags_chunk = _tags_chunk; // _tags_chunk is evaluated lazily
if (__tags_chunk.length < 4)
return Value(null);
size_t offset = 0;
while (offset + 1 < __tags_chunk.length) {
if (__tags_chunk[offset .. offset + 2] == key) {
@ -1313,8 +1313,8 @@ mixin template TagStorage() {
}
/// ditto
void opIndexAssign(T)(T value, string key)
if (is(T == Value) || __traits(compiles, GetTypeId!T))
void opIndexAssign(T)(T value, string key)
if (is(T == Value) || __traits(compiles, GetTypeId!T))
{
static if(is(T == Value)) {
enforce(key.length == 2, "Key length must be 2");
@ -1378,7 +1378,7 @@ mixin template TagStorage() {
auto __tags_chunk = _tags_chunk;
skipValue(offset, __tags_chunk); // now offset is updated and points to the end
auto end = offset;
prepareSlice(_chunk, __tags_chunk[begin .. end], sizeInBytes(value));
emplaceValue(_chunk.ptr + _tags_offset + begin, value);
@ -1428,9 +1428,9 @@ mixin template TagStorage() {
if (std.system.endian == Endian.littleEndian) {
writer.writeByteArray(_tags_chunk[]);
} else {
fixTagStorageByteOrder();
fixTagStorageByteOrder();
writer.writeByteArray(_tags_chunk[]);
fixTagStorageByteOrder();
fixTagStorageByteOrder();
}
}
@ -1486,7 +1486,7 @@ mixin template TagStorage() {
p += size;
}
} else {
// skip
// skip
p += length;
}
} else {
@ -1509,8 +1509,8 @@ unittest {
import std.math;
writeln("Testing BamRead behaviour...");
auto read = BamRead("readname",
"AGCTGACTACGTAATAGCCCTA",
auto read = BamRead("readname",
"AGCTGACTACGTAATAGCCCTA",
[CigarOperation(22, 'M')]);
assert(read.sequence_length == 22);
assert(read.cigar.length == 1);
@ -1522,7 +1522,7 @@ unittest {
assert(read.name == "anothername");
assert(read.cigarString() == "22M");
read.base_qualities = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
read.base_qualities = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22];
assert(reduce!"a+b"(0, read.base_qualities) == 253);
@ -1602,7 +1602,7 @@ unittest {
/// $(P The idea is that this should be a drop-in replacement for BamRead in algorithms,
/// as the struct uses 'alias this' construction for the wrapped read.)
struct EagerBamRead(R=BamRead) {
///
///
this(R read) {
this.read = read;
this.end_position = read.position + read.basesCovered();
@ -1612,7 +1612,7 @@ struct EagerBamRead(R=BamRead) {
R read;
///
alias read this;
/// End position on the reference, computed as position + basesCovered().
int end_position;
@ -1629,11 +1629,11 @@ template isBamRead(T)
{
static if (is(Unqual!T : BamRead))
enum isBamRead = true;
else
enum isBamRead = __traits(compiles,
else
enum isBamRead = __traits(compiles,
{
T t; bool p;
p = t.ref_id == 1; p = t.position == 2; p = t.bin.id == 3;
p = t.ref_id == 1; p = t.position == 2; p = t.bin.id == 3;
p = t.mapping_quality == 4; p = t.flag == 5; p = t.sequence_length == 6;
p = t.mate_ref_id == 7; p = t.mate_position == 8; p = t.template_length == 9;
p = t.is_paired; p = t.proper_pair; p = t.is_unmapped;
@ -1649,22 +1649,22 @@ template isBamRead(T)
/// (return whether first read is 'less' than second))
///
/// $(P This function can be called on:
/// $(UL
/// $(UL
/// $(LI two reads)
/// $(LI read and string in any order)))
bool compareReadNames(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
bool compareReadNames(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isBamRead!R1 && isBamRead!R2)
{
return a1.name < a2.name;
}
bool compareReadNames(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
bool compareReadNames(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isBamRead!R1 && isSomeString!R2)
{
return a1.name < a2;
}
bool compareReadNames(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
bool compareReadNames(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isSomeString!R1 && isBamRead!R2)
{
return a1 < a2.name;
@ -1747,10 +1747,12 @@ unittest {
/// (returns whether first read is 'less' than second))
///
/// $(P This function can be called on:
/// $(UL
/// $(UL
/// $(LI two reads (in this case, reference IDs are also taken into account))
/// $(LI read and integer in any order)))
bool compareCoordinates(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
/// This function takes read direction into account (used for original samtools style sorting)
bool compareCoordinatesAndStrand(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isBamRead!R1 && isBamRead!R2)
{
if (a1.ref_id == -1) return false; // unmapped reads should be last
@ -1762,6 +1764,16 @@ bool compareCoordinates(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
return !a1.is_reverse_strand && a2.is_reverse_strand;
}
bool compareCoordinates(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isBamRead!R1 && isBamRead!R2)
{
if (a1.ref_id == -1) return false; // unmapped reads should be last
if (a2.ref_id == -1) return true;
if (a1.ref_id < a2.ref_id) return true;
if (a1.ref_id > a2.ref_id) return false;
return (a1.position < a2.position);
}
bool compareCoordinates(R1, R2)(const auto ref R1 a1, const auto ref R2 a2)
if (isBamRead!R1 && isIntegral!R2)
{

Loading…
Cancel
Save