You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

210 lines
6.0 KiB

/*
This file is part of BioD.
Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com>
BioD is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
BioD is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
module bio.bam.reference;
import bio.bam.randomaccessmanager;
import bio.bam.readrange;
import bio.bam.virtualoffset;
import std.stream;
import std.exception;
import std.array;
/**
  Stores reference sequence name and length
 */
struct ReferenceSequenceInfo {
    /// Reference sequence name, with the trailing '\0' stripped
    string name;

    /// Value of the length field read from the stream
    int length;

    /**
      Constructs the structure from input stream.

      Reads, in this order: an int holding the name length (which counts
      the trailing '\0'), that many characters of name, and an int holding
      the sequence length.

      Throws: Exception if the name length read from the stream is not
              positive, i.e. there is not even room for the terminator.
     */
    this(ref Stream stream) {
        int l_name; // length of the reference name plus one
        stream.read(l_name);

        // Without this check, l_name == 0 makes the `$-1` slice bound underflow,
        // and a negative l_name converts to a huge unsigned read length.
        enforce(l_name > 0, "invalid reference sequence name length");

        name = stream.readString(l_name)[0..$-1].idup; // strip '\0' at the end
        stream.read(length);
    }
}
/**
  Represents reference sequence.

  Name, length and ID are available unconditionally. Methods that fetch
  reads or virtual offsets go through the random access manager passed to
  the constructor and throw if it is null.
 */
struct ReferenceSequence {

    /// Name of reference sequence as in BAM file
    string name() @property const {
        return _info.name;
    }

    /// Length in base pairs
    int length() @property const {
        return _info.length;
    }

    /// Reference ID
    int id() @property const {
        return _ref_id;
    }

    /// Get alignments overlapping [start, end)
    ///
    /// Throws: Exception if start >= end, or if the random access manager is null.
    auto opSlice(uint start, uint end) {
        enforce(start < end, "start must be less than end");
        enforce(_manager !is null, "random access is not available");
        return _manager.getReads(_ref_id, start, end);
    }

    /// Get all alignments
    auto opSlice() {
        return opSlice(0, length);
    }

    private alias typeof(opSlice().front) Read;
    private Read _first_read() @property {
        return opSlice().front.dup;
    }

    /// First position on the reference overlapped by reads (0-based)
    /// Returns -1 if set of reads is empty.
    int firstPosition() {
        auto reads = opSlice();
        if (reads.empty) {
            return -1;
        }
        return reads.front.position;
    }

    /// Virtual offset at which reads aligned to this reference start.
    /// If there are no reads aligned to this reference, returns virtual
    /// offset of the EOF block if it's presented, or the end of file.
    VirtualOffset startVirtualOffset() {
        auto reads = opSlice();
        if (reads.empty) {
            return _manager.eofVirtualOffset();
        }
        return reads.front.start_virtual_offset;
    }

    /// Virtual offset before which reads aligned to this reference stop.
    /// If there are no reads aligned to this reference, returns virtual
    /// offset of the EOF block if it's presented, or the end of file.
    VirtualOffset endVirtualOffset() {
        // opSlice() also checks that _manager is not null.
        if (opSlice().empty) {
            return _manager.eofVirtualOffset();
        }

        auto ioffsets = _manager.getBai().indices[_ref_id].ioffsets[];
        assert(ioffsets.length > 0);

        // Try to get startVirtualOffset of the next reference presented in the file.
        for (auto r = _ref_id + 1; r < _manager.getBai().indices.length; ++r) {
            auto reads = _manager.getReads(r, 0, uint.max);
            if (reads.empty) {
                continue;
            } else {
                return reads.front.start_virtual_offset;
            }
        }

        // No later reference has reads, but reads not belonging to this
        // reference (e.g. unmapped ones) may still follow, so we cannot just
        // return _manager.eofVirtualOffset. Instead, scan from the last
        // linear index offset and remember where the last read of this
        // reference ends.
        auto last_offset = ioffsets[$ - 1];
        auto stream = _manager.createStreamStartingFrom(last_offset);
        auto last_few_reads = bamReadRange!withOffsets(stream);

        VirtualOffset result;
        assert(!last_few_reads.empty);
        foreach (read; last_few_reads) {
            // Stop at the first read that is not on this reference; otherwise
            // the result would be the end of the last read in the whole file.
            if (read.ref_id != _ref_id) {
                break;
            }
            result = read.end_virtual_offset;
        }

        return result;
    }

    /// Last position on the reference overlapped by reads (0-based)
    ///
    /// Falls back to firstPosition() if no read with a known position is
    /// found starting from any of the linear index offsets.
    ///
    /// Throws: Exception if the random access manager is null.
    int lastPosition() {
        // Same precondition as opSlice; fail with an exception instead of
        // dereferencing a null manager.
        enforce(_manager !is null, "random access is not available");

        // The key idea is
        //  1) use last offset from linear index
        //  2) loop through all remaining reads starting from there

        auto ioffsets = _manager.getBai().indices[_ref_id].ioffsets[];

        // Signed on purpose: for an empty index this becomes -1 and the
        // loop below is skipped.
        long index = ioffsets.length - 1;

        debug {
            int reads_processed = 0;
        }

        while (index >= 0) {
            auto offset = ioffsets[index];

            auto stream = _manager.createStreamStartingFrom(offset);
            auto reads = bamReadRange(stream);

            // int.min marks "no read with a position seen yet"
            int last_position = int.min;

            foreach (read; reads) {
                debug {
                    reads_processed += 1;
                }

                // reads of this reference are over
                if (read.ref_id != _ref_id) {
                    break;
                }

                // read has no position
                if (read.position == -1) {
                    continue;
                }

                auto end_pos = read.position + read.basesCovered();
                if (end_pos > last_position)
                    last_position = end_pos;
            }

            if (last_position != int.min) {
                debug {
                    import std.stdio;
                    stderr.writeln("[debug] ReferenceSequence.lastPosition() processed ",
                                   reads_processed, " reads");
                }
                // end_pos is one past the last covered base
                return last_position - 1;
            }

            // nothing usable from this offset, retry from the previous one
            --index;
        }

        return firstPosition();
    }

    /// Params:
    ///     manager = provider of random access to reads; may be null, in which
    ///               case methods requiring random access throw
    ///     ref_id  = ID of this reference sequence
    ///     info    = name and length of this reference sequence
    this(RandomAccessManager manager, int ref_id, ReferenceSequenceInfo info) {
        _manager = manager;
        _ref_id = ref_id;
        _info = info;
    }

private:
    RandomAccessManager _manager;
    int _ref_id;
    ReferenceSequenceInfo _info;
}