You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

148 lines
4.2 KiB

/*
This file is part of BioD.
Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com>
BioD is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
BioD is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
module bio.bam.readrange;
import bio.bam.virtualoffset;
import bio.bam.chunkinputstream;
import bio.bam.read;
import bio.core.utils.switchendianness;
import std.stream;
import std.algorithm;
import std.system;
/// Tuple of virtual offset of the read, and the read itself.
struct BamReadBlock {
VirtualOffset start_virtual_offset;
VirtualOffset end_virtual_offset;
BamRead bamRead;
alias bamRead this;
BamReadBlock dup() @property const {
return BamReadBlock(start_virtual_offset, end_virtual_offset, bamRead.dup);
}
}
/// Policies for bamReadRange
mixin template withOffsets() {
/**
Returns: virtual offsets of beginning and end of the current read
plus the current read itself.
*/
BamReadBlock front() @property {
return BamReadBlock(_start_voffset,
_stream.virtualTell(),
_current_record);
}
private VirtualOffset _start_voffset;
private void beforeNextBamReadLoad() {
_start_voffset = _stream.virtualTell();
}
}
/// ditto
mixin template withoutOffsets() {
/**
Returns: current bamRead
*/
BamRead front() @property {
return _current_record;
}
private void beforeNextBamReadLoad() {}
}
class BamReadRange(alias IteratePolicy)
{
/// Create new range from IChunkInputStream.
this(ref IChunkInputStream stream) {
_stream = stream;
_endian_stream = new EndianStream(_stream, Endian.littleEndian);
readNext();
}
bool empty() @property const {
return _empty;
}
mixin IteratePolicy;
void popFront() {
readNext();
}
private:
IChunkInputStream _stream;
EndianStream _endian_stream;
BamRead _current_record;
bool _empty = false;
/**
Reads next bamRead block from stream.
*/
void readNext() {
// In fact, on BAM files containing a special EOF BGZF block
// this condition will be always false!
//
// The reason is that we don't want to unpack next block just
// in order to see if it's an EOF one or not.
if (_stream.eof()) {
_empty = true;
return;
}
// In order to get the right virtual offset, we need to do it here.
beforeNextBamReadLoad();
// Here's where _empty is really set!
int block_size = void;
ubyte* ptr = cast(ubyte*)(&block_size);
auto _read = 0;
while (_read < int.sizeof) {
auto _actually_read = _endian_stream.readBlock(ptr, int.sizeof - _read);
if (_actually_read == 0) {
version(development) {
import std.stdio;
stderr.writeln("[info][bamRead range] empty, read ", _read, " bytes, expected ", int.sizeof);
}
_empty = true;
return;
}
_read += _actually_read;
ptr += _actually_read;
}
if (std.system.endian != Endian.littleEndian) {
switchEndianness(&block_size, int.sizeof);
}
_current_record = BamRead(_stream.readSlice(block_size));
}
}
/// Returns: lazy range of BamRead structs constructed from a given stream.
auto bamReadRange(alias IteratePolicy=withoutOffsets)(ref IChunkInputStream stream) {
return new BamReadRange!IteratePolicy(stream);
}