Pjotr Prins 8 months ago
parent
commit
3f89719cb0

+ 24
- 9
Makefile View File

@@ -1,6 +1,10 @@
# Simple Makefile
#
# make sharedlibrary : make shared library
# make shared : make shared lib
# make lib : make static lib (nyi)
# make check
#
# You can also use 'dub' and 'dub test' instead

D_COMPILER=ldc2
DFLAGS = -wi -g -relocation-model=pic -Icontrib/undead -L-lz
@@ -17,32 +21,43 @@ endif
DLIBS = $(LIBRARY_PATH)/libphobos2-ldc.a $(LIBRARY_PATH)/libdruntime-ldc.a
DLIBS_DEBUG = $(LIBRARY_PATH)/libphobos2-ldc-debug.a $(LIBRARY_PATH)/libdruntime-ldc-debug.a

SRC = $(wildcard contrib/undead/*.d) contrib/undead/*/*.d $(wildcard bio/*.d bio/*/*.d bio/*/*/*.d bio/*/*/*/*.d bio/*/*/*/*/*.d bio/*/*/*/*/*/*.d) test/unittests.d
SRC = $(wildcard contrib/undead/*.d) contrib/undead/*/*.d $(wildcard bio/*.d bio/*/*.d bio/*/*/*.d bio/*/*/*/*.d bio/*/*/*/*/*.d bio/*/*/*/*/*/*.d) test/unittests.d test/read_bam_file.d

OBJ = $(SRC:.d=.o)
BIN = bin/biod_tests
sharedlibrary: BIN = libbiod.so
shared: LIB = libbiod.so
lib: LIB = libbiod

debug check: DFLAGS += -O0 -d-debug -unittest -link-debuglib
# debug check: DFLAGS += -O0 -d-debug -unittest -link-debuglib
check: DFLAGS += -O0 -d-debug -link-debuglib -unittest
release static: DFLAGS += -O3 -release -enable-inlining -boundscheck=off
static: DFLAGS += -static -L-Bstatic
sharedlibrary: DFLAGS += -shared
shared: DFLAGS += -shared
lib: DFLAGS += -lib

all: debug

default: all

default debug release static sharedlibrary: $(BIN)
default debug release static: $(BIN)
shared lib: $(LIB)

%.o: %.d
$(D_COMPILER) $(DFLAGS) -c $< -od=$(dir $@)

$(LIB): $(OBJ)
$(info linking lib...)
$(D_COMPILER) $(DFLAGS) $(OBJ) -of=$(LIB)

$(BIN): $(OBJ)
$(info linking...)
$(D_COMPILER) -main $(DFLAGS) $(OBJ) -of=$(BIN)
$(D_COMPILER) $(DFLAGS) $(OBJ) -of=$(BIN)

check: $(BIN)
$(info running tests...)
$(BIN) "--DRT-gcopt=gc:precise disable:1 cleanup:none"
$(info Make check running tests...)
$(BIN)

# $(BIN) "--DRT-gcopt=gc:precise disable:1 cleanup:none"

clean:
rm -vf $(OBJ)

+ 6
- 0
RELEASE-NOTES.md View File

@@ -1,3 +1,9 @@
## ChangeLog v0.2.4 (2019xxxx)

+ Fixed dub and make files
+ Dub test still fails because of GC
+ Debian package (thanks https://github.com/atille https://github.com/biod/BioD/issues/50)

## ChangeLog v0.2.3 (20191119)

+ Compiles and tests pass on Debian with dub and ldc 1.17.0

+ 1
- 1
VERSION View File

@@ -1 +1 @@
0.2.2
0.2.3

+ 32
- 13
bio/core/bgzf/block.d View File

@@ -24,7 +24,7 @@
module bio.core.bgzf.block;

import bio.std.hts.bam.constants;
import bio.core.utils.memoize;
// import bio.core.utils.memoize;
import bio.core.utils.zlib;

import std.array;
@@ -35,6 +35,9 @@ import std.exception;
/**
Structure representing BGZF block.
In general, users shouldn't use it, as it is EXTREMELY low-level.

Note it is a struct that has support for comparison based
on its crc32 value.
*/
struct BgzfBlock {
// field types are as in the SAM/BAM specification
@@ -89,6 +92,8 @@ struct BgzfBlock {
}
}

import std.stdio;

/**
Struct representing decompressed BgzfBlock

@@ -96,33 +101,45 @@ struct BgzfBlock {
and yet be able to decompress blocks in parallel.
*/
struct DecompressedBgzfBlock {
ulong start_offset;
ulong end_offset;
ubyte[] decompressed_data;
/* For the class version:
this(ulong start, ulong end, ubyte[] buf) {
start_offset = start;
end_offset = end;
decompressed_data = buf;
}
~this() {
stderr.writeln("destroy DecompressedBgzfBlock ",start_offset,":",end_offset," ",decompressed_data.sizeof);
};
*/

ulong start_offset;
ulong end_offset;
ubyte[] decompressed_data;
}

///
alias Cache!(BgzfBlock, DecompressedBgzfBlock) BgzfBlockCache;
// alias Cache!(BgzfBlock, DecompressedBgzfBlock) BgzfBlockCache;

/// Function for BGZF block decompression.
/// Reuses buffer allocated for storing compressed data,
/// i.e. after execution buffer of the passed $(D block)
/// is overwritten with uncompressed data.
DecompressedBgzfBlock decompressBgzfBlock(BgzfBlock block,
BgzfBlockCache cache=null)
DecompressedBgzfBlock decompressBgzfBlock(BgzfBlock block)
{
if (block.input_size == 0) {
return DecompressedBgzfBlock(block.start_offset,
block.start_offset + block.bsize + 1,
cast(ubyte[])[]); // EOF marker
// TODO: add check for correctness of EOF marker
return DecompressedBgzfBlock(block.start_offset,
block.start_offset + block.bsize + 1,
cast(ubyte[])[]); // EOF marker
// TODO: add check for correctness of EOF marker
}

/*
if (cache !is null) {
auto ptr = cache.lookup(block);
if (ptr !is null)
return *ptr;
}
*/

int err = void;

@@ -169,6 +186,7 @@ DecompressedBgzfBlock decompressBgzfBlock(BgzfBlock block,

assert(block.crc32 == crc32(0, uncompressed[]));

/*
if (cache !is null) {
BgzfBlock compressed_bgzf_block = block;
compressed_bgzf_block._buffer = block._buffer.dup;
@@ -180,6 +198,7 @@ DecompressedBgzfBlock decompressBgzfBlock(BgzfBlock block,
}
cache.put(compressed_bgzf_block, decompressed_bgzf_block);
}
*/

// Now copy back to block._buffer, overwriting existing data.
// It should have enough bytes already allocated.
@@ -192,6 +211,6 @@ DecompressedBgzfBlock decompressBgzfBlock(BgzfBlock block,
block._buffer[0 .. block.input_size] = uncompressed[];
block.dirty = true;

return DecompressedBgzfBlock(block.start_offset, block.end_offset,
block._buffer[0 .. block.input_size]);
auto decompressed = DecompressedBgzfBlock(block.start_offset, block.end_offset, block._buffer[0 .. block.input_size]);
return decompressed;
}

+ 35
- 15
bio/core/bgzf/inputstream.d View File

@@ -42,6 +42,15 @@ class BgzfException : Exception {
this(string msg) { super(msg); }
}

/*

Called by

randomaccessmanager.d: fillBgzfBufferFromStream(stream, true, &block, buf.ptr);
inputstream.d: auto result = fillBgzfBufferFromStream(_stream, _seekable, block, buffer,

*/

bool fillBgzfBufferFromStream(Stream stream, bool is_seekable,
BgzfBlock* block, ubyte* buffer,
size_t *number_of_bytes_read=null)
@@ -336,12 +345,13 @@ class StreamChunksSupplier : BgzfBlockSupplier {
}

///
// Provided an uncompressed stream block by class
class BgzfInputStream : Stream {
private {
BgzfBlockSupplier _supplier;
ubyte[] _data;

BgzfBlockCache _cache;
// BgzfBlockCache _cache;

ubyte[] _read_buffer;
VirtualOffset _current_vo;
@@ -355,17 +365,18 @@ class BgzfInputStream : Stream {
TaskPool _pool;
enum _max_block_size = BGZF_MAX_BLOCK_SIZE * 2;

alias Task!(decompressBgzfBlock, BgzfBlock, BgzfBlockCache)
DecompressionTask;
DecompressionTask[] _task_buf;
alias Task!(decompressBgzfBlock, BgzfBlock) DecompressionTask;
// DecompressionTask[] _task_buf;

// static here means that BlockAux has no access to
// its surrounding scope https://dlang.org/spec/struct.html
static struct BlockAux {
BgzfBlock block;
ushort skip_start;
ushort skip_end;
BgzfBlock block;
ushort skip_start;
ushort skip_end;

DecompressionTask* task;
alias task this;
DecompressionTask* task;
// alias task this; // https://dlang.org/spec/class.html#AliasThis
}

RoundBuf!BlockAux _tasks = void;
@@ -373,6 +384,7 @@ class BgzfInputStream : Stream {
size_t _offset;

bool fillNextBlock() {
// Sets up a decompression task and pushes it on the roundbuf _tasks
ubyte* p = _data.ptr + _offset;
BlockAux b = void;
if (_supplier.getNextBgzfBlock(&b.block, p,
@@ -388,15 +400,21 @@ class BgzfInputStream : Stream {
stderr.writeln("[creating task] ", b.block.start_offset, " / ", b.skip_start, " / ", b.skip_end);
}

/*
DecompressionTask tmp = void;
tmp = scopedTask!decompressBgzfBlock(b.block, _cache);
tmp = scopedTask!decompressBgzfBlock(b.block);
auto t = _task_buf.ptr + _offset / _max_block_size;
import core.stdc.string : memcpy;
memcpy(t, &tmp, DecompressionTask.sizeof);
b.task = t;
_tasks.put(b);
_pool.put(b.task);

*/
// tmp = scopedTask!decompressBgzfBlock(b.block);
auto task = task!decompressBgzfBlock(b.block);
b.task = task;
_tasks.put(b); // _tasks is roundbuf
_pool.put(b.task); // _pool is thread pool
_offset += _max_block_size;
if (_offset == _data.length)
_offset = 0;
@@ -436,20 +454,22 @@ class BgzfInputStream : Stream {

this(BgzfBlockSupplier supplier,
TaskPool pool=taskPool,
BgzfBlockCache cache=null,
// BgzfBlockCache cache=null,
size_t buffer_size=0)
{
_supplier = supplier;
_compressed_size = _supplier.totalCompressedSize();
_pool = pool;
_cache = cache;
// _cache = cache;

// The roundbuf size (n_tasks) should be at least
// the number of threads
size_t n_tasks = max(pool.size, 1) * 2;
if (buffer_size > 0)
n_tasks = max(n_tasks, buffer_size / BGZF_MAX_BLOCK_SIZE);
// n_tasks is 13 on my machine
_tasks = RoundBuf!BlockAux(n_tasks);
_task_buf = uninitializedArray!(DecompressionTask[])(n_tasks);
// _task_buf = uninitializedArray!(DecompressionTask[])(n_tasks);

_data = uninitializedArray!(ubyte[])(n_tasks * _max_block_size);


+ 8
- 8
bio/core/bgzf/outputstream.d View File

@@ -8,10 +8,10 @@
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -67,9 +67,9 @@ class BgzfOutputStream : Stream {

/// Create new BGZF output stream which will use
/// provided $(D task_pool) to do multithreaded compression.
this(Stream output_stream,
int compression_level=-1,
TaskPool task_pool=taskPool,
this(Stream output_stream,
int compression_level=-1,
TaskPool task_pool=taskPool,
size_t buffer_size=0,
size_t max_block_size=BGZF_MAX_BLOCK_SIZE,
size_t block_size=BGZF_BLOCK_SIZE)
@@ -132,7 +132,7 @@ class BgzfOutputStream : Stream {
}

/// Force flushing current block, even if it is not yet filled.
/// Should be used when it's not desired to have records crossing block borders.
/// Should be used when it's not desired to have records crossing block borders.
void flushCurrentBlock() {

if (_current_size == 0)
@@ -197,7 +197,7 @@ class BgzfOutputStream : Stream {
writeResult(block);
_compression_tasks.popFront();
}
_stream.flush();
_current_size = 0;
}
@@ -218,7 +218,7 @@ class BgzfOutputStream : Stream {

/// Adds EOF block. This function is called in close() method.
void addEofBlock() {
_stream.writeExact(BGZF_EOF.ptr, BGZF_EOF.length);
_stream.writeExact(BGZF_EOF.ptr, BGZF_EOF.length);
}
}


+ 34
- 18
bio/core/utils/roundbuf.d View File

@@ -8,10 +8,10 @@
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -25,13 +25,18 @@ module bio.core.utils.roundbuf;

import std.exception;

/// Cyclic buffer
/// Cyclic buffer, see https://en.wikipedia.org/wiki/Circular_buffer
/// Bails out if you try to push beyond its size
///
/// Note the RoundBuf has copy semantics. So if you pass in a
/// Struct it will copy its contents.

struct RoundBuf(T) {
private {
T[] _items = void;
size_t _put;
size_t _taken;
T[] _items = void; // max_length is the size of round buf
size_t _put; // moves the counter forward
size_t _start; // current start of the buffer
}

/** initializes round buffer of size $(D n) */
@@ -41,47 +46,52 @@ struct RoundBuf(T) {

/// Input range primitives
bool empty() @property const {
return _put == _taken;
return _put == _start;
}

/// ditto
/// Get the item at the front (non-destructive)
auto ref front() @property {
enforce(!empty, "roundbuffer is empty");
return _items[_taken % $];
return _items[_start % $];
}

/// ditto
/// Move the front forward (destructive)
void popFront() {
enforce(!empty, "roundbuffer is empty");
++_taken;
++_start;
}

///
/// Returns the tail item (non-destructive)
auto ref back() @property {
enforce(!empty, "roundbuffer is empty");
return _items[(_put - 1) % $];
}

/// Output range primitive
/// Add and item at the tail and move pointer forward (destructive)
void put(T item) {
enforce(!full, "roundbuffer is full");
enforce(_put < _put.max, "ringbuffer overflow");
enforce(_put < _put.max, "ringbuffer size_t overflow");
_items[_put % $] = item;
++_put;
}

/// Check if buffer is full
bool full() @property const {
return _put == _taken + _items.length;
return _put == _start + max_length;
}
/// Current number of elements
size_t length() @property const {
return _put - _taken;
return _put - _start;
}

size_t max_length() @property const {
return _items.length;
}
}

unittest {
import std.stdio;
auto buf = RoundBuf!int(4);
assert(buf.empty);

@@ -101,4 +111,10 @@ unittest {
assert(buf.front == 3);
buf.popFront();
assert(buf.front == 4);
buf.put(4);
buf.put(5);
buf.put(6);
assert(buf.length == buf.max_length);
// should bomb out
assertThrown!Exception(buf.put(7));
}

+ 22
- 20
bio/std/hts/bam/randomaccessmanager.d View File

@@ -8,10 +8,10 @@
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -67,10 +67,10 @@ private {
/// Class which random access tasks are delegated to.
class RandomAccessManager {

void setCache(BgzfBlockCache cache) {
_cache = cache;
}
// void setCache(BgzfBlockCache cache) {
// _cache = cache;
//}
void setTaskPool(TaskPool task_pool) {
_task_pool = task_pool;
}
@@ -146,7 +146,8 @@ class RandomAccessManager {
auto _compressed_stream = new EndianStream(_stream, Endian.littleEndian);
_compressed_stream.seekSet(cast(size_t)(offset.coffset));
auto supplier = new StreamSupplier(_compressed_stream, offset.uoffset);
auto bgzf_stream = new BgzfInputStream(supplier, _task_pool, _cache);
// auto bgzf_stream = new BgzfInputStream(supplier, _task_pool, _cache);
auto bgzf_stream = new BgzfInputStream(supplier, _task_pool);
return bgzf_stream;
}

@@ -154,7 +155,7 @@ class RandomAccessManager {
/// Every time new stream is used.
BamRead getReadAt(VirtualOffset offset) {
auto stream = createStreamStartingFrom(offset);
bool old_mode = _reader._seqprocmode;
_reader._seqprocmode = true;
auto read = bamReadRange(stream, _reader).front.dup;
@@ -303,7 +304,7 @@ class RandomAccessManager {
auto reads = readsFromChunks!IteratePolicy(chunks);
return filterBamReads(reads, regions);
}
/// Fetch alignments with given reference sequence id, overlapping [beg..end)
auto getReads(alias IteratePolicy=withOffsets)(BamRegion region)
{
@@ -325,7 +326,7 @@ class RandomAccessManager {
last_ref_id = region.ref_id;
}
}
static ref auto regB(ref BamRegion region) { return region.start; }
static ref auto regE(ref BamRegion region) { return region.end; }
foreach (ref group; regions_by_ref)
@@ -341,24 +342,25 @@ private:
auto fstream = new bio.core.utils.stream.File(_filename);
auto compressed_stream = new EndianStream(fstream, Endian.littleEndian);
auto supplier = new StreamChunksSupplier(compressed_stream, chunks);
auto stream = new BgzfInputStream(supplier, _task_pool, _cache);
// auto stream = new BgzfInputStream(supplier, _task_pool, _cache);
auto stream = new BgzfInputStream(supplier, _task_pool);
return bamReadRange!IteratePolicy(stream, _reader);
}
string _filename;
BaiFile _bai;
BamReader _reader;
TaskPool _task_pool;
size_t _buffer_size;

BgzfBlockCache _cache;
// BgzfBlockCache _cache;

TaskPool task_pool() @property {
if (_task_pool is null)
_task_pool = taskPool;
return _task_pool;
}
public:

static struct BamReadFilter(R) {
@@ -378,13 +380,13 @@ public:
ElementType!R front() @property {
return _current_read;
}
void popFront() {
_range.popFront();
findNext();
}

private:
private:
R _range;
uint _ref_id;
BamRegion _region;
@@ -417,9 +419,9 @@ public:

if (_current_read.position >= _region.end) {
// As reads are sorted by leftmost coordinate,
// all remaining alignments in _range
// all remaining alignments in _range
// will not overlap the current interval as well.
//
//
// [-----)
// . [-----------)
// . [---)
@@ -443,7 +445,7 @@ public:
}

if (_current_read.position +
_current_read.basesCovered() <= _region.start)
_current_read.basesCovered() <= _region.start)
{
/// ends before beginning of the region
/// [-----------)
@@ -455,7 +457,7 @@ public:
return; /// _current_read overlaps the region
}
}
_empty = true;
_empty = true;
}
}


+ 34
- 32
bio/std/hts/bam/reader.d View File

@@ -8,10 +8,10 @@
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -22,7 +22,7 @@

*/
/// Writing a script/tool for processing BAM data often starts this way:
///
///
/// ------------------------
/// import bio.std.hts.bam.reader;
///
@@ -35,7 +35,7 @@
/// }
/// }
/// ------------------------
///
///
/// Or, if a specific interval on the reference sequence is to be explored:
/// ------------------------
/// import bio.std.hts.bam.pileup;
@@ -72,6 +72,7 @@ import std.parallelism;
import std.array;
import core.stdc.stdio;
import std.string;
import std.stdio;

/**
BAM file reader, featuring parallel decompression of BGZF blocks.
@@ -82,7 +83,7 @@ class BamReader : IBamSamReader {
Creates reader associated with file or stream.
(If stream constructor is used, no random access is possible.)
$(BR)
Optionally, task pool can be specified.
Optionally, task pool can be specified.
It will be used to unpack BGZF blocks in parallel.

Example:
@@ -108,7 +109,7 @@ class BamReader : IBamSamReader {
initializeStreams();

auto magic = _bam.readString(4);
enforce(magic == "BAM\1", "Invalid file format: expected BAM\\1");

readSamHeader();
@@ -130,11 +131,12 @@ class BamReader : IBamSamReader {
this(_source_stream, task_pool);
}


/// ditto
this(string filename) {
this(filename, std.parallelism.taskPool);
this(filename, std.parallelism.taskPool);
}
/**
True if BAI file was found for this BAM file.
This is necessary for any random-access operations.
@@ -142,7 +144,7 @@ class BamReader : IBamSamReader {
Looks for files in the same directory which filename
is either the file name of BAM file with '.bai' appended,
or with the last extension replaced with '.bai'
(that is, for $(I file.bam) paths $(I file.bai) and
(that is, for $(I file.bam) paths $(I file.bai) and
$(I file.bam.bai) will be checked)
*/
bool has_index() @property {
@@ -206,7 +208,7 @@ class BamReader : IBamSamReader {
/**
Range of all alignment records in the file.
$(BR)
Element type of the returned range depends on the policy.
Element type of the returned range depends on the policy.
Default one is $(DPREF2 bam, readrange, withoutOffsets),
in this case range element type is $(DPREF2 bam, read, BamRead).
$(BR)
@@ -242,7 +244,7 @@ class BamReader : IBamSamReader {
static if (__traits(identifier, IteratePolicy) == "withOffsets") {
auto front() @property {
return _range.front;
}
}
} else static if (__traits(identifier, IteratePolicy) == "withoutOffsets") {
auto front() @property {
return _range.front.read;
@@ -262,8 +264,8 @@ class BamReader : IBamSamReader {
_bytes_read += _range.front.read.size_in_bytes;
_range.popFront();
if (_progress_bar_func !is null) {
_progress_bar_func(min(1.0,
cast(float)_bytes_read / (_stream.total_compressed_size *
_progress_bar_func(min(1.0,
cast(float)_bytes_read / (_stream.total_compressed_size *
_stream.average_compression_ratio)));
}
}
@@ -278,13 +280,13 @@ class BamReader : IBamSamReader {

/**
Returns: range of all reads in the file, calling $(I progressBarFunc)
for each read.
for each read.
$(BR)
$(I progressBarFunc) will be called
each time next alignment is read, with the argument being a number from [0.0, 1.0],
which is estimated progress percentage.
$(BR)
Notice that $(I progressBarFunc) takes $(D lazy) argument,
Notice that $(I progressBarFunc) takes $(D lazy) argument,
so that the number of relatively expensive float division operations
can be controlled by user.

@@ -306,14 +308,14 @@ class BamReader : IBamSamReader {
*/
auto readsWithProgress(alias IteratePolicy=bio.std.hts.bam.readrange.withoutOffsets)
(void delegate(lazy float p) progressBarFunc,
void delegate() finishFunc=null)
void delegate() finishFunc=null)
{
auto _decompressed_stream = getDecompressedBamReadStream();
auto reads_with_offsets = bamReadRange!withOffsets(_decompressed_stream, this);
alias ReadsWithProgressResult!(IteratePolicy,
alias ReadsWithProgressResult!(IteratePolicy,
typeof(reads_with_offsets), BgzfInputStream) Result;
return Result(reads_with_offsets, _decompressed_stream,
progressBarFunc, finishFunc);
}
@@ -344,12 +346,12 @@ class BamReader : IBamSamReader {
and be strictly less than the second one.
$(BR)
For decompression, the task pool specified at the construction is used.
*/
auto getReadsBetween(bio.core.bgzf.virtualoffset.VirtualOffset from,
*/
auto getReadsBetween(bio.core.bgzf.virtualoffset.VirtualOffset from,
bio.core.bgzf.virtualoffset.VirtualOffset to) {
enforce(from <= to, "First offset must be less than second");
enforce(_stream_is_seekable, "Stream is not seekable");
return _random_access_manager.getReadsBetween(from, to);
}

@@ -403,7 +405,7 @@ class BamReader : IBamSamReader {
*/
bio.std.hts.bam.reference.ReferenceSequence reference(int ref_id) {
enforce(ref_id < _reference_sequences.length, "Invalid reference index");
return ReferenceSequence(_random_access_manager,
return ReferenceSequence(_random_access_manager,
ref_id,
_reference_sequences[ref_id]);
}
@@ -435,7 +437,7 @@ class BamReader : IBamSamReader {
/**
Set buffer size for I/O operations. Values less than 4096 are disallowed.
$(BR)
This can help in multithreaded applications when several files are read
This can help in multithreaded applications when several files are read
simultaneously (e.g. merging).
*/
void setBufferSize(size_t buffer_size) {
@@ -447,7 +449,7 @@ class BamReader : IBamSamReader {
package bool _seqprocmode; // available for bio.std.hts.bam.readrange;

private:
string _filename; // filename (if available)
Stream _source_stream; // compressed
BgzfInputStream _decompressed_stream; // decompressed
@@ -482,7 +484,7 @@ private:
@property ref BaiFile _bai_file() { // initialized lazily
initBai();
return _dont_access_me_directly_use_bai_file_for_that;
};
};

RandomAccessManager _rndaccssmgr; // unreadable for a purpose
@property RandomAccessManager _random_access_manager() {
@@ -529,7 +531,7 @@ private:
} else {
_source_stream.seekSet(0);
return _source_stream;
}
}
} else {
return null;
}
@@ -544,7 +546,7 @@ private:
compressed_stream);

return new BgzfInputStream(block_supplier, _task_pool,
null, _buffer_size);
_buffer_size);
}

// get decompressed stream starting from the first alignment record
@@ -556,8 +558,8 @@ private:

compressed_stream.seekCur(_reads_start_voffset.coffset);
auto block_supplier = new StreamSupplier(compressed_stream);
auto stream = new BgzfInputStream(block_supplier, _task_pool,
null, _buffer_size);
auto stream = new BgzfInputStream(block_supplier, _task_pool,
_buffer_size);
stream.readString(_reads_start_voffset.uoffset);
return stream;
} else {
@@ -568,9 +570,9 @@ private:

// sets up the streams and ranges
void initializeStreams() {
_decompressed_stream = getDecompressedStream();
_bam = new EndianStream(_decompressed_stream, Endian.littleEndian);
_bam = new EndianStream(_decompressed_stream, Endian.littleEndian);
}

// initializes _header

+ 52
- 0
test/read_bam_file.d View File

@@ -0,0 +1,52 @@

import bio.std.hts.bam.cigar;
import bio.std.hts.bam.reader;
import bio.std.hts.bam.writer;
import bio.std.hts.sam.reader;
import bio.std.hts.sam.header;
import bio.std.hts.bam.md.core;
import bio.std.hts.bam.md.reconstruct;
import bio.std.hts.bam.pileup;
import bio.std.hts.bam.baseinfo;
import bio.std.hts.bam.validation.samheader;
import bio.std.hts.bam.validation.alignment;
import bio.std.hts.utils.samheadermerger;
import bio.std.hts.sam.utils.recordparser;
import bio.core.bgzf.block;
import bio.core.bgzf.inputstream;
import bio.core.bgzf.outputstream;
import bio.core.utils.roundbuf;
import bio.core.utils.range;
import bio.core.utils.tmpfile;
import bio.core.utils.stream;
import bio.core.sequence;
import bio.core.base;
import bio.core.tinymap;
import bio.core.utils.roundbuf;

import std.path;
import std.range;
import std.stdio;
// import undead.stream;
import std.algorithm;
import std.array;
import std.conv;
import std.exception;
import std.math;
import std.typetuple;
import std.regex;
import core.memory;

void main() {
BamReader bam;
auto fn = buildPath(dirName(__FILE__), "data", "ex1_header.bam");
foreach (i; 0 .. 60)
{
bam = new BamReader(fn);
assert(bam.header.format_version == "1.3");
}
// Time to kick in GC
// stderr.writeln("**** Calling GC final");
// GC.collect();
// stderr.writeln("**** Past calling GC final");
}

+ 0
- 2
test/unittests.d View File

@@ -487,5 +487,3 @@ unittest {
writer.finish();
}
}

void main() {}

Loading…
Cancel
Save