@@ -1,6 +1,10 @@ | |||
# Simple Makefile | |||
# | |||
# make sharedlibrary : make shared library | |||
# make shared : make shared lib | |||
# make lib : make static lib (nyi) | |||
# make check | |||
# | |||
# You can also use 'dub' and 'dub test' instead | |||
D_COMPILER=ldc2 | |||
DFLAGS = -wi -g -relocation-model=pic -Icontrib/undead -L-lz | |||
@@ -17,32 +21,43 @@ endif | |||
DLIBS = $(LIBRARY_PATH)/libphobos2-ldc.a $(LIBRARY_PATH)/libdruntime-ldc.a | |||
DLIBS_DEBUG = $(LIBRARY_PATH)/libphobos2-ldc-debug.a $(LIBRARY_PATH)/libdruntime-ldc-debug.a | |||
SRC = $(wildcard contrib/undead/*.d) contrib/undead/*/*.d $(wildcard bio/*.d bio/*/*.d bio/*/*/*.d bio/*/*/*/*.d bio/*/*/*/*/*.d bio/*/*/*/*/*/*.d) test/unittests.d | |||
SRC = $(wildcard contrib/undead/*.d) contrib/undead/*/*.d $(wildcard bio/*.d bio/*/*.d bio/*/*/*.d bio/*/*/*/*.d bio/*/*/*/*/*.d bio/*/*/*/*/*/*.d) test/unittests.d test/read_bam_file.d | |||
OBJ = $(SRC:.d=.o) | |||
BIN = bin/biod_tests | |||
sharedlibrary: BIN = libbiod.so | |||
shared: LIB = libbiod.so | |||
lib: LIB = libbiod | |||
debug check: DFLAGS += -O0 -d-debug -unittest -link-debuglib | |||
# debug check: DFLAGS += -O0 -d-debug -unittest -link-debuglib | |||
check: DFLAGS += -O0 -d-debug -link-debuglib -unittest | |||
release static: DFLAGS += -O3 -release -enable-inlining -boundscheck=off | |||
static: DFLAGS += -static -L-Bstatic | |||
sharedlibrary: DFLAGS += -shared | |||
shared: DFLAGS += -shared | |||
lib: DFLAGS += -lib | |||
all: debug | |||
default: all | |||
default debug release static sharedlibrary: $(BIN) | |||
default debug release static: $(BIN) | |||
shared lib: $(LIB) | |||
%.o: %.d | |||
$(D_COMPILER) $(DFLAGS) -c $< -od=$(dir $@) | |||
$(LIB): $(OBJ) | |||
$(info linking lib...) | |||
$(D_COMPILER) $(DFLAGS) $(OBJ) -of=$(LIB) | |||
$(BIN): $(OBJ) | |||
$(info linking...) | |||
$(D_COMPILER) -main $(DFLAGS) $(OBJ) -of=$(BIN) | |||
$(D_COMPILER) $(DFLAGS) $(OBJ) -of=$(BIN) | |||
check: $(BIN) | |||
$(info running tests...) | |||
$(BIN) "--DRT-gcopt=gc:precise disable:1 cleanup:none" | |||
$(info Make check running tests...) | |||
$(BIN) | |||
# $(BIN) "--DRT-gcopt=gc:precise disable:1 cleanup:none" | |||
clean: | |||
rm -vf $(OBJ) | |||
@@ -1,3 +1,9 @@ | |||
## ChangeLog v0.2.4 (2019xxxx) | |||
+ Fixed dub and make files | |||
+ Dub test still fails because of GC | |||
+ Debian package (thanks https://github.com/atille https://github.com/biod/BioD/issues/50) | |||
## ChangeLog v0.2.3 (20191119) | |||
+ Compiles and tests pass on Debian with dub and ldc 1.17.0 | |||
@@ -1 +1 @@ | |||
0.2.2 | |||
0.2.3 |
@@ -24,7 +24,7 @@ | |||
module bio.core.bgzf.block; | |||
import bio.std.hts.bam.constants; | |||
import bio.core.utils.memoize; | |||
// import bio.core.utils.memoize; | |||
import bio.core.utils.zlib; | |||
import std.array; | |||
@@ -35,6 +35,9 @@ import std.exception; | |||
/** | |||
Structure representing BGZF block. | |||
In general, users shouldn't use it, as it is EXTREMELY low-level. | |||
Note it is a struct that has support for comparison based | |||
on its crc32 value. | |||
*/ | |||
struct BgzfBlock { | |||
// field types are as in the SAM/BAM specification | |||
@@ -89,6 +92,8 @@ struct BgzfBlock { | |||
} | |||
} | |||
import std.stdio; | |||
/** | |||
Struct representing decompressed BgzfBlock | |||
@@ -96,33 +101,45 @@ struct BgzfBlock { | |||
and yet be able to decompress blocks in parallel. | |||
*/ | |||
struct DecompressedBgzfBlock { | |||
ulong start_offset; | |||
ulong end_offset; | |||
ubyte[] decompressed_data; | |||
/* For the class version: | |||
this(ulong start, ulong end, ubyte[] buf) { | |||
start_offset = start; | |||
end_offset = end; | |||
decompressed_data = buf; | |||
} | |||
~this() { | |||
stderr.writeln("destroy DecompressedBgzfBlock ",start_offset,":",end_offset," ",decompressed_data.sizeof); | |||
}; | |||
*/ | |||
ulong start_offset; | |||
ulong end_offset; | |||
ubyte[] decompressed_data; | |||
} | |||
/// | |||
alias Cache!(BgzfBlock, DecompressedBgzfBlock) BgzfBlockCache; | |||
// alias Cache!(BgzfBlock, DecompressedBgzfBlock) BgzfBlockCache; | |||
/// Function for BGZF block decompression. | |||
/// Reuses buffer allocated for storing compressed data, | |||
/// i.e. after execution buffer of the passed $(D block) | |||
/// is overwritten with uncompressed data. | |||
DecompressedBgzfBlock decompressBgzfBlock(BgzfBlock block, | |||
BgzfBlockCache cache=null) | |||
DecompressedBgzfBlock decompressBgzfBlock(BgzfBlock block) | |||
{ | |||
if (block.input_size == 0) { | |||
return DecompressedBgzfBlock(block.start_offset, | |||
block.start_offset + block.bsize + 1, | |||
cast(ubyte[])[]); // EOF marker | |||
// TODO: add check for correctness of EOF marker | |||
return DecompressedBgzfBlock(block.start_offset, | |||
block.start_offset + block.bsize + 1, | |||
cast(ubyte[])[]); // EOF marker | |||
// TODO: add check for correctness of EOF marker | |||
} | |||
/* | |||
if (cache !is null) { | |||
auto ptr = cache.lookup(block); | |||
if (ptr !is null) | |||
return *ptr; | |||
} | |||
*/ | |||
int err = void; | |||
@@ -169,6 +186,7 @@ DecompressedBgzfBlock decompressBgzfBlock(BgzfBlock block, | |||
assert(block.crc32 == crc32(0, uncompressed[])); | |||
/* | |||
if (cache !is null) { | |||
BgzfBlock compressed_bgzf_block = block; | |||
compressed_bgzf_block._buffer = block._buffer.dup; | |||
@@ -180,6 +198,7 @@ DecompressedBgzfBlock decompressBgzfBlock(BgzfBlock block, | |||
} | |||
cache.put(compressed_bgzf_block, decompressed_bgzf_block); | |||
} | |||
*/ | |||
// Now copy back to block._buffer, overwriting existing data. | |||
// It should have enough bytes already allocated. | |||
@@ -192,6 +211,6 @@ DecompressedBgzfBlock decompressBgzfBlock(BgzfBlock block, | |||
block._buffer[0 .. block.input_size] = uncompressed[]; | |||
block.dirty = true; | |||
return DecompressedBgzfBlock(block.start_offset, block.end_offset, | |||
block._buffer[0 .. block.input_size]); | |||
auto decompressed = DecompressedBgzfBlock(block.start_offset, block.end_offset, block._buffer[0 .. block.input_size]); | |||
return decompressed; | |||
} |
@@ -42,6 +42,15 @@ class BgzfException : Exception { | |||
this(string msg) { super(msg); } | |||
} | |||
/* | |||
Called by | |||
randomaccessmanager.d: fillBgzfBufferFromStream(stream, true, &block, buf.ptr); | |||
inputstream.d: auto result = fillBgzfBufferFromStream(_stream, _seekable, block, buffer, | |||
*/ | |||
bool fillBgzfBufferFromStream(Stream stream, bool is_seekable, | |||
BgzfBlock* block, ubyte* buffer, | |||
size_t *number_of_bytes_read=null) | |||
@@ -336,12 +345,13 @@ class StreamChunksSupplier : BgzfBlockSupplier { | |||
} | |||
/// | |||
// Provided an uncompressed stream block by class | |||
class BgzfInputStream : Stream { | |||
private { | |||
BgzfBlockSupplier _supplier; | |||
ubyte[] _data; | |||
BgzfBlockCache _cache; | |||
// BgzfBlockCache _cache; | |||
ubyte[] _read_buffer; | |||
VirtualOffset _current_vo; | |||
@@ -355,17 +365,18 @@ class BgzfInputStream : Stream { | |||
TaskPool _pool; | |||
enum _max_block_size = BGZF_MAX_BLOCK_SIZE * 2; | |||
alias Task!(decompressBgzfBlock, BgzfBlock, BgzfBlockCache) | |||
DecompressionTask; | |||
DecompressionTask[] _task_buf; | |||
alias Task!(decompressBgzfBlock, BgzfBlock) DecompressionTask; | |||
// DecompressionTask[] _task_buf; | |||
// static here means that BlockAux has no access to | |||
// its surrounding scope https://dlang.org/spec/struct.html | |||
static struct BlockAux { | |||
BgzfBlock block; | |||
ushort skip_start; | |||
ushort skip_end; | |||
BgzfBlock block; | |||
ushort skip_start; | |||
ushort skip_end; | |||
DecompressionTask* task; | |||
alias task this; | |||
DecompressionTask* task; | |||
// alias task this; // https://dlang.org/spec/class.html#AliasThis | |||
} | |||
RoundBuf!BlockAux _tasks = void; | |||
@@ -373,6 +384,7 @@ class BgzfInputStream : Stream { | |||
size_t _offset; | |||
bool fillNextBlock() { | |||
// Sets up a decompression task and pushes it on the roundbuf _tasks | |||
ubyte* p = _data.ptr + _offset; | |||
BlockAux b = void; | |||
if (_supplier.getNextBgzfBlock(&b.block, p, | |||
@@ -388,15 +400,21 @@ class BgzfInputStream : Stream { | |||
stderr.writeln("[creating task] ", b.block.start_offset, " / ", b.skip_start, " / ", b.skip_end); | |||
} | |||
/* | |||
DecompressionTask tmp = void; | |||
tmp = scopedTask!decompressBgzfBlock(b.block, _cache); | |||
tmp = scopedTask!decompressBgzfBlock(b.block); | |||
auto t = _task_buf.ptr + _offset / _max_block_size; | |||
import core.stdc.string : memcpy; | |||
memcpy(t, &tmp, DecompressionTask.sizeof); | |||
b.task = t; | |||
_tasks.put(b); | |||
_pool.put(b.task); | |||
*/ | |||
// tmp = scopedTask!decompressBgzfBlock(b.block); | |||
auto task = task!decompressBgzfBlock(b.block); | |||
b.task = task; | |||
_tasks.put(b); // _tasks is roundbuf | |||
_pool.put(b.task); // _pool is thread pool | |||
_offset += _max_block_size; | |||
if (_offset == _data.length) | |||
_offset = 0; | |||
@@ -436,20 +454,22 @@ class BgzfInputStream : Stream { | |||
this(BgzfBlockSupplier supplier, | |||
TaskPool pool=taskPool, | |||
BgzfBlockCache cache=null, | |||
// BgzfBlockCache cache=null, | |||
size_t buffer_size=0) | |||
{ | |||
_supplier = supplier; | |||
_compressed_size = _supplier.totalCompressedSize(); | |||
_pool = pool; | |||
_cache = cache; | |||
// _cache = cache; | |||
// The roundbuf size (n_tasks) should be at least | |||
// the number of threads | |||
size_t n_tasks = max(pool.size, 1) * 2; | |||
if (buffer_size > 0) | |||
n_tasks = max(n_tasks, buffer_size / BGZF_MAX_BLOCK_SIZE); | |||
// n_tasks is 13 on my machine | |||
_tasks = RoundBuf!BlockAux(n_tasks); | |||
_task_buf = uninitializedArray!(DecompressionTask[])(n_tasks); | |||
// _task_buf = uninitializedArray!(DecompressionTask[])(n_tasks); | |||
_data = uninitializedArray!(ubyte[])(n_tasks * _max_block_size); | |||
@@ -8,10 +8,10 @@ | |||
the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
and/or sell copies of the Software, and to permit persons to whom the | |||
Software is furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in | |||
all copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
@@ -67,9 +67,9 @@ class BgzfOutputStream : Stream { | |||
/// Create new BGZF output stream which will use | |||
/// provided $(D task_pool) to do multithreaded compression. | |||
this(Stream output_stream, | |||
int compression_level=-1, | |||
TaskPool task_pool=taskPool, | |||
this(Stream output_stream, | |||
int compression_level=-1, | |||
TaskPool task_pool=taskPool, | |||
size_t buffer_size=0, | |||
size_t max_block_size=BGZF_MAX_BLOCK_SIZE, | |||
size_t block_size=BGZF_BLOCK_SIZE) | |||
@@ -132,7 +132,7 @@ class BgzfOutputStream : Stream { | |||
} | |||
/// Force flushing current block, even if it is not yet filled. | |||
/// Should be used when it's not desired to have records crossing block borders. | |||
/// Should be used when it's not desired to have records crossing block borders. | |||
void flushCurrentBlock() { | |||
if (_current_size == 0) | |||
@@ -197,7 +197,7 @@ class BgzfOutputStream : Stream { | |||
writeResult(block); | |||
_compression_tasks.popFront(); | |||
} | |||
_stream.flush(); | |||
_current_size = 0; | |||
} | |||
@@ -218,7 +218,7 @@ class BgzfOutputStream : Stream { | |||
/// Adds EOF block. This function is called in close() method. | |||
void addEofBlock() { | |||
_stream.writeExact(BGZF_EOF.ptr, BGZF_EOF.length); | |||
_stream.writeExact(BGZF_EOF.ptr, BGZF_EOF.length); | |||
} | |||
} | |||
@@ -8,10 +8,10 @@ | |||
the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
and/or sell copies of the Software, and to permit persons to whom the | |||
Software is furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in | |||
all copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
@@ -25,13 +25,18 @@ module bio.core.utils.roundbuf; | |||
import std.exception; | |||
/// Cyclic buffer | |||
/// Cyclic buffer, see https://en.wikipedia.org/wiki/Circular_buffer | |||
/// Bails out if you try to push beyond its size | |||
/// | |||
/// Note the RoundBuf has copy semantics. So if you pass in a | |||
/// Struct it will copy its contents. | |||
struct RoundBuf(T) { | |||
private { | |||
T[] _items = void; | |||
size_t _put; | |||
size_t _taken; | |||
T[] _items = void; // max_length is the size of round buf | |||
size_t _put; // moves the counter forward | |||
size_t _start; // current start of the buffer | |||
} | |||
/** initializes round buffer of size $(D n) */ | |||
@@ -41,47 +46,52 @@ struct RoundBuf(T) { | |||
/// Input range primitives | |||
bool empty() @property const { | |||
return _put == _taken; | |||
return _put == _start; | |||
} | |||
/// ditto | |||
/// Get the item at the front (non-destructive) | |||
auto ref front() @property { | |||
enforce(!empty, "roundbuffer is empty"); | |||
return _items[_taken % $]; | |||
return _items[_start % $]; | |||
} | |||
/// ditto | |||
/// Move the front forward (destructive) | |||
void popFront() { | |||
enforce(!empty, "roundbuffer is empty"); | |||
++_taken; | |||
++_start; | |||
} | |||
/// | |||
/// Returns the tail item (non-destructive) | |||
auto ref back() @property { | |||
enforce(!empty, "roundbuffer is empty"); | |||
return _items[(_put - 1) % $]; | |||
} | |||
/// Output range primitive | |||
/// Add and item at the tail and move pointer forward (destructive) | |||
void put(T item) { | |||
enforce(!full, "roundbuffer is full"); | |||
enforce(_put < _put.max, "ringbuffer overflow"); | |||
enforce(_put < _put.max, "ringbuffer size_t overflow"); | |||
_items[_put % $] = item; | |||
++_put; | |||
} | |||
/// Check if buffer is full | |||
bool full() @property const { | |||
return _put == _taken + _items.length; | |||
return _put == _start + max_length; | |||
} | |||
/// Current number of elements | |||
size_t length() @property const { | |||
return _put - _taken; | |||
return _put - _start; | |||
} | |||
size_t max_length() @property const { | |||
return _items.length; | |||
} | |||
} | |||
unittest { | |||
import std.stdio; | |||
auto buf = RoundBuf!int(4); | |||
assert(buf.empty); | |||
@@ -101,4 +111,10 @@ unittest { | |||
assert(buf.front == 3); | |||
buf.popFront(); | |||
assert(buf.front == 4); | |||
buf.put(4); | |||
buf.put(5); | |||
buf.put(6); | |||
assert(buf.length == buf.max_length); | |||
// should bomb out | |||
assertThrown!Exception(buf.put(7)); | |||
} |
@@ -8,10 +8,10 @@ | |||
the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
and/or sell copies of the Software, and to permit persons to whom the | |||
Software is furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in | |||
all copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
@@ -67,10 +67,10 @@ private { | |||
/// Class which random access tasks are delegated to. | |||
class RandomAccessManager { | |||
void setCache(BgzfBlockCache cache) { | |||
_cache = cache; | |||
} | |||
// void setCache(BgzfBlockCache cache) { | |||
// _cache = cache; | |||
//} | |||
void setTaskPool(TaskPool task_pool) { | |||
_task_pool = task_pool; | |||
} | |||
@@ -146,7 +146,8 @@ class RandomAccessManager { | |||
auto _compressed_stream = new EndianStream(_stream, Endian.littleEndian); | |||
_compressed_stream.seekSet(cast(size_t)(offset.coffset)); | |||
auto supplier = new StreamSupplier(_compressed_stream, offset.uoffset); | |||
auto bgzf_stream = new BgzfInputStream(supplier, _task_pool, _cache); | |||
// auto bgzf_stream = new BgzfInputStream(supplier, _task_pool, _cache); | |||
auto bgzf_stream = new BgzfInputStream(supplier, _task_pool); | |||
return bgzf_stream; | |||
} | |||
@@ -154,7 +155,7 @@ class RandomAccessManager { | |||
/// Every time new stream is used. | |||
BamRead getReadAt(VirtualOffset offset) { | |||
auto stream = createStreamStartingFrom(offset); | |||
bool old_mode = _reader._seqprocmode; | |||
_reader._seqprocmode = true; | |||
auto read = bamReadRange(stream, _reader).front.dup; | |||
@@ -303,7 +304,7 @@ class RandomAccessManager { | |||
auto reads = readsFromChunks!IteratePolicy(chunks); | |||
return filterBamReads(reads, regions); | |||
} | |||
/// Fetch alignments with given reference sequence id, overlapping [beg..end) | |||
auto getReads(alias IteratePolicy=withOffsets)(BamRegion region) | |||
{ | |||
@@ -325,7 +326,7 @@ class RandomAccessManager { | |||
last_ref_id = region.ref_id; | |||
} | |||
} | |||
static ref auto regB(ref BamRegion region) { return region.start; } | |||
static ref auto regE(ref BamRegion region) { return region.end; } | |||
foreach (ref group; regions_by_ref) | |||
@@ -341,24 +342,25 @@ private: | |||
auto fstream = new bio.core.utils.stream.File(_filename); | |||
auto compressed_stream = new EndianStream(fstream, Endian.littleEndian); | |||
auto supplier = new StreamChunksSupplier(compressed_stream, chunks); | |||
auto stream = new BgzfInputStream(supplier, _task_pool, _cache); | |||
// auto stream = new BgzfInputStream(supplier, _task_pool, _cache); | |||
auto stream = new BgzfInputStream(supplier, _task_pool); | |||
return bamReadRange!IteratePolicy(stream, _reader); | |||
} | |||
string _filename; | |||
BaiFile _bai; | |||
BamReader _reader; | |||
TaskPool _task_pool; | |||
size_t _buffer_size; | |||
BgzfBlockCache _cache; | |||
// BgzfBlockCache _cache; | |||
TaskPool task_pool() @property { | |||
if (_task_pool is null) | |||
_task_pool = taskPool; | |||
return _task_pool; | |||
} | |||
public: | |||
static struct BamReadFilter(R) { | |||
@@ -378,13 +380,13 @@ public: | |||
ElementType!R front() @property { | |||
return _current_read; | |||
} | |||
void popFront() { | |||
_range.popFront(); | |||
findNext(); | |||
} | |||
private: | |||
private: | |||
R _range; | |||
uint _ref_id; | |||
BamRegion _region; | |||
@@ -417,9 +419,9 @@ public: | |||
if (_current_read.position >= _region.end) { | |||
// As reads are sorted by leftmost coordinate, | |||
// all remaining alignments in _range | |||
// all remaining alignments in _range | |||
// will not overlap the current interval as well. | |||
// | |||
// | |||
// [-----) | |||
// . [-----------) | |||
// . [---) | |||
@@ -443,7 +445,7 @@ public: | |||
} | |||
if (_current_read.position + | |||
_current_read.basesCovered() <= _region.start) | |||
_current_read.basesCovered() <= _region.start) | |||
{ | |||
/// ends before beginning of the region | |||
/// [-----------) | |||
@@ -455,7 +457,7 @@ public: | |||
return; /// _current_read overlaps the region | |||
} | |||
} | |||
_empty = true; | |||
_empty = true; | |||
} | |||
} | |||
@@ -8,10 +8,10 @@ | |||
the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
and/or sell copies of the Software, and to permit persons to whom the | |||
Software is furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in | |||
all copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
@@ -22,7 +22,7 @@ | |||
*/ | |||
/// Writing a script/tool for processing BAM data often starts this way: | |||
/// | |||
/// | |||
/// ------------------------ | |||
/// import bio.std.hts.bam.reader; | |||
/// | |||
@@ -35,7 +35,7 @@ | |||
/// } | |||
/// } | |||
/// ------------------------ | |||
/// | |||
/// | |||
/// Or, if a specific interval on the reference sequence is to be explored: | |||
/// ------------------------ | |||
/// import bio.std.hts.bam.pileup; | |||
@@ -72,6 +72,7 @@ import std.parallelism; | |||
import std.array; | |||
import core.stdc.stdio; | |||
import std.string; | |||
import std.stdio; | |||
/** | |||
BAM file reader, featuring parallel decompression of BGZF blocks. | |||
@@ -82,7 +83,7 @@ class BamReader : IBamSamReader { | |||
Creates reader associated with file or stream. | |||
(If stream constructor is used, no random access is possible.) | |||
$(BR) | |||
Optionally, task pool can be specified. | |||
Optionally, task pool can be specified. | |||
It will be used to unpack BGZF blocks in parallel. | |||
Example: | |||
@@ -108,7 +109,7 @@ class BamReader : IBamSamReader { | |||
initializeStreams(); | |||
auto magic = _bam.readString(4); | |||
enforce(magic == "BAM\1", "Invalid file format: expected BAM\\1"); | |||
readSamHeader(); | |||
@@ -130,11 +131,12 @@ class BamReader : IBamSamReader { | |||
this(_source_stream, task_pool); | |||
} | |||
/// ditto | |||
this(string filename) { | |||
this(filename, std.parallelism.taskPool); | |||
this(filename, std.parallelism.taskPool); | |||
} | |||
/** | |||
True if BAI file was found for this BAM file. | |||
This is necessary for any random-access operations. | |||
@@ -142,7 +144,7 @@ class BamReader : IBamSamReader { | |||
Looks for files in the same directory which filename | |||
is either the file name of BAM file with '.bai' appended, | |||
or with the last extension replaced with '.bai' | |||
(that is, for $(I file.bam) paths $(I file.bai) and | |||
(that is, for $(I file.bam) paths $(I file.bai) and | |||
$(I file.bam.bai) will be checked) | |||
*/ | |||
bool has_index() @property { | |||
@@ -206,7 +208,7 @@ class BamReader : IBamSamReader { | |||
/** | |||
Range of all alignment records in the file. | |||
$(BR) | |||
Element type of the returned range depends on the policy. | |||
Element type of the returned range depends on the policy. | |||
Default one is $(DPREF2 bam, readrange, withoutOffsets), | |||
in this case range element type is $(DPREF2 bam, read, BamRead). | |||
$(BR) | |||
@@ -242,7 +244,7 @@ class BamReader : IBamSamReader { | |||
static if (__traits(identifier, IteratePolicy) == "withOffsets") { | |||
auto front() @property { | |||
return _range.front; | |||
} | |||
} | |||
} else static if (__traits(identifier, IteratePolicy) == "withoutOffsets") { | |||
auto front() @property { | |||
return _range.front.read; | |||
@@ -262,8 +264,8 @@ class BamReader : IBamSamReader { | |||
_bytes_read += _range.front.read.size_in_bytes; | |||
_range.popFront(); | |||
if (_progress_bar_func !is null) { | |||
_progress_bar_func(min(1.0, | |||
cast(float)_bytes_read / (_stream.total_compressed_size * | |||
_progress_bar_func(min(1.0, | |||
cast(float)_bytes_read / (_stream.total_compressed_size * | |||
_stream.average_compression_ratio))); | |||
} | |||
} | |||
@@ -278,13 +280,13 @@ class BamReader : IBamSamReader { | |||
/** | |||
Returns: range of all reads in the file, calling $(I progressBarFunc) | |||
for each read. | |||
for each read. | |||
$(BR) | |||
$(I progressBarFunc) will be called | |||
each time next alignment is read, with the argument being a number from [0.0, 1.0], | |||
which is estimated progress percentage. | |||
$(BR) | |||
Notice that $(I progressBarFunc) takes $(D lazy) argument, | |||
Notice that $(I progressBarFunc) takes $(D lazy) argument, | |||
so that the number of relatively expensive float division operations | |||
can be controlled by user. | |||
@@ -306,14 +308,14 @@ class BamReader : IBamSamReader { | |||
*/ | |||
auto readsWithProgress(alias IteratePolicy=bio.std.hts.bam.readrange.withoutOffsets) | |||
(void delegate(lazy float p) progressBarFunc, | |||
void delegate() finishFunc=null) | |||
void delegate() finishFunc=null) | |||
{ | |||
auto _decompressed_stream = getDecompressedBamReadStream(); | |||
auto reads_with_offsets = bamReadRange!withOffsets(_decompressed_stream, this); | |||
alias ReadsWithProgressResult!(IteratePolicy, | |||
alias ReadsWithProgressResult!(IteratePolicy, | |||
typeof(reads_with_offsets), BgzfInputStream) Result; | |||
return Result(reads_with_offsets, _decompressed_stream, | |||
progressBarFunc, finishFunc); | |||
} | |||
@@ -344,12 +346,12 @@ class BamReader : IBamSamReader { | |||
and be strictly less than the second one. | |||
$(BR) | |||
For decompression, the task pool specified at the construction is used. | |||
*/ | |||
auto getReadsBetween(bio.core.bgzf.virtualoffset.VirtualOffset from, | |||
*/ | |||
auto getReadsBetween(bio.core.bgzf.virtualoffset.VirtualOffset from, | |||
bio.core.bgzf.virtualoffset.VirtualOffset to) { | |||
enforce(from <= to, "First offset must be less than second"); | |||
enforce(_stream_is_seekable, "Stream is not seekable"); | |||
return _random_access_manager.getReadsBetween(from, to); | |||
} | |||
@@ -403,7 +405,7 @@ class BamReader : IBamSamReader { | |||
*/ | |||
bio.std.hts.bam.reference.ReferenceSequence reference(int ref_id) { | |||
enforce(ref_id < _reference_sequences.length, "Invalid reference index"); | |||
return ReferenceSequence(_random_access_manager, | |||
return ReferenceSequence(_random_access_manager, | |||
ref_id, | |||
_reference_sequences[ref_id]); | |||
} | |||
@@ -435,7 +437,7 @@ class BamReader : IBamSamReader { | |||
/** | |||
Set buffer size for I/O operations. Values less than 4096 are disallowed. | |||
$(BR) | |||
This can help in multithreaded applications when several files are read | |||
This can help in multithreaded applications when several files are read | |||
simultaneously (e.g. merging). | |||
*/ | |||
void setBufferSize(size_t buffer_size) { | |||
@@ -447,7 +449,7 @@ class BamReader : IBamSamReader { | |||
package bool _seqprocmode; // available for bio.std.hts.bam.readrange; | |||
private: | |||
string _filename; // filename (if available) | |||
Stream _source_stream; // compressed | |||
BgzfInputStream _decompressed_stream; // decompressed | |||
@@ -482,7 +484,7 @@ private: | |||
@property ref BaiFile _bai_file() { // initialized lazily | |||
initBai(); | |||
return _dont_access_me_directly_use_bai_file_for_that; | |||
}; | |||
}; | |||
RandomAccessManager _rndaccssmgr; // unreadable for a purpose | |||
@property RandomAccessManager _random_access_manager() { | |||
@@ -529,7 +531,7 @@ private: | |||
} else { | |||
_source_stream.seekSet(0); | |||
return _source_stream; | |||
} | |||
} | |||
} else { | |||
return null; | |||
} | |||
@@ -544,7 +546,7 @@ private: | |||
compressed_stream); | |||
return new BgzfInputStream(block_supplier, _task_pool, | |||
null, _buffer_size); | |||
_buffer_size); | |||
} | |||
// get decompressed stream starting from the first alignment record | |||
@@ -556,8 +558,8 @@ private: | |||
compressed_stream.seekCur(_reads_start_voffset.coffset); | |||
auto block_supplier = new StreamSupplier(compressed_stream); | |||
auto stream = new BgzfInputStream(block_supplier, _task_pool, | |||
null, _buffer_size); | |||
auto stream = new BgzfInputStream(block_supplier, _task_pool, | |||
_buffer_size); | |||
stream.readString(_reads_start_voffset.uoffset); | |||
return stream; | |||
} else { | |||
@@ -568,9 +570,9 @@ private: | |||
// sets up the streams and ranges | |||
void initializeStreams() { | |||
_decompressed_stream = getDecompressedStream(); | |||
_bam = new EndianStream(_decompressed_stream, Endian.littleEndian); | |||
_bam = new EndianStream(_decompressed_stream, Endian.littleEndian); | |||
} | |||
// initializes _header | |||
@@ -0,0 +1,52 @@ | |||
import bio.std.hts.bam.cigar; | |||
import bio.std.hts.bam.reader; | |||
import bio.std.hts.bam.writer; | |||
import bio.std.hts.sam.reader; | |||
import bio.std.hts.sam.header; | |||
import bio.std.hts.bam.md.core; | |||
import bio.std.hts.bam.md.reconstruct; | |||
import bio.std.hts.bam.pileup; | |||
import bio.std.hts.bam.baseinfo; | |||
import bio.std.hts.bam.validation.samheader; | |||
import bio.std.hts.bam.validation.alignment; | |||
import bio.std.hts.utils.samheadermerger; | |||
import bio.std.hts.sam.utils.recordparser; | |||
import bio.core.bgzf.block; | |||
import bio.core.bgzf.inputstream; | |||
import bio.core.bgzf.outputstream; | |||
import bio.core.utils.roundbuf; | |||
import bio.core.utils.range; | |||
import bio.core.utils.tmpfile; | |||
import bio.core.utils.stream; | |||
import bio.core.sequence; | |||
import bio.core.base; | |||
import bio.core.tinymap; | |||
import bio.core.utils.roundbuf; | |||
import std.path; | |||
import std.range; | |||
import std.stdio; | |||
// import undead.stream; | |||
import std.algorithm; | |||
import std.array; | |||
import std.conv; | |||
import std.exception; | |||
import std.math; | |||
import std.typetuple; | |||
import std.regex; | |||
import core.memory; | |||
void main() { | |||
BamReader bam; | |||
auto fn = buildPath(dirName(__FILE__), "data", "ex1_header.bam"); | |||
foreach (i; 0 .. 60) | |||
{ | |||
bam = new BamReader(fn); | |||
assert(bam.header.format_version == "1.3"); | |||
} | |||
// Time to kick in GC | |||
// stderr.writeln("**** Calling GC final"); | |||
// GC.collect(); | |||
// stderr.writeln("**** Past calling GC final"); | |||
} |
@@ -487,5 +487,3 @@ unittest { | |||
writer.finish(); | |||
} | |||
} | |||
void main() {} |