Browse Source

replaced dump algo of getting bins for region

remotes/georgeg/no_streams
lomereiter 9 years ago
parent
commit
e53af29402
  1. 26
      bio/bam/bai/bin.d
  2. 33
      bio/bam/baifile.d
  3. 5
      bio/bam/randomaccessmanager.d

26
bio/bam/bai/bin.d

@ -30,7 +30,7 @@ import bio.bam.constants;
struct Bin {
/// Construct a bin with an id
this(ushort id) nothrow {
this(uint id) nothrow {
this.id = id;
}
@ -51,30 +51,6 @@ struct Bin {
bool is_leaf() @property const nothrow {
return id > BAI_MAX_NONLEAF_BIN_ID;
}
/// Check if bin can overlap with a region
bool canOverlapWith(int begin, int end) const nothrow {
if (id == 0) return true;
if (id > BAI_MAX_BIN_ID) return false;
/// The following code is based on reg2bins() function
if (begin < 0) begin = 0;
auto magic_number = 4681;
auto b = begin >> 14;
auto e = end >> 14;
while (true) {
auto delta = id - magic_number;
if (b <= delta && delta <= e) return true;
magic_number >>= 3;
if (magic_number == 0) return false;
b >>= 3;
e >>= 3;
}
}
}
/// Returns bin number for [beg, end) interval (zero-based).

33
bio/bam/baifile.d

@ -40,8 +40,8 @@ import std.path;
/// Represents index for a single reference
struct Index {
/// Information about bins
Bin[] bins;
Bin[uint] bins;
/// Virtual file offsets of first alignments overlapping 16384-byte windows
/// on the reference sequence. This linear index is used to reduce amount
/// of file seeks for region queries, since with its help one can reduce the
@ -79,6 +79,22 @@ struct Index {
auto min_offset = (_i == -1) ? VirtualOffset(0) : ioffsets[_i];
return min_offset;
}
/// Range of bins that overlap interval [beg, end)
auto getBins(uint beg, uint end) {
assert(beg < end);
if (end >= 1u<<29) end = 1u<<29;
--end;
return chain(repeat(0).takeOne(),
iota(1 + (beg >> 26), 2 + (end >> 26)),
iota(9 + (beg >> 23), 10 + (end >> 23)),
iota(73 + (beg >> 20), 74 + (end >> 20)),
iota(585 + (beg >> 17), 586 + (end >> 17)),
iota(4681 + (beg >> 14), 4682 + (end >> 14)))
.zip(bins.repeat())
.map!"a[0] in a[1]"()
.filter!"a !is null"();
}
}
struct BaiFile {
@ -133,22 +149,25 @@ private:
foreach (i; 0 .. n_ref) {
int n_bin;
_stream.read(n_bin);
indices[i].bins.length = n_bin;
foreach (j; 0 .. n_bin) {
_stream.read(indices[i].bins[j].id);
uint id;
_stream.read(id);
auto bin = Bin(id);
int n_chunk;
_stream.read(n_chunk);
indices[i].bins[j].chunks.length = n_chunk;
bin.chunks.length = n_chunk;
foreach (k; 0 .. n_chunk) {
ulong tmp;
_stream.read(tmp);
indices[i].bins[j].chunks[k].beg = VirtualOffset(tmp);
bin.chunks[k].beg = VirtualOffset(tmp);
_stream.read(tmp);
indices[i].bins[j].chunks[k].end = VirtualOffset(tmp);
bin.chunks[k].end = VirtualOffset(tmp);
}
indices[i].bins[id] = bin;
}
int n_intv;

5
bio/bam/randomaccessmanager.d

@ -246,10 +246,7 @@ class RandomAccessManager {
auto min_offset = _bai.indices[ref_id].getMinimumOffset(beg);
Chunk[] bai_chunks;
foreach (b; _bai.indices[ref_id].bins) {
if (!b.canOverlapWith(beg, end)) {
continue;
}
foreach (b; _bai.indices[ref_id].getBins(beg, end)) {
foreach (chunk; b.chunks) {
if (chunk.end > min_offset) {

Loading…
Cancel
Save