Browse Source

Renamed SamFile and BamFile to SamReader and BamReader, and changed them from structs to classes

remotes/georgeg/bam_output_redesign
lomereiter 9 years ago
parent
commit
c37567fee0
  1. 2
      bio/bam/output.d
  2. 8
      bio/bam/reader.d
  3. 2
      bio/bam/utils/samheadermerger.d
  4. 2
      bio/bam/validation/samheader.d
  5. 2
      bio/sam/header.d
  6. 10
      bio/sam/reader.d
  7. 4
      bio/sam/utils/fastrecordparser.d
  8. 4
      bio/sam/utils/recordparser.d
  9. 8
      src_ragel/Makefile
  10. 2
      src_ragel/sam_alignment.rl
  11. 43
      test/unittests.d

2
bio/bam/output.d

@ -19,7 +19,7 @@
*/
module bio.bam.output;
import bio.bam.samheader;
import bio.sam.header;
import bio.bam.reference;
import bio.bam.read;
import bio.bam.readrange;

8
bio/bam/bamfile.d → bio/bam/reader.d

@ -17,9 +17,9 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
module bio.bam.bamfile;
module bio.bam.reader;
public import bio.bam.samheader;
public import bio.sam.header;
public import bio.bam.reference;
public import bio.bam.read;
public import bio.bam.virtualoffset;
@ -46,7 +46,7 @@ import std.string;
/**
Represents BAM file
*/
struct BamFile {
class BamReader {
this(Stream stream, TaskPool task_pool = taskPool) {
_source_stream = new EndianStream(stream, Endian.littleEndian);
@ -208,7 +208,7 @@ struct BamFile {
First offset must point to the start of an alignment record,
and be strictly less than the second one.
For decompression, uses task pool specified at BamFile construction.
For decompression, uses task pool specified at BamReader construction.
*/
auto getReadsBetween(VirtualOffset from, VirtualOffset to) {
enforce(from < to, "First offset must be strictly less than second");

2
bio/bam/utils/samheadermerger.d

@ -19,7 +19,7 @@
*/
module bio.bam.utils.samheadermerger;
import bio.bam.samheader;
import bio.sam.header;
import bio.bam.validation.samheader;
import std.array;

2
bio/bam/validation/samheader.d

@ -26,7 +26,7 @@
*/
module bio.bam.validation.samheader;
public import bio.bam.samheader;
public import bio.sam.header;
import bio.core.utils.algo;
import std.algorithm;

2
bio/bam/samheader.d → bio/sam/header.d

@ -17,7 +17,7 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
module bio.bam.samheader;
module bio.sam.header;
import bio.bam.thirdparty.msgpack;
import bio.core.utils.format;

10
bio/bam/samfile.d → bio/sam/reader.d

@ -17,16 +17,16 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
module bio.bam.samfile;
module bio.sam.reader;
import bio.sam.header;
import bio.bam.read;
import bio.bam.samheader;
import bio.bam.reference;
version(DigitalMars) {
import bio.bam.utils.samrecordparser;
import bio.sam.utils.recordparser;
} else {
import bio.bam.utils.fastsamrecordparser;
import bio.sam.utils.fastrecordparser;
}
import std.stdio;
@ -37,7 +37,7 @@ private {
extern(C) size_t lseek(int, size_t, int);
}
struct SamFile {
class SamReader {
this(string filename) {
_file = File(filename);

4
bio/bam/utils/fastsamrecordparser.d → bio/sam/utils/fastrecordparser.d

@ -1,4 +1,4 @@
module bio.bam.utils.fastsamrecordparser;
module bio.sam.utils.fastrecordparser;
#line 1 "sam_alignment.rl"
/*
@ -32,9 +32,9 @@ static const int sam_alignment_en_alignment = 0;
#line 234 "sam_alignment.rl"
import bio.sam.header;
import bio.bam.read;
import bio.bam.tagvalue;
import bio.bam.samheader;
import bio.bam.utils.tagstoragebuilder;
import std.array;

4
bio/bam/utils/samrecordparser.d → bio/sam/utils/recordparser.d

@ -1,4 +1,4 @@
module bio.bam.utils.samrecordparser;
module bio.sam.utils.recordparser;
#line 1 "sam_alignment.rl"
/*
@ -537,9 +537,9 @@ static int sam_alignment_en_alignment = 0;
#line 234 "sam_alignment.rl"
import bio.sam.header;
import bio.bam.read;
import bio.bam.tagvalue;
import bio.bam.samheader;
import bio.bam.utils.tagstoragebuilder;
import std.array;

8
src_ragel/Makefile

@ -9,16 +9,16 @@ all: fastrecordparser recordparser regionparser
fastrecordparser:
ragel sam_alignment.rl -D -G2
./workarounds/fix_switch_case_fallthrough.sh sam_alignment.d
echo 'module bio.bam.utils.fastsamrecordparser;' | cat - sam_alignment.d > .sam_alignment.d.tmp
echo 'module bio.sam.utils.fastrecordparser;' | cat - sam_alignment.d > .sam_alignment.d.tmp
rm sam_alignment.d
mv .sam_alignment.d.tmp fastsamrecordparser.d
mv .sam_alignment.d.tmp fastrecordparser.d
recordparser:
ragel sam_alignment.rl -D
./workarounds/fix_static_const.sh sam_alignment.d
echo 'module bio.bam.utils.samrecordparser;' | cat - sam_alignment.d > .sam_alignment.d.tmp
echo 'module bio.sam.utils.recordparser;' | cat - sam_alignment.d > .sam_alignment.d.tmp
rm sam_alignment.d
mv .sam_alignment.d.tmp samrecordparser.d
mv .sam_alignment.d.tmp recordparser.d
regionparser:
ragel region.rl -D

2
src_ragel/sam_alignment.rl

@ -233,9 +233,9 @@
write data;
}%%
import bio.sam.header;
import bio.bam.read;
import bio.bam.tagvalue;
import bio.bam.samheader;
import bio.bam.utils.tagstoragebuilder;
import std.array;

43
test/unittests.d

@ -17,18 +17,19 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
import bio.sam.reader;
import bio.sam.header;
import bio.bam.bgzf.blockrange;
import bio.bam.bamfile;
import bio.bam.samfile;
import bio.bam.reader;
import bio.bam.output;
import bio.bam.samheader;
import bio.bam.md.reconstruct;
import bio.bam.pileuprange;
import bio.bam.baseinfo;
import bio.bam.validation.samheader;
import bio.bam.validation.alignment;
import bio.bam.utils.samheadermerger;
import bio.bam.utils.samrecordparser;
import bio.sam.utils.recordparser;
import bio.bam.serialization.sam;
import bio.core.utils.tmpfile;
@ -48,7 +49,7 @@ unittest {
writeln("Testing extracting SAM header...");
auto fn = buildPath(dirName(__FILE__), "data", "ex1_header.bam");
auto bf = BamFile(fn);
auto bf = new BamReader(fn);
assert(bf.header.format_version == "1.3");
assert(bf.header.sorting_order == SortingOrder.coordinate);
assert(bf.header.sequences.length == 2);
@ -56,7 +57,7 @@ unittest {
assert(bf.header.sequences["chr2"].length == 1584);
fn = buildPath(dirName(__FILE__), "data", "bins.bam");
bf = BamFile(fn);
bf = new BamReader(fn);
assert(bf.header.sorting_order == SortingOrder.unknown);
assert(bf.header.sequences.length == 3);
assert(bf.header.read_groups.length == 0);
@ -66,7 +67,7 @@ unittest {
{
writeln("Testing alignment parsing...");
fn = buildPath(dirName(__FILE__), "data", "ex1_header.bam");
bf = BamFile(fn);
bf = new BamReader(fn);
auto reads = bf.reads;
auto read = reads.front;
assert(equal(read.sequence, "CTCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA"));
@ -88,7 +89,7 @@ unittest {
writeln("Testing tag parsing...");
fn = buildPath(dirName(__FILE__), "data", "tags.bam");
bf = BamFile(fn);
bf = new BamReader(fn);
foreach (alignment; bf.reads) {
auto read_name = alignment.read_name;
assert(read_name[0..4] == "tag_");
@ -110,19 +111,19 @@ unittest {
writeln("Testing exception handling...");
fn = buildPath(dirName(__FILE__), "data", "duplicated_block_size.bam");
assertThrown!BgzfException(BamFile(fn));
assertThrown!BgzfException(new BamReader(fn));
fn = buildPath(dirName(__FILE__), "data", "no_block_size.bam");
assertThrown!BgzfException(BamFile(fn));
assertThrown!BgzfException(new BamReader(fn));
fn = buildPath(dirName(__FILE__), "data", "wrong_extra_gzip_length.bam");
assertThrown!BgzfException(BamFile(fn));
assertThrown!BgzfException(new BamReader(fn));
fn = buildPath(dirName(__FILE__), "data", "wrong_bc_subfield_length.bam");
assertThrown!BgzfException(reduce!"a+b.sequence_length"(0,BamFile(fn).reads!withoutOffsets));
assertThrown!BgzfException(reduce!"a+b.sequence_length"(0, (new BamReader(fn)).reads!withoutOffsets));
fn = buildPath(dirName(__FILE__), "data", "corrupted_zlib_archive.bam");
assertThrown!ZlibException(walkLength(BamFile(fn).reads));
assertThrown!ZlibException(walkLength((new BamReader(fn)).reads));
writeln("Testing random access...");
fn = buildPath(dirName(__FILE__), "data", "bins.bam");
bf = BamFile(fn);
bf = new BamReader(fn);
void compareWithNaiveApproach(int beg, int end) {
@ -230,7 +231,7 @@ unittest {
writeln("Test parseAlignmentLine/toSam functions...");
fn = buildPath(dirName(__FILE__), "data", "ex1_header.bam");
bf = BamFile(fn);
bf = new BamReader(fn);
foreach (read; bf.reads) {
auto line = toSam(read, bf.reference_sequences);
auto read2 = parseAlignmentLine(line, bf.header);
@ -241,7 +242,7 @@ unittest {
}
fn = buildPath(dirName(__FILE__), "data", "tags.bam");
bf = BamFile(fn);
bf = new BamReader(fn);
foreach (read; bf.reads) {
auto line = toSam(read, bf.reference_sequences);
auto read2 = parseAlignmentLine(line, bf.header);
@ -253,19 +254,19 @@ unittest {
writeln("Test BAM writing...");
fn = buildPath(dirName(__FILE__), "data", "ex1_header.bam");
bf = BamFile(fn);
bf = new BamReader(fn);
{
string tmp = tmpFile("12035913820619231129310.bam");
auto stream = new BufferedFile(tmp, FileMode.Out, 8192);
writeBAM(stream, bf.header.text, bf.reference_sequences, bf.reads!withoutOffsets, 9);
stream.seekSet(0);
assert(walkLength(BamFile(tmp).reads!withoutOffsets) == 3270);
assert(walkLength((new BamReader(tmp)).reads!withoutOffsets) == 3270);
stream.close();
}
writeln("Test SAM reading...");
{
auto sf = SamFile(buildPath(dirName(__FILE__), "data", "ex1_header.sam"));
auto sf = new SamReader(buildPath(dirName(__FILE__), "data", "ex1_header.sam"));
assert(sf.reads.front.ref_id == 0);
assert(equal(sf.reads, bf.reads!withoutOffsets));
}
@ -276,7 +277,7 @@ unittest {
// When reads in a range are aligned to different references,
// pileup objects should process only the first one.
bf = BamFile(fn); // chr1, chr2
bf = new BamReader(fn); // chr1, chr2
{
auto pileup = makePileup(bf.reads);
foreach (column; pileup) {
@ -329,7 +330,7 @@ unittest {
writeln("Testing basesWith functionality...");
{
fn = buildPath(dirName(__FILE__), "data", "mg1655_chunk.bam");
bf = BamFile(fn);
bf = new BamReader(fn);
auto flow_order = bf.header.read_groups.values.front.flow_order;
auto reads = array(bf.reads);

Loading…
Cancel
Save