Browse Source

sff

remotes/georgeg/bam_output_redesign
lomereiter 10 years ago
parent
commit
6a18d0885b
  1. 33
      bio/core/utils/stream.d
  2. 5
      bio/sff/constants.d
  3. 3
      bio/sff/read.d
  4. 43
      bio/sff/reader.d
  5. 7
      bio/sff/readrange.d
  6. 13
      bio/sff/utils/roundup.d
  7. 116
      bio/sff/writer.d

33
bio/core/utils/stream.d

@ -7,15 +7,42 @@ version(Posix){
private import core.sys.posix.unistd;
}
FileMode toFileMode(string mode) {
FileMode result = FileMode.In;
switch (mode) {
case "r", "rb":
result = FileMode.In; // 1000
break;
case "r+", "r+b", "rb+":
result = FileMode.In | FileMode.Out; // 1100
case "w", "wb":
result = FileMode.OutNew; // 0110
break;
case "w+", "w+b", "wb+":
result = FileMode.In | FileMode.OutNew; // 1110
break;
case "a", "ab":
result = FileMode.Append; // 0001
break;
case "a+", "a+b", "ab+":
result = FileMode.In | FileMode.Append; // 1001
break;
default:
break;
}
return result;
}
final class File: std.stream.File {
this(string filename) {
this(string filename, string mode="rb") {
// Issue 8528 workaround
auto file = fopen(toStringz(filename), "rb");
auto file = fopen(toStringz(filename), toStringz(mode));
if (file == null) {
throw new OpenException(cast(string) ("Cannot open or create file '"
~ filename ~ "'"));
}
super(core.stdc.stdio.fileno(file), FileMode.In);
super(core.stdc.stdio.fileno(file), toFileMode(mode));
}
override ulong seek(long offset, SeekPos rel) {

5
bio/sff/constants.d

@ -0,0 +1,5 @@
module bio.sff.constants;
immutable SFF_MAGIC = 0x2E736666;
immutable char[4] SFF_VERSION = [0, 0, 0, 1];

3
bio/sff/read.d

@ -20,4 +20,7 @@ struct SffRead {
ushort clip_qual_right;
ushort clip_adapter_left;
ushort clip_adapter_right;
/// Record start offset in the file
size_t file_offset;
}

43
bio/sff/file.d → bio/sff/reader.d

@ -17,10 +17,12 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
module bio.sff.file;
module bio.sff.reader;
import bio.sff.index;
public import bio.sff.index;
import bio.sff.read;
import bio.sff.readrange;
import bio.sff.constants;
import bio.core.utils.stream;
import std.stream;
@ -29,7 +31,7 @@ import std.range;
import std.exception;
/// SFF file reader
struct SffFile {
class SffReader {
/// Open file by filename
this(string filename) {
@ -48,6 +50,17 @@ struct SffFile {
return takeExactly(sff_reads, _n_reads);
}
///
SffRead getReadAtOffset(size_t offset) {
auto stream = new bio.core.utils.stream.File(filename);
Stream sff = new EndianStream(stream, Endian.bigEndian);
sff.seekSet(offset);
auto read = SffReadRange(sff, cast(ushort)_flow_chars.length, _index_location).front;
sff.close();
return read;
}
/// File name
string filename() @property const {
return _filename;
@ -58,6 +71,26 @@ struct SffFile {
return _index_location;
}
///
bool has_index() @property const {
return _index_location.offset != 0 && _index_location.length != 0;
}
/// Set index location (saves new index location to the file)
void index_location(IndexLocation location) @property {
_index_location = location;
// offset spans 8 bytes (8 .. 16),
// length spans 4 bytes (16 .. 20)
auto stream = new bio.core.utils.stream.File(filename, "r+");
stream.seekSet(8);
auto endian_stream = new EndianStream(stream, Endian.bigEndian);
endian_stream.write(location.offset);
endian_stream.write(location.length);
endian_stream.close();
}
/// Nucleotides used for each flow of each read
string flow_order() @property const {
return _flow_chars;
@ -87,10 +120,10 @@ struct SffFile {
auto sff = new EndianStream(stream, Endian.bigEndian);
sff.read(_magic_number);
enforce(_magic_number == 0x2E736666, "Wrong magic number, expected 0x2E736666");
enforce(_magic_number == SFF_MAGIC, "Wrong magic number, expected 0x2E736666");
sff.readExact(_version.ptr, 4);
enforce(_version == [0, 0, 0, 1], "Unsupported version, expected 1");
enforce(_version == SFF_VERSION, "Unsupported version, expected 1");
sff.read(_index_location.offset);
sff.read(_index_location.length);

7
bio/sff/readrange.d

@ -63,12 +63,13 @@ struct SffReadRange {
SffRead _read;
void _fetchNextRead() {
if (_stream.position == _index_loc.offset)
_stream.seekCur(_index_loc.length);
if (_stream.eof) {
_empty = true;
} else {
if (_stream.position == _index_loc.offset)
_stream.seekCur(_index_loc.length);
_read.file_offset = _stream.position;
// determine how many bytes to read
ushort read_header_length = void;
ushort name_length = void;

13
bio/sff/utils/roundup.d

@ -0,0 +1,13 @@
module bio.sff.utils.roundup;
import std.traits;
import std.conv;
/// First number
T roundup(T)(T number)
if (isUnsigned!T)
{
if (number % 8 == 0)
return number;
return to!T(number + (8 - number % 8));
}

116
bio/sff/writer.d

@ -0,0 +1,116 @@
module bio.sff.writer;
import bio.sff.constants;
import bio.sff.utils.roundup;
import bio.core.utils.stream;
import std.stream;
import std.system;
/// Class for outputting SFF files
class SffWriter {
/// Create new writer.
this(string filename, string flow_order, string key_sequence)
{
_filename = filename;
_flow_order = flow_order;
_key_seq = key_sequence;
auto f = new bio.core.utils.stream.File(filename, "wb+");
auto stream = new BufferedStream(f, 1024576);
_endian_stream = new EndianStream(stream, Endian.bigEndian);
writeHeader();
}
/// Flow order
string flow_order() @property const {
return _flow_order;
}
/// Key sequence
string key_sequence() @property const {
return _key_seq;
}
/// Add a read to the end of file
void append(R)(R sff_read) {
// compute read_header_length
ushort exact_read_header_length = cast(ushort)(16 + sff_read.name.length);
ushort read_header_length = roundup(exact_read_header_length);
_endian_stream.write(read_header_length);
_endian_stream.write(cast(ushort)sff_read.name.length);
_endian_stream.write(cast(uint)sff_read.bases.length);
_endian_stream.write(sff_read.clip_qual_left);
_endian_stream.write(sff_read.clip_qual_right);
_endian_stream.write(sff_read.clip_adapter_left);
_endian_stream.write(sff_read.clip_adapter_right);
_endian_stream.writeExact(sff_read.name.ptr, sff_read.name.length);
for (size_t i = 0; i < read_header_length - exact_read_header_length; ++i)
_endian_stream.write(cast(ubyte)0);
for (size_t i = 0; i < _flow_order.length; ++i)
_endian_stream.write(sff_read.flowgram_values[i]);
auto n_bases = sff_read.bases.length;
_endian_stream.writeExact(sff_read.flow_index_per_base.ptr, n_bases);
_endian_stream.writeExact(sff_read.bases.ptr, n_bases);
_endian_stream.writeExact(sff_read.quality_scores.ptr, n_bases);
auto k = 2 * _flow_order.length + 3 * n_bases;
auto padding = roundup(k) - k;
for (size_t i = 0; i < padding; ++i)
_endian_stream.write(cast(ubyte)0);
++_n_reads;
}
/// Flush all buffers and update number of reads in the file header
void finish() {
updateNumberOfReads();
_endian_stream.close();
}
private {
string _filename;
string _flow_order;
string _key_seq;
Stream _endian_stream;
uint _n_reads;
ushort _exact_header_len() @property const {
return cast(ushort)(31 + _flow_order.length + _key_seq.length);
}
ushort _header_len() @property const {
return roundup(_exact_header_len);
}
void writeHeader() {
_endian_stream.write(SFF_MAGIC);
_endian_stream.writeExact(SFF_VERSION.ptr, 4);
_endian_stream.write(0UL);
_endian_stream.write(0U);
_endian_stream.write(_n_reads);
_endian_stream.write(_header_len);
_endian_stream.write(cast(ushort)_key_seq.length);
_endian_stream.write(cast(ushort)_flow_order.length);
_endian_stream.write(cast(ubyte)1);
_endian_stream.writeExact(_flow_order.ptr, _flow_order.length);
_endian_stream.writeExact(_key_seq.ptr, _key_seq.length);
for (size_t i = 0; i < _header_len - _exact_header_len; ++i)
_endian_stream.write(cast(ubyte)0);
}
void updateNumberOfReads() {
auto old_pos = _endian_stream.position;
_endian_stream.position = 20;
_endian_stream.write(_n_reads);
_endian_stream.position = old_pos;
}
}
}
Loading…
Cancel
Save