You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

416 lines
13 KiB

/*
This file is part of BioD.
Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com>
BioD is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
BioD is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
module bio.bam.tagvalue;
public import std.conv;
import std.typetuple;
import std.exception;
import bio.bam.thirdparty.msgpack;
/**
  Compile-time pairing of a tag type symbol with the D type
  used to hold values of that kind.
*/
struct CharToType(char c, T) {
    /** type which corresponds to the symbol
        according to SAM/BAM specification
    */
    alias T ValueType;

    /** symbol */
    enum char ch = c;
}
/**
  Thrown in case of unrecognized tag type.

  Params:
      msg  = description of the problem
      file = source file of the throw site (defaults to the caller's file)
      line = source line of the throw site (defaults to the caller's line)
      next = optional chained exception
*/
class UnknownTagTypeException : Exception {
    // file/line are forwarded so that the exception reports the place where
    // it was thrown rather than the location of this constructor.
    this(string msg, string file = __FILE__, size_t line = __LINE__,
         Throwable next = null)
    {
        super(msg, file, line, next);
    }
}
/// Tag types that hold a single value: each type symbol paired with its D type.
alias TypeTuple!(
    CharToType!('A', char),
    CharToType!('c', byte),
    CharToType!('C', ubyte),
    CharToType!('s', short),
    CharToType!('S', ushort),
    CharToType!('i', int),
    CharToType!('I', uint),
    CharToType!('f', float)
) PrimitiveTagValueTypes;

/// String-valued tag types; both symbols are stored as D strings.
alias TypeTuple!(
    CharToType!('Z', string),
    CharToType!('H', string)
) StringTagValueTypes;

/// Element types allowed in array tags: c, C, s, S, i, I, f.
/// This is the primitive list without its first entry ('A' / char),
/// in the same order.
alias TypeTuple!(PrimitiveTagValueTypes[1 .. $]) ArrayElementTagValueTypes;
/**
  Maps a primitive tag type symbol to the size of the value it denotes.
  Useful in TagStorage implementations, for skipping elements.

  Params:
      c = primitive type identifier (one of the symbols listed in
          PrimitiveTagValueTypes)

  Returns: size of corresponding type in bytes

  Throws: UnknownTagTypeException if c is not a primitive type identifier
*/
uint charToSizeof(char c) {
    switch (c) {
        // The tuple foreach is unrolled at compile time,
        // producing one case label per primitive type.
        foreach (c2t; PrimitiveTagValueTypes) {
            case c2t.ch:
                return c2t.ValueType.sizeof;
        }
        default:
            throw new UnknownTagTypeException(to!string(c));
    }
}
/**
  Associates a type with its numeric identifier.
  (The identifier is a ubyte; that is enough currently,
  but might change in the future.)
*/
struct TypeId(T, ubyte id) {
    /// the type being identified
    alias T Type;

    /// identifier assigned to the type
    enum ubyte Id = id;
}
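/*
  Structure of type identifier (bit 0 is the least significant bit):

  bit 0 == 0: primitive
      bit 1 == 1: null/nothing (all remaining bits are zero)
      bit 1 == 0: something
          bit 2 == 1: char
          bit 2 == 0: numeric
              bit 3 == 1: float
              bit 3 == 0: integer
                  bit 4 == 1: signed
                  bit 4 == 0: unsigned
          bits 5-7: size of the value in bytes

  bit 0 == 1: array/string
      bit 1 == 1: string
          bit 2 == 0: 'Z' string
          bit 2 == 1: 'H' string
      bit 1 == 0: numeric array
          bit 2 == 1: array of floats
          bit 2 == 0: array of integers
              bit 3 == 1: signed elements
              bit 3 == 0: unsigned elements
      bits 5-7: element size in bytes

  For every identifier except null:
      (TypeId >> 5) == elementType.sizeof
*/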
// Table of all storable types together with their identifiers.
// The order of entries matters: string is listed twice ('Z' first, then 'H'),
// and a lookup by type finds the first matching entry.
alias TypeTuple!(TypeId!(char, 0b001_00_1_00),
TypeId!(ubyte, 0b001_0_0000),
TypeId!(ushort, 0b010_0_0000),
TypeId!(uint, 0b100_0__0__0__0__0),
/* Let's take uint as an example; its bit groups, from left to right:
       100 - 4 bytes
       0   - unsigned
       0   - integer
       0   - numeric
       0   - something
       0   - primitive
*/
TypeId!(byte, 0b001_1_0000),
TypeId!(short, 0b010_1_0000),
TypeId!(int, 0b100_1_0000),
TypeId!(float, 0b100_0_1_000),
TypeId!(ubyte[], 0b001_000_01),
TypeId!(ushort[], 0b010_000_01),
TypeId!(uint[], 0b100_000_01),
TypeId!(byte[], 0b001_010_01),
TypeId!(short[], 0b010_010_01),
TypeId!(int[], 0b100_010_01),
TypeId!(float[], 0b100_00_1_01),
TypeId!(string, 0b001_00_0_11),
TypeId!(string, 0b001_00_1_11),
TypeId!(typeof(null), 0b0000_0010))
TypeIdMap;
// Identifier of the second string entry above, i.e. the one with bit 2 set.
private immutable hexStringTag = 0b001_00_1_11;
// Extracts the Type member of U (e.g. of a TypeId instantiation).
// Used with staticMap to turn TypeIdMap into a plain list of types.
private template GetType(U) {
alias U.Type GetType;
}
/// Get tag for type T.
///
/// Useful for comparison with tag field of Value struct.
///
/// The identifier is taken from the first TypeIdMap entry whose type is T,
/// so for string (which is listed twice) this is the first of the two
/// identifiers, not hexStringTag.
///
/// Example:
/// -----------------------------------
/// Value v = "zzz";
/// assert(v.tag == GetTypeId!string);
/// -----------------------------------
template GetTypeId(T) {
enum GetTypeId = TypeIdMap[staticIndexOf!(T, staticMap!(GetType, TypeIdMap))].Id;
}
/**
  Builds the source text of the union used as Value's storage,
  followed by the declaration of a member `u` of that union type.

  The union gets one field per primitive tag type and per string tag type,
  named after the type symbol (e.g. `A`, `c`, `Z`), and one array field
  per array element type, named `B` followed by the element symbol
  (e.g. `Bc`).

  Returns: D code intended to be mixed into a struct
*/
string generateUnion() {
    string fields;

    foreach (prim; PrimitiveTagValueTypes)
        fields ~= prim.ValueType.stringof ~ " " ~ prim.ch ~ ";";

    foreach (str; StringTagValueTypes)
        fields ~= str.ValueType.stringof ~ " " ~ str.ch ~ ";";

    foreach (elem; ArrayElementTagValueTypes)
        fields ~= elem.ValueType.stringof ~ "[] B" ~ elem.ch ~ ";";

    return "union U {" ~ fields ~ "}; U u;";
}
/// Dynamic array type with elements of type T, i.e. ArrayOf!int is int[].
template ArrayOf(T) {
alias T[] ArrayOf;
}
/**
  Builds the source text of the opAssign overloads of Value:
  one per primitive tag type, one for string (stored as a 'Z' tag),
  and one per array element type.

  Every generated overload stores the value in the matching union field,
  sets `_tag` to the identifier of the assigned type and sets `bam_typeid`
  to the type symbol (for arrays, the symbol of the element type).

  Returns: D code intended to be mixed into the Value struct
*/
string injectOpAssign() {
    // Source text of a single overload.
    static string overload(string paramType, string field,
                           string tagId, char bamSymbol)
    {
        return "final void opAssign(" ~ paramType ~ " value) {" ~
               " this.u." ~ field ~ " = value;" ~
               " this._tag = " ~ tagId ~ ";" ~
               " this.bam_typeid = '" ~ bamSymbol ~ "';" ~
               "}";
    }

    string code;

    foreach (prim; PrimitiveTagValueTypes) {
        code ~= overload(prim.ValueType.stringof,
                         "" ~ prim.ch,
                         to!string(GetTypeId!(prim.ValueType)),
                         prim.ch);
    }

    code ~= overload("string", "Z", to!string(GetTypeId!string), 'Z');

    foreach (elem; ArrayElementTagValueTypes) {
        code ~= overload(elem.ValueType.stringof ~ "[]",
                         "B" ~ elem.ch,
                         to!string(GetTypeId!(ArrayOf!(elem.ValueType))),
                         elem.ch);
    }

    return code;
}
/**
  Builds the source text of Value's templated `opCast(T)`.

  The generated method is a `static if` chain over the requested type T:
  $(UL
    $(LI byte, ubyte, short, ushort, int, uint, char, float, double, real,
         long, ulong: the stored primitive value is converted with `to!T`;)
    $(LI arrays of the array element types: the stored array is converted
         with `to!T`;)
    $(LI string: a stored string is returned as is, anything else is
         converted with `to!T`.)
  )
  When the stored tag does not fit the requested kind, the generated code
  throws ConvException.

  Returns: D code intended to be mixed into the Value struct
*/
string injectOpCast() {
    // switch over all primitive tags, converting the stored value to T
    string injectSwitchPrimitive(string requested_type)
    {
        string sw = `switch (_tag) {`;
        foreach (t2; PrimitiveTagValueTypes) {
            sw ~= `case GetTypeId!` ~ t2.ValueType.stringof ~ `: ` ~
                  ` return to!T(u.` ~ t2.ch ~ `);`;
        }
        sw ~= ` default: throw new ConvException("Cannot convert Value to ` ~
              requested_type ~ `");` ~
              `}`;
        return sw;
    }

    // switch over all array tags, converting the stored array to T
    string injectSwitchArrayElement(string requested_type)
    {
        string sw = `switch (_tag) {`;
        foreach (t2; ArrayElementTagValueTypes) {
            sw ~= `case GetTypeId!(` ~ t2.ValueType.stringof ~ `[]): ` ~
                  ` return to!T(u.B` ~ t2.ch ~ `);`;
        }
        sw ~= ` default: throw new ConvException("Cannot convert Value to ` ~
              requested_type ~ `");` ~
              `}`;
        return sw;
    }

    string cs = "static if";

    foreach (t; TypeTuple!(byte, ubyte, short, ushort, int, uint,
                           char, float, double, real, long, ulong))
    {
        cs ~= `(is(T == ` ~ t.stringof ~ `)) {` ~
              injectSwitchPrimitive(t.stringof) ~
              `} else static if`;
    }

    foreach (t; ArrayElementTagValueTypes) {
        cs ~= `(is(T == ` ~ t.ValueType.stringof ~ `[])) {` ~
              injectSwitchArrayElement(t.ValueType.stringof ~ "[]") ~
              `} else static if `;
    }

    // Every literal is joined with an explicit `~`: adjacent string literals
    // are not concatenated implicitly by current compilers.
    cs ~= `(is(T == string)) {` ~
          ` if (is_string) {` ~
          ` return bam_typeid == 'Z' ? u.Z : u.H;` ~
          ` } else if (is_integer || is_float || is_character) {` ~
          ` ` ~ injectSwitchPrimitive("string") ~
          ` } else {` ~
          injectSwitchArrayElement("string") ~
          ` }` ~
          `}`;

    return "final T opCast(T)() const {" ~ cs ~ "}";
}
/**
  Struct for representing tag values.

  Tagged union, allows to store
  8/16/32-bit integers, floats, chars, strings,
  and arrays of integers/floats.

  Casting (opCast) behaves as follows:
  $(UL
    $(LI to a numeric type or char: the stored value must be a primitive
         (integer, float or char); it is converted with std.conv.to;)
    $(LI to an array type: the stored value must be an array;
         it is converted with std.conv.to;)
    $(LI to string: a stored string is returned as is, primitives and
         arrays are converted with std.conv.to.)
  )
  In all other cases ConvException is thrown.
*/
struct Value {
    /**
      Notice that having union first allows to do simple casts,
      without using opCast(). That's a bit hackish but
      allows for better speed.
    */
    private mixin(generateUnion());

    /**
      If this is an array, one of [cCsSiIf].
      Otherwise, one of [AcCsSiIfZH]

      See SAM/BAM specification for details.
    */
    public char bam_typeid;

    /*
      WARNING:

      Currently, type identifier for (u)int requires 8 bits.
      Fortunately, SAM/BAM specification doesn't use bigger integer types.
      However, in case of need to extend the hierarchy, the type
      should be changed from ubyte to something bigger.
    */
    ubyte _tag;

    /// Designates the type of currently stored value.
    ///
    /// Supposed to be used externally for checking type with GetTypeId.
    ubyte tag() @property const {
        return _tag;
    }

    // opAssign overloads for primitives, string and arrays
    mixin(injectOpAssign());

    // final T opCast(T)() const
    mixin(injectOpCast());

    /// Copies type information and payload of another Value.
    final void opAssign(Value v) {
        bam_typeid = v.bam_typeid;
        _tag = v._tag;
        u = v.u;
    }

    /// Marks the value as null/nothing. Only the tag is changed;
    /// bam_typeid and the stored payload are left as they were.
    final void opAssign(typeof(null) n) {
        _tag = GetTypeId!(typeof(null));
    }

    /// Compares with a plain value: true if this Value can be converted
    /// to T and the result equals val; false if the conversion throws
    /// ConvException.
    final bool opEquals(T)(const T val) {
        try {
            return to!T(this) == val;
        } catch (ConvException e) {
            return false;
        }
    }

    /// String representation of the stored value.
    ///
    /// 'Z' and 'H' values are returned as is; chars, numbers and arrays
    /// are converted with std.conv.to. ConvException is thrown when
    /// nothing convertible is stored (e.g. null).
    string toString() const {
        return opCast!string();
    }

    /// Constructs a Value from anything that can be assigned to it.
    this(T)(T value) {
        opAssign(value);
    }

    /// sets 'H' tag instead of default 'Z'. Is not expected to be used much.
    ///
    /// Throws an exception (via enforce) if the value is not a string.
    void setHexadecimalFlag() {
        enforce(this.is_string);

        // u.Z and u.H are members of the same union and share storage,
        // so the string itself needs no copying: only the markers change.
        bam_typeid = 'H';
        _tag = hexStringTag;
    }

    /// true if the value is null/nothing
    bool is_nothing() @property const { return _tag == GetTypeId!(typeof(null)); }

    /// true if the value is a character
    bool is_character() @property const { return _tag == GetTypeId!char; }

    /// true if the value is a float
    bool is_float() @property const { return _tag == GetTypeId!float; }

    /// true if the value is an array of integers or floats
    bool is_numeric_array() @property const { return (_tag & 0b11) == 0b01; }

    /// true if the value is an array of integers
    bool is_array_of_integers() @property const { return (_tag & 0b111) == 0b001; }

    /// true if the value is an array of floats
    bool is_array_of_floats() @property const { return (_tag & 0b111) == 0b101; }

    /// true if the four lowest bits of the tag are zero, which holds
    /// for all signed and unsigned integer types
    bool is_integer() @property const { return (_tag & 0b1111) == 0; }

    /// true if the value is unsigned integer
    bool is_unsigned() @property const { return (_tag & 0b11111) == 0; }

    /// true if the value is signed integer
    bool is_signed() @property const { return (_tag & 0b11111) == 0b10000; }

    /// true if the value represents 'Z' or 'H' tag
    bool is_string() @property const { return (_tag & 0b11) == 0b11; }

    /// true if the value represents 'H' tag
    bool is_hexadecimal_string() @property const { return (_tag & 0b111) == 0b111; }

    /// Packs the stored value with the given packer.
    ///
    /// The payload is read by reinterpreting the start of the union
    /// according to the tag; chars are packed as ubyte, both kinds of
    /// strings as char[], null as null. Nothing is packed when the tag
    /// matches none of the known types.
    public void toMsgpack(Packer)(ref Packer packer) const {
        switch (_tag) {
            case GetTypeId!byte: packer.pack(*cast(byte*)(&u)); break;
            case GetTypeId!ubyte: packer.pack(*cast(ubyte*)(&u)); break;
            case GetTypeId!short: packer.pack(*cast(short*)(&u)); break;
            case GetTypeId!ushort: packer.pack(*cast(ushort*)(&u)); break;
            case GetTypeId!int: packer.pack(*cast(int*)(&u)); break;
            case GetTypeId!uint: packer.pack(*cast(uint*)(&u)); break;

            case GetTypeId!float: packer.pack(*cast(float*)(&u)); break;
            case GetTypeId!string: packer.pack(*cast(char[]*)(&u)); break;
            case hexStringTag: packer.pack(*cast(char[]*)(&u)); break;
            case GetTypeId!char: packer.pack(*cast(ubyte*)(&u)); break;

            case GetTypeId!(byte[]): packer.pack(*cast(byte[]*)(&u)); break;
            case GetTypeId!(ubyte[]): packer.pack(*cast(ubyte[]*)(&u)); break;
            case GetTypeId!(short[]): packer.pack(*cast(short[]*)(&u)); break;
            case GetTypeId!(ushort[]): packer.pack(*cast(ushort[]*)(&u)); break;
            case GetTypeId!(int[]): packer.pack(*cast(int[]*)(&u)); break;
            case GetTypeId!(uint[]): packer.pack(*cast(uint[]*)(&u)); break;
            case GetTypeId!(float[]): packer.pack(*cast(float[]*)(&u)); break;

            case GetTypeId!(typeof(null)): packer.pack(null); break;
            default: break;
        }
    }
}