You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

728 lines
21 KiB

  1. /*
  2. This file is part of BioD.
  3. Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com>
  4. Permission is hereby granted, free of charge, to any person obtaining a
  5. copy of this software and associated documentation files (the "Software"),
  6. to deal in the Software without restriction, including without limitation
  7. the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. and/or sell copies of the Software, and to permit persons to whom the
  9. Software is furnished to do so, subject to the following conditions:
  10. The above copyright notice and this permission notice shall be included in
  11. all copies or substantial portions of the Software.
  12. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  17. FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  18. DEALINGS IN THE SOFTWARE.
  19. */
  20. module bio.bam.baseinfo;
  21. import bio.core.base;
  22. import bio.core.sequence;
  23. import bio.bam.read;
  24. import bio.bam.tagvalue;
  25. import bio.bam.iontorrent.flowcall;
  26. import bio.bam.md.core;
  27. import std.range;
  28. import std.conv;
  29. import std.traits;
  30. import std.typecons;
  31. import std.typetuple;
  /// Optional features that may be listed next to tag names in the
  /// template arguments of $(D basesWith) / $(D PerBaseInfo).
  enum Option
  {
      /// adds 'cigar_before' and 'cigar_after' properties
      cigarExtra,

      /// adds 'md_operation' and 'md_operation_offset' properties
      mdCurrentOp,

      /// adds 'previous_md_operation' property
      mdPreviousOp,

      /// adds 'next_md_operation' property
      mdNextOp,
  }
  /// A value of type $(D T) labelled at compile time with the string $(D Tag).
  /// Thanks to $(D alias this) it can be used wherever the wrapped value can.
  struct MixinArg(T, string Tag) {
      /// The label this argument type was instantiated with.
      alias Tag TagName;

      /// The wrapped value.
      T value;
      alias value this;
  }
  /// Wraps $(D value) into a $(D MixinArg) labelled with $(D Tag), for passing
  /// to the $(D basesWith) function (see below).
  /// The label is what lets each extension pick out the parameters meant for it.
  MixinArg!(T, Tag) arg(string Tag, T)(T value) {
      return typeof(return)(value);
  }
  /// Compile-time filter: yields the tuple of those elements of $(D T)
  /// for which the predicate template $(D P) evaluates to true,
  /// in their original order.
  template staticFilter(alias P, T...)
  {
      static if (T.length == 0)
      {
          // Nothing left to examine.
          alias TypeTuple!() staticFilter;
      }
      else static if (!(P!(T[0])))
      {
          // Head is rejected: the result is the filtered tail alone.
          alias staticFilter!(P, T[1 .. $]) staticFilter;
      }
      else
      {
          // Head is accepted: keep it in front of the filtered tail.
          alias TypeTuple!(T[0], staticFilter!(P, T[1 .. $])) staticFilter;
      }
  }
  /// True if $(D argument) is a value whose type is exactly $(D string),
  /// which is how tag names are recognised among template arguments.
  template isTag(alias argument)
  {
      static if (is(typeof(argument) == string))
          enum isTag = true;
      else
          enum isTag = false;
  }
  /// True if $(D argument) is a value whose type is exactly $(D Option).
  template isOption(alias argument)
  {
      static if (is(typeof(argument) == Option))
          enum isOption = true;
      else
          enum isOption = false;
  }
  /// Range over the bases of a read. Each element is a $(D Result) holding
  /// the base itself (with strand taken into account) plus the properties
  /// contributed by the CIGAR extension and by every tag extension named
  /// in $(D TagsAndOptions).
  struct PerBaseInfo(R, TagsAndOptions...) {

      /// String arguments of $(D TagsAndOptions), i.e. tag names.
      alias staticFilter!(isTag, TagsAndOptions) Tags;

      /// $(D Option) arguments of $(D TagsAndOptions).
      alias staticFilter!(isOption, TagsAndOptions) Options;

      // "CIGAR" is always first, so its setup/update run before those of
      // the tag extensions (which may query current_cigar_operation).
      private alias TypeTuple!("CIGAR", Tags) Extensions;

      // /////////////////////////////////////////////////////////////////////////
      //
      // Each 'extension' is a template with name TAGbaseInfo, containing
      // a couple of mixin templates:
      //
      // * resultProperties
      //      These are additional properties provided by the template
      //
      // * rangeMethods
      //      These describe how to proceed to the next base.
      //      The following methods must be implemented:
      //
      //      - void setup(Args...)(const ref R read, Args args);
      //          Gets called during range construction. All constructor
      //          arguments are forwarded, and it's this function which
      //          is responsible for getting required parameters for this
      //          particular template.
      //
      //      - void populate(Result)(ref Result r);
      //          Populates fields of the result declared in resultProperties.
      //          Should run in O(1), just copying a few variables.
      //          Current base of the result is updated before the call.
      //
      //      - void update(const ref R read);
      //          Encapsulates logic of moving to the next base and updating
      //          mixin variables correspondingly.
      //
      //      - void copy(Range)(const ref Range source, ref Range target);
      //          Gets called during $(D source.save). Therefore, any ranges
      //          used in mixin templates must be saved as well at that time.
      //
      // /////////////////////////////////////////////////////////////////////////

      // Builds the source text that mixes every extension's
      // resultProperties into Result (unnamed mixins).
      private static string getResultProperties(Exts...)() {
          char[] result;
          foreach (ext; Exts)
              result ~= "mixin " ~ ext ~ "baseInfo!(R, Options).resultProperties;".dup;
          return cast(string)result;
      }

      /// Element type of the range.
      static struct Result {
          /// Actual read base, with strand taken into account.
          Base base;
          alias base this;

          /// Explicit conversion to string yields the textual form of the base.
          string opCast(T)() if (is(T == string))
          {
              return to!string(base);
          }

          /// Comparison with a plain base looks at the base only.
          bool opEquals(T)(T base) const
              if (is(Unqual!T == Base))
          {
              return this.base == base;
          }

          /// Comparison with another result is field-by-field.
          ///
          /// This used to be written as `this == result`, which resolves to
          /// this very overload and therefore never terminated.
          bool opEquals(T)(T result) const
              if (is(Unqual!T == Result))
          {
              // Unrolled at compile time over all fields, including those
              // added by the extensions' resultProperties mixins.
              foreach (i, FieldType; typeof(Result.tupleof))
              {
                  if (this.tupleof[i] != result.tupleof[i])
                      return false;
              }
              return true;
          }

          /// Comparison with a character is delegated to the base.
          bool opEquals(T)(T base) const
              if (is(Unqual!T == char) || is(Unqual!T == dchar))
          {
              return this.base == base;
          }

          mixin(getResultProperties!Extensions());
      }

      // Builds the source text that mixes every extension's rangeMethods
      // into this struct as a mixin named after the extension
      // (e.g. `mixin CIGARbaseInfo!(R, Options).rangeMethods CIGAR;`).
      private static string getRangeMethods(Exts...)() {
          char[] result;
          foreach (ext; Exts)
              result ~= "mixin " ~ ext ~ "baseInfo!(R, Options).rangeMethods " ~ ext ~ ";".dup;
          return cast(string)result;
      }

      mixin(getRangeMethods!Extensions());

      // The four helpers below dispatch to the method of the named mixin `tag`.

      private void setup(string tag, Args...)(R read, Args args) {
          mixin(tag ~ ".setup(read, args);");
      }

      private void populate(string tag)(ref Result r) {
          mixin(tag ~ ".populate(r);");
      }

      private void update(string tag)() {
          mixin(tag ~ ".update(_read);");
      }

      private void copy(string tag)(ref typeof(this) other) {
          mixin(tag ~ ".copy(this, other);");
      }

      /// Creates the range for $(D read); $(D args) are forwarded unchanged
      /// to the $(D setup) method of every extension.
      this(Args...)(R read, Args args) {
          _read = read;
          _rev = read.is_reverse_strand;
          _seq = reversableRange!complementBase(read.sequence, _rev);

          foreach (t; Extensions) {
              setup!t(read, args);
          }
      }

      /// True once all bases of the read have been consumed.
      bool empty() @property {
          return _seq.empty;
      }

      /// Allows to construct front element in-place, avoiding a copy.
      void constructFront(Result* addr)
      {
          addr.base = _seq.front;
          foreach (t; Extensions)
              populate!t(*addr);
      }

      /// Current element: the current base plus whatever each extension
      /// populates for it.
      Result front() @property {
          // Left uninitialised on purpose; the base is set here and the
          // remaining fields are expected to be filled in by populate.
          Result r = void;
          r.base = _seq.front;
          foreach (t; Extensions)
              populate!t(r);
          return r;
      }

      /// Advances every extension and then the underlying sequence.
      void popFront() {
          moveToNextBase();
      }

      /// Returns an independent copy of the range at its current position.
      PerBaseInfo save() @property {
          PerBaseInfo r = void;
          r._read = _read.dup;
          r._seq = _seq.save;
          r._rev = _rev;
          foreach (t; Extensions)
              copy!t(r);
          return r;
      }

      /// Assignment copies the state of $(D other), including the state of
      /// every extension.
      ref PerBaseInfo opAssign(PerBaseInfo other) {
          _read = other._read;
          _seq = other._seq.save;
          _rev = other._rev;
          foreach (t; Extensions)
              other.copy!t(this);
          return this;
      }

      private void moveToNextBase() {

          // Extensions are updated in declaration order (CIGAR first),
          // before the sequence itself is advanced.
          foreach (t; Extensions) {
              update!t();
          }

          _seq.popFront();
      }

      /// Returns true if the read is reverse strand,
      /// and false otherwise.
      bool reverse_strand() @property const {
          return _rev;
      }

      private {
          bool _rev = void;
          R _read = void;
          ReversableRange!(complementBase, typeof(_read.sequence)) _seq = void;
      }
  }
  ///
  /// Collect per-base information from available tags.
  /// Use $(D arg!TagName) to pass a parameter related to a particular tag.
  ///
  /// Example:
  ///
  /// basesWith!"FZ"(arg!"flowOrder"(flow_order), arg!"keySequence"(key_sequence));
  ///
  template basesWith(TagsAndOptions...) {
      auto basesWith(R, Args...)(R read, Args args) {
          // The read type is deduced from the call; tags and options come
          // from the explicit template arguments.
          alias PerBaseInfo!(R, TagsAndOptions) BaseRange;
          return BaseRange(read, args);
      }
  }
  /// Provides additional property $(D reference_base)
  /// and, depending on $(D Options), properties describing the
  /// previous / current / next MD operation.
  template MDbaseInfo(R, Options...) {

      /// Fields and accessors added to the per-base result.
      mixin template resultProperties() {

          enum MdCurrentOp = staticIndexOf!(Option.mdCurrentOp, Options) != -1;
          enum MdPreviousOp = staticIndexOf!(Option.mdPreviousOp, Options) != -1;
          enum MdNextOp = staticIndexOf!(Option.mdNextOp, Options) != -1;

          /// If current CIGAR operation is reference consuming,
          /// returns reference base at this position, otherwise
          /// returns '-'.
          ///
          /// If read is on '-' strand, the result will be
          /// complementary base.
          char reference_base() @property const {
              return _ref_base;
          }

          private char _ref_base = void;

          static if (MdPreviousOp)
          {
              private Nullable!MdOperation _previous_md_operation = void;

              /// Previous MD operation
              Nullable!MdOperation previous_md_operation() @property {
                  return _previous_md_operation;
              }
          }

          static if (MdCurrentOp)
          {
              private MdOperation _current_md_operation = void;
              private uint _current_md_operation_offset = void;

              /// Current MD operation
              MdOperation md_operation() @property {
                  return _current_md_operation;
              }

              /// If current MD operation is match, returns how many bases
              /// have matched before the current base. Otherwise returns 0.
              uint md_operation_offset() @property const {
                  return _current_md_operation_offset;
              }
          }

          static if (MdNextOp)
          {
              private Nullable!MdOperation _next_md_operation = void;

              /// Next MD operation
              Nullable!MdOperation next_md_operation() @property {
                  return _next_md_operation;
              }
          }
      }

      /// State and stepping logic mixed into the range.
      mixin template rangeMethods() {

          enum MdCurrentOp = staticIndexOf!(Option.mdCurrentOp, Options) != -1;
          enum MdPreviousOp = staticIndexOf!(Option.mdPreviousOp, Options) != -1;
          enum MdNextOp = staticIndexOf!(Option.mdNextOp, Options) != -1;

          private {
              ReversableRange!(reverseMdOp, MdOperationRange) _md_ops = void;
              uint _match; // remaining length of current match operation
              MdOperation _md_front = void;

              static if (MdPreviousOp)
              {
                  Nullable!MdOperation _previous_md_op;
                  bool _md_front_is_initialized;
              }
          }

          // Pops the next operation off _md_ops into _md_front. When the
          // previous operation is tracked, the old _md_front is remembered
          // first (except on the very first call, when it holds nothing yet).
          private void updateMdFrontVariable()
          {
              static if (MdPreviousOp)
              {
                  if (_md_front_is_initialized)
                      _previous_md_op = _md_front;

                  _md_front_is_initialized = true;
              }

              _md_front = _md_ops.front;
              _md_ops.popFront();
          }

          /// Reads the "MD" tag of $(D read) and positions the state on the
          /// first MD operation that is not a deletion.
          void setup(Args...)(const ref R read, Args args)
          {
              auto md = read["MD"];
              // The tag value is reinterpreted as a string in place.
              // NOTE(review): presumably relies on the value's storage
              // starting with the string payload; confirm against the
              // tag value type.
              auto md_str = *(cast(string*)&md);
              _md_ops = reversableRange!reverseMdOp(mdOperations(md_str),
                                                    read.is_reverse_strand);

              while (!_md_ops.empty)
              {
                  updateMdFrontVariable();
                  if (!_md_front.is_deletion) {
                      if (_md_front.is_match) {
                          _match = _md_front.match;
                      }
                      break;
                  }
              }
          }

          /// Fills in the reference base and the requested MD operation
          /// fields of $(D result).
          void populate(Result)(ref Result result)
          {
              if (!current_cigar_operation.is_reference_consuming)
              {
                  // No reference base here. The optional MD operation
                  // fields below are left untouched in this case.
                  result._ref_base = '-';
                  return;
              }

              MdOperation op = _md_front;
              if (op.is_mismatch)
                  result._ref_base = op.mismatch.asCharacter;
              else if (op.is_match) {
                  // On a match the reference base equals the read base.
                  result._ref_base = result.base.asCharacter;
              }
              else assert(0);

              static if (MdPreviousOp)
              {
                  if (_previous_md_op.isNull)
                      result._previous_md_operation.nullify();
                  else
                      result._previous_md_operation = _previous_md_op.get;
              }

              static if (MdCurrentOp)
              {
                  result._current_md_operation = op;
                  result._current_md_operation_offset = _md_front.match - _match;
              }

              static if (MdNextOp)
              {
                  if (_md_ops.empty)
                      result._next_md_operation.nullify();
                  else
                      result._next_md_operation = _md_ops.front;
              }
          }

          /// Advances the MD state by one reference-consuming base.
          void update(const ref R read)
          {
              // NOTE(review): the CIGAR extension is updated before this
              // one, so current_cigar_operation appears to be the operation
              // of the base being moved to. TODO confirm the behaviour for
              // reads whose first query-consuming operation is not
              // reference-consuming (e.g. a leading soft clip).
              if (!current_cigar_operation.is_reference_consuming)
                  return;

              if (_md_front.is_mismatch)
              {
                  if (_md_ops.empty)
                      return;

                  updateMdFrontVariable();
              }
              else if (_md_front.is_match)
              {
                  --_match;
                  if (_match == 0 && !_md_ops.empty) {
                      updateMdFrontVariable();
                  }
              }
              else assert(0);

              // Deletions have no corresponding read base; skip over them.
              while (_md_front.is_deletion) {
                  if (_md_ops.empty)
                      return;

                  updateMdFrontVariable();
              }

              // Entering a new match operation: reload its length.
              if (_match == 0 && _md_front.is_match)
                  _match = _md_front.match;
          }

          /// Copies the complete MD state from $(D source) to $(D target).
          void copy(Range)(ref Range source, ref Range target)
          {
              target.MD._md_ops = source.MD._md_ops.save;
              target.MD._md_front = source.MD._md_front;
              // The remaining match length is part of the position too;
              // without it the target would keep whatever value it had
              // (uninitialised memory when called from save).
              target.MD._match = source.MD._match;

              static if (MdPreviousOp)
              {
                  if (source.MD._previous_md_op.isNull)
                      target.MD._previous_md_op.nullify();
                  else
                      target.MD._previous_md_op = source.MD._previous_md_op.get;
                  target.MD._md_front_is_initialized = source.MD._md_front_is_initialized;
              }
          }
      }
  }
  /// Provides additional property $(D flow_call).
  ///
  /// Requires two extra constructor arguments, passed as
  /// $(D arg!"flowOrder"(...)) and $(D arg!"keySequence"(...)).
  template FZbaseInfo(R, Options...) {

      /// Fields and accessors added to the per-base result.
      mixin template resultProperties() {
          /// Current flow call
          ReadFlowCall flow_call() @property const {
              return _flow_call;
          }

          private {
              ReadFlowCall _flow_call;
          }
      }

      /// State and stepping logic mixed into the range.
      mixin template rangeMethods() {

          private {
              ReadFlowCallRange!(BamRead.SequenceResult) _flow_calls = void;
              ReadFlowCall _current_flow_call = void;
              ushort _at = void; // index of the current base within the current flow call

              debug {
                  string _read_name;
              }
          }

          /// Extracts the flow order and key sequence from $(D args) and
          /// positions the state on the first flow call of $(D read).
          void setup(Args...)(const ref R read, Args args)
          {
              // Both are assigned in the loop below; the static asserts
              // guarantee that the corresponding arguments are present.
              string flow_order = void;
              string key_sequence = void;

              debug {
                  _read_name = read.name.idup;
              }

              enum flowOrderExists = staticIndexOf!(MixinArg!(string, "flowOrder"), Args);
              enum keySequenceExists = staticIndexOf!(MixinArg!(string, "keySequence"), Args);
              static assert(flowOrderExists != -1, `Flow order must be provided via arg!"flowOrder"`);
              static assert(keySequenceExists != -1, `Key sequence must be provided via arg!"keySequence"`);

              foreach (arg; args) {
                  static if(is(typeof(arg) == MixinArg!(string, "flowOrder")))
                      flow_order = arg;

                  static if(is(typeof(arg) == MixinArg!(string, "keySequence")))
                      key_sequence = arg;
              }

              _at = 0;

              _flow_calls = readFlowCalls(read, flow_order, key_sequence);
              if (!_flow_calls.empty) {
                  _current_flow_call = _flow_calls.front;
              }
          }

          /// Stores the current flow call in $(D result).
          void populate(Result)(ref Result result) {
              result._flow_call = _current_flow_call;

              debug {
                  // Debug builds report bases that disagree with the flow call.
                  if (result.base != result._flow_call.base) {
                      import std.stdio;
                      stderr.writeln("invalid flow call at ", _read_name, ": ", result.position);
                  }
              }
          }

          /// Moves to the next base, switching to the next flow call once
          /// all bases of the current one have been visited.
          void update(const ref R read)
          {
              ++_at;
              if (_at == _current_flow_call.length) {
                  _flow_calls.popFront();
                  if (!_flow_calls.empty) {
                      _current_flow_call = _flow_calls.front;
                      _at = 0;
                  }
              }
          }

          /// Copies the complete flow-call state from $(D source) to $(D target).
          void copy(Range)(ref Range source, ref Range target) {
              // Members are always addressed through the named mixin:
              // `_at` also exists in the CIGAR mixin, so being explicit
              // keeps every access unambiguous and uniform.
              target.FZ._flow_calls = source.FZ._flow_calls.save();
              target.FZ._at = source.FZ._at;
              target.FZ._current_flow_call = source.FZ._current_flow_call;

              debug {
                  target.FZ._read_name = source.FZ._read_name;
              }
          }
      }
  }
  /// Retrieving flow signal intensities from ZM tags is also available:
  /// $(D ZMbaseInfo) is simply another name for $(D FZbaseInfo).
  alias FZbaseInfo ZMbaseInfo;
  /// Provides additional properties
  ///     * position
  ///     * cigar_operation
  ///     * cigar_operation_offset
  ///
  /// With $(D Option.cigarExtra), $(D cigar_before) and $(D cigar_after)
  /// are available as well.
  template CIGARbaseInfo(R, Options...) {

      /// Fields and accessors added to the per-base result.
      mixin template resultProperties() {

          enum CigarExtraProperties = staticIndexOf!(Option.cigarExtra, Options) != -1;

          static if (CigarExtraProperties)
          {
              /// Current CIGAR operation
              CigarOperation cigar_operation() @property {
                  return _cigar[_operation_index];
              }

              /// CIGAR operations before current one
              auto cigar_before() @property {
                  return _cigar[0 .. _operation_index];
              }

              /// CIGAR operations after current one
              auto cigar_after() @property {
                  return _cigar[_operation_index + 1 .. _cigar.length];
              }
          }
          else
          {
              /// Current CIGAR operation
              CigarOperation cigar_operation() @property const {
                  return _current_cigar_op;
              }
          }

          /// Position of the corresponding base on the reference.
          /// If current CIGAR operation is not one of 'M', '=', 'X',
          /// returns the position of the previous mapped base.
          uint position() @property const {
              return _reference_position;
          }

          /// Offset in current CIGAR operation, starting from 0.
          uint cigar_operation_offset() @property const {
              return _cigar_operation_offset;
          }

          private {
              int _operation_index = void;
              uint _reference_position = void;
              uint _cigar_operation_offset = void;

              // Either the whole CIGAR (for the extra properties)
              // or just the current operation is stored per result.
              static if (CigarExtraProperties)
              {
                  ReversableRange!(identity, const(CigarOperation)[]) _cigar = void;
              }
              else
              {
                  CigarOperation _current_cigar_op;
              }
          }
      }

      /// State and stepping logic mixed into the range.
      mixin template rangeMethods() {

          enum CigarExtraProperties = staticIndexOf!(Option.cigarExtra, Options) != -1;

          private {
              CigarOperation _current_cigar_op = void;
              ulong _cur_cig_op_len = void;
              bool _cur_cig_op_is_ref_cons = void;
              int _index = void;   // index of the current operation in _cigar
              uint _at = void;     // offset of the current base within that operation
              uint _ref_pos = void;
              ReversableRange!(identity, const(CigarOperation)[]) _cigar = void;
          }

          /// Current CIGAR operation, available to all extensions
          const(CigarOperation) current_cigar_operation() @property const {
              return _current_cigar_op;
          }

          /// Positions the state on the first query-consuming operation.
          /// Extra constructor arguments are ignored.
          void setup(Args...)(const ref R read, Args)
          {
              _cigar = reversableRange(read.cigar, read.is_reverse_strand);

              _index = -1;

              // Reverse-strand reads are walked from their last covered
              // reference position downwards.
              if (reverse_strand)
                  _ref_pos = read.position + read.basesCovered() - 1;
              else
                  _ref_pos = read.position;

              _moveToNextCigarOperator();
              assert(_index >= 0);
          }

          /// Copies position, offset and CIGAR information into $(D result).
          void populate(Result)(ref Result result) {
              result._reference_position = _ref_pos;
              result._cigar_operation_offset = _at;
              static if (!CigarExtraProperties)
              {
                  result._current_cigar_op = _current_cigar_op;
              }
              else
              {
                  result._cigar = _cigar;
                  result._operation_index = _index;
              }
          }

          /// Moves one base forward within the CIGAR.
          void update(const ref R read)
          {
              ++_at;

              if (_cur_cig_op_is_ref_cons) {
                  // One step along the reference, in strand direction.
                  if (reverse_strand)
                      --_ref_pos;
                  else
                      ++_ref_pos;
              }

              // Current operation exhausted: go on to the next one.
              if (_at == _cur_cig_op_len)
                  _moveToNextCigarOperator();
          }

          /// Copies the complete CIGAR state from $(D source) to $(D target).
          void copy(Range)(const ref Range source, ref Range target) {
              target.CIGAR._cigar = source.CIGAR._cigar;
              target.CIGAR._index = source.CIGAR._index;
              target.CIGAR._at = source.CIGAR._at;
              target.CIGAR._ref_pos = source.CIGAR._ref_pos;
              target.CIGAR._current_cigar_op = source.CIGAR._current_cigar_op;
              target.CIGAR._cur_cig_op_len = source.CIGAR._cur_cig_op_len;
              target.CIGAR._cur_cig_op_is_ref_cons = source.CIGAR._cur_cig_op_is_ref_cons;
          }

          // Advances _index to the next query-consuming operation (if any),
          // accounting on the way for the reference positions covered by
          // operations that consume the reference but not the query.
          private void _moveToNextCigarOperator() {
              _at = 0;
              ++_index;
              while (_index < _cigar.length)
              {
                  _current_cigar_op = _cigar[_index];
                  _cur_cig_op_is_ref_cons = _current_cigar_op.is_reference_consuming;
                  _cur_cig_op_len = _current_cigar_op.length;

                  // Found an operation that has read bases: stop here.
                  if (_current_cigar_op.is_query_consuming)
                      break;

                  if (_cur_cig_op_is_ref_cons)
                  {
                      if (reverse_strand)
                          _ref_pos -= _cur_cig_op_len;
                      else
                          _ref_pos += _cur_cig_op_len;
                  }

                  ++_index;
              }
          }
      }
  }