You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

82 lines
2.4 KiB

10 years ago
  1. /*
  2. This file is part of BioD.
  3. Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com>
  4. BioD is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. BioD is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  15. */
  16. module bio.core.region;
  /*
   Ragel specification for the `region_parser` state machine that parseRegion
   runs over its input. It describes three forms of region string:
       reference
       reference:left
       reference:left-right

   integer
       One or more characters from the class [,0-9]. init_integer zeroes
       uint_value when the machine is entered; consume_next_digit folds each
       character into uint_value as a decimal digit unless it is ',', so
       "1,000" yields 1000.
       NOTE(review): the class also admits input made only of commas, which
       would leave uint_value at 0 -- confirm whether that is intended.

   reference
       First character: any of '!'..'~' except '*' and '='; following
       characters: any of '!'..'~'. On leaving the machine, set_reference
       stores the slice str[0 .. p - str.ptr] in region.reference.

   reference_and_left_end / reference_and_both_ends
       Append ':' integer and then '-' integer. set_left_end stores
       uint_value - 1 in region.beg and set_right_end stores uint_value in
       region.end, both through to!uint. Presumably the input coordinates
       are 1-based and beg is stored 0-based -- NOTE(review): confirm.

   region
       Union of the three forms, instantiated with `:=`. The `@ 0` / `@ 1`
       suffixes are Ragel priorities; presumably they make the forms with
       coordinates win over the bare reference (whose character class also
       contains ':', '-' and digits) -- NOTE(review): confirm against the
       Ragel guide.

   `write data` emits the machine's static data at this point in the file.
  */
  17. %%{
  18. machine region_parser;
  19. action init_integer { uint_value = 0; }
  20. action consume_next_digit { if (fc != ',') uint_value *= 10, uint_value += fc - '0'; }
  21. integer = [,0-9]+ > init_integer @consume_next_digit ;
  22. action set_reference { region.reference = str[0 .. p - str.ptr]; }
  23. action set_left_end { region.beg = to!uint(uint_value - 1); }
  24. action set_right_end { region.end = to!uint(uint_value); }
  25. reference = ([!-()+-<>-~] [!-~]*) % set_reference ;
  26. reference_and_left_end = reference :> ':' integer % set_left_end ;
  27. reference_and_both_ends = reference_and_left_end '-' integer % set_right_end ;
  28. region := (reference @ 0) | (reference_and_left_end @ 1) | (reference_and_both_ends @ 1);
  29. write data;
  30. }%%
  31. import std.conv;
  /// Value returned by parseRegion: a reference name plus a coordinate range.
  /// NOTE(review): the unittest maps "chr1:1,000-2000" to beg 999 / end 2000,
  /// which looks like a 0-based, half-open interval -- confirm.
  32. struct Region {
  33. string reference; /// name part of the region string (parseRegion stores a slice of its input)
  34. uint beg; /// left end; parseRegion stores the parsed number minus one, or 0 if absent
  35. uint end; /// right end; parseRegion stores the parsed number, or uint.max if absent
  36. }
  /**
   Parses a region string with the region_parser machine and returns the
   result as a Region.

   Params:
       str = text of the form "reference", "reference:left" or
             "reference:left-right"; ',' inside the numbers is ignored.

   Returns: Region whose reference is a slice of str; beg stays 0 and end
   stays uint.max when the corresponding coordinate is not set by the parser.

   NOTE(review): cs is not examined after the machine has run, so input the
   machine rejects appears to come back as a partially filled Region rather
   than an error -- confirm whether callers depend on that.
   NOTE(review): the grammar actions convert with to!uint, which presumably
   throws for values outside uint (e.g. a left coordinate of 0 gives -1) --
   confirm.
  */
  37. Region parseRegion(string str) {
  38. char* p = cast(char*)str.ptr; // cursor into the input; the cast drops immutable from the string's pointer
  39. char* pe = p + str.length; // one past the last input character
  40. char* eof = pe; // the whole input is in this one buffer, so eof equals pe
  41. int cs; // current machine state
  42. long uint_value; // accumulator written by the integer actions of the grammar
  43. Region region;
  44. region.beg = 0; // default kept when no left coordinate is parsed
  45. region.end = uint.max; // default kept when no right coordinate is parsed
  // The two Ragel directives below expand to the code that initializes cs and
  // then runs the machine over [p, pe); p, pe, eof and cs are the variable
  // names that generated code refers to.
  46. %%write init;
  47. %%write exec;
  48. return region;
  49. }
  // Exercises parseRegion on the three forms of region string, comparing each
  // result against a complete Region value (reference, beg, end).
  50. unittest {
  // Reference with both ends; the comma in the left coordinate is skipped.
  51. assert(parseRegion("chr1:1,000-2000") == Region("chr1", 999, 2000));
  // Bare reference: beg and end keep their defaults.
  52. assert(parseRegion("chr2") == Region("chr2", 0, uint.max));
  // Reference with the left end only: end keeps its default.
  53. assert(parseRegion("chr3:1,000,000") == Region("chr3", 999_999, uint.max));
  54. }