diff options
author | Alexander_Kabui | 2024-01-02 13:21:07 +0300 |
---|---|---|
committer | Alexander_Kabui | 2024-01-02 13:21:07 +0300 |
commit | 70c4201b332e0e2c0d958428086512f291469b87 (patch) | |
tree | aea4fac8782c110fc233c589c3f0f7bd34bada6c /wqflask/utility/genofile_parser.py | |
parent | 5092eb42f062b1695c4e39619f0bd74a876cfac2 (diff) | |
parent | 965ce5114d585624d5edb082c710b83d83a3be40 (diff) | |
download | genenetwork2-70c4201b332e0e2c0d958428086512f291469b87.tar.gz |
merge changes
Diffstat (limited to 'wqflask/utility/genofile_parser.py')
-rw-r--r-- | wqflask/utility/genofile_parser.py | 100 |
1 files changed, 0 insertions, 100 deletions
diff --git a/wqflask/utility/genofile_parser.py b/wqflask/utility/genofile_parser.py deleted file mode 100644 index 86d9823e..00000000 --- a/wqflask/utility/genofile_parser.py +++ /dev/null @@ -1,100 +0,0 @@ -# CTL analysis for GN2 -# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com> - -import sys -import os -import glob -import traceback -import gzip - - -import simplejson as json - -from pprint import pformat as pf - - -class Marker: - def __init__(self): - self.name = None - self.chr = None - self.cM = None - self.Mb = None - self.genotypes = [] - - -class ConvertGenoFile: - - def __init__(self, input_file): - self.mb_exists = False - self.cm_exists = False - self.markers = [] - - self.latest_row_pos = None - self.latest_col_pos = None - - self.latest_row_value = None - self.latest_col_value = None - self.input_fh = open(input_file) - print("!!!!!!!!!!!!!!!!PARSER!!!!!!!!!!!!!!!!!!") - self.haplotype_notation = { - '@mat': "1", - '@pat': "2", - '@het': "-999", - '@unk': "-999" - } - self.configurations = {} - - def process_rows(self): - for self.latest_row_pos, row in enumerate(self.input_fh): - self.latest_row_value = row - # Take care of headers - if not row.strip(): - continue - if row.startswith('#'): - continue - if row.startswith('Chr'): - if 'Mb' in row.split(): - self.mb_exists = True - if 'cM' in row.split(): - self.cm_exists = True - skip = 2 + self.cm_exists + self.mb_exists - self.individuals = row.split()[skip:] - continue - if row.startswith('@'): - key, _separater, value = row.partition(':') - key = key.strip() - value = value.strip() - if key in self.haplotype_notation: - self.configurations[value] = self.haplotype_notation[key] - continue - if not len(self.configurations): - raise EmptyConfigurations - yield row - - def process_csv(self): - for row in self.process_rows(): - row_items = row.split("\t") - - this_marker = Marker() - this_marker.name = row_items[1] - this_marker.chr = row_items[0] - if self.cm_exists and self.mb_exists: - this_marker.cM = row_items[2] - this_marker.Mb = row_items[3] - genotypes = row_items[4:] - elif self.cm_exists: - this_marker.cM = row_items[2] - genotypes = row_items[3:] - elif self.mb_exists: - this_marker.Mb = row_items[2] - genotypes = row_items[3:] - else: - genotypes = row_items[2:] - for item_count, genotype in enumerate(genotypes): - if genotype.upper().strip() in self.configurations: - this_marker.genotypes.append( - self.configurations[genotype.upper().strip()]) - else: - print("WARNING:", genotype.upper()) - this_marker.genotypes.append("NA") - self.markers.append(this_marker.__dict__) |