From 679a1af832ad9585c7cf72996043edb08e1b0d10 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Mon, 6 Sep 2021 08:06:14 +0300 Subject: Leave "Chr" value as string when parsing Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * The "Chr" value seems to be mostly a name of some sort, despite it being, seemingly, a number. This commit parses the "Chr" value as a string. It also updates the tests to expect a string, rather than a number for "Chr" values. --- gn3/computations/qtlreaper.py | 5 +++-- tests/unit/computations/test_qtlreaper.py | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py index eff2a80..9b20309 100644 --- a/gn3/computations/qtlreaper.py +++ b/gn3/computations/qtlreaper.py @@ -94,7 +94,7 @@ def parse_reaper_main_results(results_file): with open(results_file, "r") as infile: lines = infile.readlines() - def __parse_column_value(value): + def __parse_column_float_value(value): try: return float(value) except: @@ -102,7 +102,8 @@ def parse_reaper_main_results(results_file): def __parse_line(line): items = line.strip().split("\t") - return items[0:2] + [__parse_column_value(item) for item in items[2:]] + return items[0:3] + [ + __parse_column_float_value(item) for item in items[3:]] header = lines[0].strip().split("\t") return [dict(zip(header, __parse_line(line))) for line in lines[1:]] diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py index 6c3b64d..fd3434a 100644 --- a/tests/unit/computations/test_qtlreaper.py +++ b/tests/unit/computations/test_qtlreaper.py @@ -13,52 +13,52 @@ class TestQTLReaper(TestCase): "tests/unit/computations/data/qtlreaper/main_output_sample.txt"), [ { - "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500, + "ID": "T1", "Locus": "rs31443144", "Chr": "1", "cM": 1.500, "Mb": 3.010, "LRS": 0.500, "Additive": 
-0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500, + "ID": "T1", "Locus": "rs6269442", "Chr": "1", "cM": 1.500, "Mb": 3.492, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630, + "ID": "T1", "Locus": "rs32285189", "Chr": "1", "cM": 1.630, "Mb": 3.511, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630, + "ID": "T1", "Locus": "rs258367496", "Chr": "1", "cM": 1.630, "Mb": 3.660, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750, + "ID": "T1", "Locus": "rs32430919", "Chr": "1", "cM": 1.750, "Mb": 3.777, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880, + "ID": "T1", "Locus": "rs36251697", "Chr": "1", "cM": 1.880, "Mb": 3.812, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010, + "ID": "T1", "Locus": "rs30658298", "Chr": "1", "cM": 2.010, "Mb": 4.431, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs51852623", "Chr": 1, "cM": 2.010, + "ID": "T1", "Locus": "rs51852623", "Chr": "1", "cM": 2.010, "Mb": 4.447, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs31879829", "Chr": 1, "cM": 2.140, + "ID": "T1", "Locus": "rs31879829", "Chr": "1", "cM": 2.140, "Mb": 4.519, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs36742481", "Chr": 1, "cM": 2.140, + "ID": "T1", "Locus": "rs36742481", "Chr": "1", "cM": 2.140, "Mb": 4.776, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 } -- cgit v1.2.3