From 58f59b8f7df82969b58a604070aec095d17e0501 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Mon, 30 Aug 2021 11:44:37 +0300 Subject: Fix issues with traits file format * README.md: update header: Traits ==> Trait * gn3/computations/qtlreaper.py: update header: Traits ==> Trait * qtlfilesexport.py: Choose only BXD strains Rename the first column header from "Traits" to "Trait" to correspond with what `rust-qtlreaper` expects. Choose only the BXD strains for the proof-of-concept example - this helped bring out the fact that the traits file SHOULD NOT contain a strain column for a strain that does not exist in the genotype file under consideration. If the traits file has a strain column which does not exist in the genotype file, then `rust-qtlreaper` fails with a panic, since, from what I can tell, it tries to get a value from the genotype file for the non-existent strain, which results in a `None` value. Subsequent attempts at running an operation on that `None` value lead to the panic. --- README.md | 4 +++- gn3/computations/qtlreaper.py | 2 +- qtlfilesexport.py | 31 ++++++++++++++++++++++++++++++- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0e0e509..b54015f 100644 --- a/README.md +++ b/README.md @@ -136,8 +136,10 @@ Under the **"Trait"** column, the traits are numbered from **T1** to **T** wh As an example, you could end up with a trait file like the following: ```txt -Traits BXD27 BXD32 DBA/2J BXD21 ... +Trait BXD27 BXD32 DBA/2J BXD21 ... T1 10.5735 9.27408 9.48255 9.18253 ... T2 6.4471 6.7191 5.98015 6.68051 ... ... ``` + +It is very important that the column header names for the strains correspond to the genotype file used. 
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py index a88659e..9b13a55 100644 --- a/gn3/computations/qtlreaper.py +++ b/gn3/computations/qtlreaper.py @@ -24,7 +24,7 @@ def generate_traits_file(strains, trait_values, traits_filename): traits_filename: The tab-separated value to put the values in for computation of QTLs. """ - header = "Traits\t{}\n".format("\t".join(strains)) + header = "Trait\t{}\n".format("\t".join(strains)) data = [header] + [ "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t])) for i, t in enumerate(trait_values[:-1])] + [ diff --git a/qtlfilesexport.py b/qtlfilesexport.py index 0543dc9..adc5e77 100644 --- a/qtlfilesexport.py +++ b/qtlfilesexport.py @@ -41,7 +41,36 @@ def main(): retrieve_trait_info(threshold, fullname, conn) for fullname in trait_fullnames()] traits_data_list = [retrieve_trait_data(t, conn) for t in traits] - strains = list(set([k for td in traits_data_list for k in td["data"].keys()])) + # strains = list(set([k for td in traits_data_list for k in td["data"].keys()])) + strains = [# Use only the strains in the BXD.geno genotype file + "BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11", "BXD12", + "BXD13", "BXD14", "BXD15", "BXD16", "BXD18", "BXD19", "BXD20", "BXD21", + "BXD22", "BXD23", "BXD24", "BXD24a", "BXD25", "BXD27", "BXD28", "BXD29", + "BXD30", "BXD31", "BXD32", "BXD33", "BXD34", "BXD35", "BXD36", "BXD37", + "BXD38", "BXD39", "BXD40", "BXD41", "BXD42", "BXD43", "BXD44", "BXD45", + "BXD48", "BXD48a", "BXD49", "BXD50", "BXD51", "BXD52", "BXD53", "BXD54", + "BXD55", "BXD56", "BXD59", "BXD60", "BXD61", "BXD62", "BXD63", "BXD64", + "BXD65", "BXD65a", "BXD65b", "BXD66", "BXD67", "BXD68", "BXD69", + "BXD70", "BXD71", "BXD72", "BXD73", "BXD73a", "BXD73b", "BXD74", + "BXD75", "BXD76", "BXD77", "BXD78", "BXD79", "BXD81", "BXD83", "BXD84", + "BXD85", "BXD86", "BXD87", "BXD88", "BXD89", "BXD90", "BXD91", "BXD93", + "BXD94", "BXD95", "BXD98", "BXD99", "BXD100", "BXD101", "BXD102", + "BXD104", 
"BXD105", "BXD106", "BXD107", "BXD108", "BXD109", "BXD110", + "BXD111", "BXD112", "BXD113", "BXD114", "BXD115", "BXD116", "BXD117", + "BXD119", "BXD120", "BXD121", "BXD122", "BXD123", "BXD124", "BXD125", + "BXD126", "BXD127", "BXD128", "BXD128a", "BXD130", "BXD131", "BXD132", + "BXD133", "BXD134", "BXD135", "BXD136", "BXD137", "BXD138", "BXD139", + "BXD141", "BXD142", "BXD144", "BXD145", "BXD146", "BXD147", "BXD148", + "BXD149", "BXD150", "BXD151", "BXD152", "BXD153", "BXD154", "BXD155", + "BXD156", "BXD157", "BXD160", "BXD161", "BXD162", "BXD165", "BXD168", + "BXD169", "BXD170", "BXD171", "BXD172", "BXD173", "BXD174", "BXD175", + "BXD176", "BXD177", "BXD178", "BXD180", "BXD181", "BXD183", "BXD184", + "BXD186", "BXD187", "BXD188", "BXD189", "BXD190", "BXD191", "BXD192", + "BXD193", "BXD194", "BXD195", "BXD196", "BXD197", "BXD198", "BXD199", + "BXD200", "BXD201", "BXD202", "BXD203", "BXD204", "BXD205", "BXD206", + "BXD207", "BXD208", "BXD209", "BXD210", "BXD211", "BXD212", "BXD213", + "BXD214", "BXD215", "BXD216", "BXD217", "BXD218", "BXD219", "BXD220" + ] exported_traits_data_list = [ export_trait_data(td, strains) for td in traits_data_list] slinked = slink(cluster_traits(exported_traits_data_list)) -- cgit v1.2.3