aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMuriithi Frederick Muriuki2021-08-30 11:44:37 +0300
committerMuriithi Frederick Muriuki2021-08-30 11:44:37 +0300
commit58f59b8f7df82969b58a604070aec095d17e0501 (patch)
treea428d08e0a5eefe8d608fe74fc04f30f3952c362
parentbb1fd69fa24cec4ff605450d241601b3f0ced8cb (diff)
downloadgenenetwork3-58f59b8f7df82969b58a604070aec095d17e0501.tar.gz
Fix issues with traits file format
* README.md: update header: Traits ==> Trait * gn3/computations/qtlreaper.py: update header: Traits ==> Trait * qtlfilesexport.py: Choose only BXD strains Rename the first column header from "Traits" to "Trait" to correspond with what `rust-qtlreaper` expects. Choose only the BXD strains for the proof-of-concept example - this helped bring out the fact that the traits file SHOULD NOT contain a strain column for a strain that does not exist in the genotype file in consideration. If the traits file has a strain column which does not exist in the genotype file, then `rust-qtlreaper` fails with a panic, since, from what I can tell, it tries to get a value from the genotype file for the non-existent strain, which results to a `None` type. Subsequent attempts at running an operation on the `None` type lead to the panic.
-rw-r--r--README.md4
-rw-r--r--gn3/computations/qtlreaper.py2
-rw-r--r--qtlfilesexport.py31
3 files changed, 34 insertions, 3 deletions
diff --git a/README.md b/README.md
index 0e0e509..b54015f 100644
--- a/README.md
+++ b/README.md
@@ -136,8 +136,10 @@ Under the **"Trait"** column, the traits are numbered from **T1** to **T<n>** wh
As an example, you could end up with a trait file like the following:
```txt
-Traits BXD27 BXD32 DBA/2J BXD21 ...
+Trait BXD27 BXD32 DBA/2J BXD21 ...
T1 10.5735 9.27408 9.48255 9.18253 ...
T2 6.4471 6.7191 5.98015 6.68051 ...
...
```
+
+It is very important that the column header names for the strains correspond to the genotype file used.
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index a88659e..9b13a55 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -24,7 +24,7 @@ def generate_traits_file(strains, trait_values, traits_filename):
traits_filename: The tab-separated value to put the values in for
computation of QTLs.
"""
- header = "Traits\t{}\n".format("\t".join(strains))
+ header = "Trait\t{}\n".format("\t".join(strains))
data = [header] + [
"T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
for i, t in enumerate(trait_values[:-1])] + [
diff --git a/qtlfilesexport.py b/qtlfilesexport.py
index 0543dc9..adc5e77 100644
--- a/qtlfilesexport.py
+++ b/qtlfilesexport.py
@@ -41,7 +41,36 @@ def main():
retrieve_trait_info(threshold, fullname, conn)
for fullname in trait_fullnames()]
traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
- strains = list(set([k for td in traits_data_list for k in td["data"].keys()]))
+ # strains = list(set([k for td in traits_data_list for k in td["data"].keys()]))
+ strains = [# Use only the strains in the BXD.geno genotype file
+ "BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11", "BXD12",
+ "BXD13", "BXD14", "BXD15", "BXD16", "BXD18", "BXD19", "BXD20", "BXD21",
+ "BXD22", "BXD23", "BXD24", "BXD24a", "BXD25", "BXD27", "BXD28", "BXD29",
+ "BXD30", "BXD31", "BXD32", "BXD33", "BXD34", "BXD35", "BXD36", "BXD37",
+ "BXD38", "BXD39", "BXD40", "BXD41", "BXD42", "BXD43", "BXD44", "BXD45",
+ "BXD48", "BXD48a", "BXD49", "BXD50", "BXD51", "BXD52", "BXD53", "BXD54",
+ "BXD55", "BXD56", "BXD59", "BXD60", "BXD61", "BXD62", "BXD63", "BXD64",
+ "BXD65", "BXD65a", "BXD65b", "BXD66", "BXD67", "BXD68", "BXD69",
+ "BXD70", "BXD71", "BXD72", "BXD73", "BXD73a", "BXD73b", "BXD74",
+ "BXD75", "BXD76", "BXD77", "BXD78", "BXD79", "BXD81", "BXD83", "BXD84",
+ "BXD85", "BXD86", "BXD87", "BXD88", "BXD89", "BXD90", "BXD91", "BXD93",
+ "BXD94", "BXD95", "BXD98", "BXD99", "BXD100", "BXD101", "BXD102",
+ "BXD104", "BXD105", "BXD106", "BXD107", "BXD108", "BXD109", "BXD110",
+ "BXD111", "BXD112", "BXD113", "BXD114", "BXD115", "BXD116", "BXD117",
+ "BXD119", "BXD120", "BXD121", "BXD122", "BXD123", "BXD124", "BXD125",
+ "BXD126", "BXD127", "BXD128", "BXD128a", "BXD130", "BXD131", "BXD132",
+ "BXD133", "BXD134", "BXD135", "BXD136", "BXD137", "BXD138", "BXD139",
+ "BXD141", "BXD142", "BXD144", "BXD145", "BXD146", "BXD147", "BXD148",
+ "BXD149", "BXD150", "BXD151", "BXD152", "BXD153", "BXD154", "BXD155",
+ "BXD156", "BXD157", "BXD160", "BXD161", "BXD162", "BXD165", "BXD168",
+ "BXD169", "BXD170", "BXD171", "BXD172", "BXD173", "BXD174", "BXD175",
+ "BXD176", "BXD177", "BXD178", "BXD180", "BXD181", "BXD183", "BXD184",
+ "BXD186", "BXD187", "BXD188", "BXD189", "BXD190", "BXD191", "BXD192",
+ "BXD193", "BXD194", "BXD195", "BXD196", "BXD197", "BXD198", "BXD199",
+ "BXD200", "BXD201", "BXD202", "BXD203", "BXD204", "BXD205", "BXD206",
+ "BXD207", "BXD208", "BXD209", "BXD210", "BXD211", "BXD212", "BXD213",
+ "BXD214", "BXD215", "BXD216", "BXD217", "BXD218", "BXD219", "BXD220"
+ ]
exported_traits_data_list = [
export_trait_data(td, strains) for td in traits_data_list]
slinked = slink(cluster_traits(exported_traits_data_list))