Add function for mapping strain to sample pos + begin creating generate_new_genofiles function

author: zsloan 2022-03-09 19:41:55 +0000
committer: zsloan 2022-03-16 14:41:09 -0500
commit: 27530d5a59bded06f644e4704ef21cb6da491350 (patch)
tree: 5afd553ea36787f69dc21c05804f987bf938f764 /wqflask/maintenance/gen_ind_genofiles.py
parent: 743a4623c53d30779cb884a69d0cf2c7ff411f0a (diff)
download: genenetwork2-27530d5a59bded06f644e4704ef21cb6da491350.tar.gz
1 files changed, 28 insertions, 1 deletions
diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py
index 6e818945..b91660a4 100644
--- a/wqflask/maintenance/gen_ind_genofiles.py
+++ b/wqflask/maintenance/gen_ind_genofiles.py
@@ -28,7 +28,7 @@ def main(args):
         target_groups = [args[2]]
 
     # Generate the output .geno files
-    generate_new_genofiles(strain_genotypes(source_genofile), target_groups)
+    generate_new_genofiles(source_genofile, strain_genotypes(source_genofile), target_groups)
 
 def get_strain_for_sample(sample):
     query = (
@@ -41,6 +41,33 @@ def get_strain_for_sample(sample):
     with conn.cursor() as cursor:
         return cursor.execute(query, {"name": name}).fetchone()[0]
 
+def generate_new_genofiles(source_genofile, strain_genotypes, target_groups):
+    """
+    Begin building a new .geno file for each target group (work in progress)

+    For each group, every sample in the group is mapped to the position of its
+    strain in the source genofile's sample list, and the output path for the
+    group's new genofile is computed.

+    Nothing is written yet; strain_genotypes, the position map and the output
+    path are not used yet.
+    """
+    if not target_groups:
+        return

+    # The source sample list is the same for every group, so fetch it once
+    base_samples = group_samples(source_genofile)

+    for group in target_groups:
+        target_samples = group_samples(group)
+        strain_pos_map = map_strain_pos_to_target_group(base_samples, target_samples)

+        # TODO: write the mapped genotypes out to this path
+        new_genofile = app.config.get("GENENETWORK_FILES") + "/genotype/_" + group
+
+
+def map_strain_pos_to_target_group(base_samples, target_samples):
+    """
+    Retrieve corresponding strain position for each sample in the target group

+    This is so the genotypes from the base genofile can be mapped to the samples in the target group

+    For example:
+    Base strains: BXD1, BXD2, BXD3
+    Target samples: BXD1_1, BXD1_2, BXD2_1, BXD3_1, BXD3_2, BXD3_3
+    Returns: [0, 0, 1, 2, 2, 2]

+    Raises ValueError if the strain of a target sample is not in base_samples
+    """
+    # Index the base strains once instead of scanning the list for every sample.
+    # setdefault keeps the first position of a repeated strain, like list.index()
+    strain_positions = {}
+    for pos, strain in enumerate(base_samples):
+        strain_positions.setdefault(strain, pos)

+    pos_map = []
+    for sample in target_samples:
+        sample_strain = get_strain_for_sample(sample)
+        if sample_strain not in strain_positions:
+            raise ValueError(
+                "Strain {!r} of sample {!r} is not in the base samples".format(
+                    sample_strain, sample))
+        pos_map.append(strain_positions[sample_strain])

+    return pos_map
+
 def group_samples(target_group: str) -> List:
     """
     Get the group samples from its "dummy" .geno file (which still contains the sample list)
author	zsloan	2022-03-09 19:41:55 +0000
committer	zsloan	2022-03-16 14:41:09 -0500
commit	27530d5a59bded06f644e4704ef21cb6da491350 (patch)
tree	5afd553ea36787f69dc21c05804f987bf938f764 /wqflask/maintenance/gen_ind_genofiles.py
parent	743a4623c53d30779cb884a69d0cf2c7ff411f0a (diff)
download	genenetwork2-27530d5a59bded06f644e4704ef21cb6da491350.tar.gz