about | summary | refs | log | tree | commit | diff
path: root/wqflask/maintenance/gen_ind_genofiles.py
diff options
context:
space:
mode:
author zsloan 2022-03-09 19:41:55 +0000
committer zsloan 2022-03-16 14:41:09 -0500
commit 27530d5a59bded06f644e4704ef21cb6da491350 (patch)
tree 5afd553ea36787f69dc21c05804f987bf938f764 /wqflask/maintenance/gen_ind_genofiles.py
parent 743a4623c53d30779cb884a69d0cf2c7ff411f0a (diff)
download genenetwork2-27530d5a59bded06f644e4704ef21cb6da491350.tar.gz
Add function for mapping strain to sample pos + begin creating generate_new_genofiles function
Diffstat (limited to 'wqflask/maintenance/gen_ind_genofiles.py')
-rw-r--r-- wqflask/maintenance/gen_ind_genofiles.py 29
1 files changed, 28 insertions, 1 deletions
diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py
index 6e818945..b91660a4 100644
--- a/wqflask/maintenance/gen_ind_genofiles.py
+++ b/wqflask/maintenance/gen_ind_genofiles.py
@@ -28,7 +28,7 @@ def main(args):
target_groups = [args[2]]
# Generate the output .geno files
- generate_new_genofiles(strain_genotypes(source_genofile), target_groups)
+ generate_new_genofiles(source_genofile, strain_genotypes(source_genofile), target_groups)
def get_strain_for_sample(sample):
query = (
@@ -41,6 +41,33 @@ def get_strain_for_sample(sample):
with conn.cursor() as cursor:
return cursor.execute(query, {"name": name}).fetchone()[0]
+def generate_new_genofiles(source_genofile, strain_genotypes, target_groups):
+ """
+ Work-in-progress driver for producing a genofile for each target group.
+
+ For every group in target_groups this fetches the sample list of the
+ source genofile and of the group (via group_samples), maps each target
+ sample to the position of its strain in the source sample list, and
+ builds an output path string. Nothing is written or returned yet, and
+ strain_genotypes is accepted but not used yet.
+ """
+ for group in target_groups:
+ # Sample list of the source genofile; fetched again for each group
+ base_samples = group_samples(source_genofile)
+ target_samples = group_samples(group)
+ # Index into base_samples for each sample of the target group
+ strain_pos_map = map_strain_pos_to_target_group(base_samples, target_samples)
+
+ # Output path built from the GENENETWORK_FILES config value; not used yet
+ new_genofile = app.config.get("GENENETWORK_FILES") + "/genotype/_" + group
+
+
+def map_strain_pos_to_target_group(base_samples, target_samples):
+ """
+ Retrieve corresponding strain position for each sample in the target group
+
+ This is so the genotypes from the base genofile can be mapped to the samples in the target group
+
+ For example:
+ Base strains: BXD1, BXD2, BXD3
+ Target samples: BXD1_1, BXD1_2, BXD2_1, BXD3_1, BXD3_2, BXD3_3
+ Returns: [0, 0, 1, 2, 2, 2]
+ """
+ pos_map = []
+ for i, sample in enumerate(target_samples):
+ sample_strain = get_strain_for_sample(sample)
+ pos_map.append(base_samples.index(sample_strain))
+
+ return pos_map
+
def group_samples(target_group: str) -> List:
"""
Get the group samples from its "dummy" .geno file (which still contains the sample list)