From 4a7e2c1602ed82aabd7d04953067ba49cb1cebff Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 10 Mar 2022 08:55:26 +0300 Subject: Use context manager with database connection Use the `with` context manager with database connections and cursors to ensure that they are closed once they are no longer needed. Where it was not feasible to use the `with` context manager without a huge refactor/rewrite, the cursors and connections are closed manually. --- wqflask/maintenance/gen_select_dataset.py | 36 +++++++++------------- .../maintenance/generate_probesetfreeze_file.py | 26 ++++++---------- wqflask/maintenance/quantile_normalize.py | 27 ++++++++-------- wqflask/maintenance/set_resource_defaults.py | 35 ++++++++++----------- 4 files changed, 53 insertions(+), 71 deletions(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index db65a11f..9f4b670d 100644 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -39,21 +39,13 @@ from wqflask import app from utility.tools import locate, locate_ignore_error, TEMPDIR, SQL_URI -import MySQLdb - import simplejson as json import urllib.parse -#import sqlalchemy as sa - from pprint import pformat as pf -#Engine = sa.create_engine(zach_settings.SQL_URI) - -# build MySql database connection - -#conn = Engine.connect() +from wqflask.database import database_connection def parse_db_uri(): @@ -71,19 +63,19 @@ def parse_db_uri(): return db_conn_info -def get_species(): +def get_species(cursor): """Build species list""" - #Cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId") - Cursor.execute("select Name, MenuName from Species order by OrderId") - species = list(Cursor.fetchall()) + #cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId") + cursor.execute("select Name, MenuName from Species order by OrderId") + species = list(cursor.fetchall()) return species -def get_groups(species): +def get_groups(cursor, species): """Build groups list""" groups = {} for species_name, _species_full_name in species: - Cursor.execute("""select InbredSet.Name, InbredSet.FullName from InbredSet, + cursor.execute("""select InbredSet.Name, InbredSet.FullName from InbredSet, Species, ProbeFreeze, GenoFreeze, PublishFreeze where Species.Name = '%s' and InbredSet.SpeciesId = Species.Id and @@ -92,7 +84,7 @@ def get_groups(species): or ProbeFreeze.InbredSetId = InbredSet.Id) group by InbredSet.Name order by InbredSet.FullName""" % species_name) - results = Cursor.fetchall() + results = cursor.fetchall() groups[species_name] = list(results) return groups @@ -273,13 +265,13 @@ def build_datasets(species, group, type_name): return datasets -def main(): +def main(cursor): """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" parse_db_uri() - species = get_species() - groups = get_groups(species) + species = get_species(cursor) + groups = get_groups(cursor, species) types = get_types(groups) datasets = get_datasets(types) @@ -316,6 +308,6 @@ def _test_it(): if __name__ == '__main__': - Conn = MySQLdb.Connect(**parse_db_uri()) - Cursor = Conn.cursor() - main() + with database_connection() as conn: + with conn.cursor() as cursor: + main(cursor) diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py index e964c8ed..f43f952b 100644 --- a/wqflask/maintenance/generate_probesetfreeze_file.py +++ b/wqflask/maintenance/generate_probesetfreeze_file.py @@ -8,20 +8,11 @@ import os import collections import csv -import MySQLdb - from base import webqtlConfig from pprint import pformat as pf - -def get_cursor(): - con = MySQLdb.Connect(db=webqtlConfig.DB_UPDNAME, - host=webqtlConfig.MYSQL_UPDSERVER, - user=webqtlConfig.DB_UPDUSER, - passwd=webqtlConfig.DB_UPDPASSWD) - cursor = con.cursor() - return cursor +from wqflask.database import database_connection def show_progress(process, counter): @@ -116,13 +107,14 @@ def main(): "(Oct08)_RankInv_Beta.txt") dataset_name = "Eye_AXBXA_1008_RankInv" - cursor = get_cursor() - strains = get_strains(cursor) - print("Getting probset_vals") - probeset_vals = get_probeset_vals(cursor, dataset_name) - print("Finished getting probeset_vals") - trimmed_strains = trim_strains(strains, probeset_vals) - write_data_matrix_file(trimmed_strains, probeset_vals, filename) + with database_connection as conn: + with conn.cursor() as cursor: + strains = get_strains(cursor) + print("Getting probset_vals") + probeset_vals = get_probeset_vals(cursor, dataset_name) + print("Finished getting probeset_vals") + trimmed_strains = trim_strains(strains, probeset_vals) + write_data_matrix_file(trimmed_strains, probeset_vals, filename) if __name__ == '__main__': diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py index 32780ca6..2e2b0ec3 100644 --- a/wqflask/maintenance/quantile_normalize.py +++ b/wqflask/maintenance/quantile_normalize.py @@ -1,6 +1,5 @@ import sys sys.path.insert(0, './') -import MySQLdb import urllib.parse import numpy as np @@ -9,6 +8,7 @@ import pandas as pd from flask import Flask, g, request from wqflask import app +from wqflask.database import database_connection def parse_db_uri(): @@ -52,7 +52,7 @@ def quantileNormalize(df_input): return df -def set_data(dataset_name): +def set_data(cursor, dataset_name): orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt" sample_list = [] @@ -80,8 +80,8 @@ def set_data(dataset_name): ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and ProbeSetXRef.ProbeSetId = ProbeSet.Id and ProbeSet.Name = '%s'""" % (dataset_name, line1.split('\t')[0]) - Cursor.execute(query) - result_info = Cursor.fetchone() + cursor.execute(query) + result_info = cursor.fetchone() yield { "_index": "traits", @@ -99,15 +99,14 @@ def set_data(dataset_name): if __name__ == '__main__': - Conn = MySQLdb.Connect(**parse_db_uri()) - Cursor = Conn.cursor() + with database_connection as conn: + with conn.cursor as cursor: + success, _ = bulk(es, set_data(cursor, sys.argv[1])) - success, _ = bulk(es, set_data(sys.argv[1])) - - response = es.search( - index="traits", doc_type="trait", body={ - "query": {"match": {"name": "ENSMUSG00000028982"}} - } - ) + response = es.search( + index="traits", doc_type="trait", body={ + "query": {"match": {"name": "ENSMUSG00000028982"}} + } + ) - print(response) + print(response) diff --git a/wqflask/maintenance/set_resource_defaults.py b/wqflask/maintenance/set_resource_defaults.py index 0f472494..22d73ba3 100644 --- a/wqflask/maintenance/set_resource_defaults.py +++ b/wqflask/maintenance/set_resource_defaults.py @@ -30,10 +30,9 @@ from utility.tools import SQL_URI from utility.redis_tools import get_redis_conn, get_user_id, add_resource, get_resources, get_resource_info Redis = get_redis_conn() -import MySQLdb - import urllib.parse +from wqflask.database import database_connection from utility.logger import getLogger logger = getLogger(__name__) @@ -53,14 +52,14 @@ def parse_db_uri(): return db_conn_info -def insert_probeset_resources(default_owner_id): +def insert_probeset_resources(cursor, default_owner_id): current_resources = Redis.hgetall("resources") - Cursor.execute(""" SELECT + cursor.execute(""" SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.confidentiality, ProbeSetFreeze.public FROM ProbeSetFreeze""") - resource_results = Cursor.fetchall() + resource_results = cursor.fetchall() for i, resource in enumerate(resource_results): resource_ob = {} resource_ob['name'] = resource[1] @@ -80,9 +79,9 @@ def insert_probeset_resources(default_owner_id): add_resource(resource_ob, update=False) -def insert_publish_resources(default_owner_id): +def insert_publish_resources(cursor, default_owner_id): current_resources = Redis.hgetall("resources") - Cursor.execute(""" SELECT + cursor.execute(""" SELECT PublishXRef.Id, PublishFreeze.Id, InbredSet.InbredSetCode FROM PublishXRef, PublishFreeze, InbredSet, Publication @@ -91,7 +90,7 @@ def insert_publish_resources(default_owner_id): InbredSet.Id = PublishXRef.InbredSetId AND Publication.Id = PublishXRef.PublicationId""") - resource_results = Cursor.fetchall() + resource_results = cursor.fetchall() for resource in resource_results: if resource[2]: resource_ob = {} @@ -114,14 +113,14 @@ def insert_publish_resources(default_owner_id): continue -def insert_geno_resources(default_owner_id): +def insert_geno_resources(cursor, default_owner_id): current_resources = Redis.hgetall("resources") - Cursor.execute(""" SELECT + cursor.execute(""" SELECT GenoFreeze.Id, GenoFreeze.ShortName, GenoFreeze.confidentiality FROM GenoFreeze""") - resource_results = Cursor.fetchall() + resource_results = cursor.fetchall() for i, resource in enumerate(resource_results): resource_ob = {} resource_ob['name'] = resource[1] @@ -147,15 +146,15 @@ def insert_geno_resources(default_owner_id): def insert_resources(default_owner_id): current_resources = get_resources() print("START") - insert_publish_resources(default_owner_id) + insert_publish_resources(cursor, default_owner_id) print("AFTER PUBLISH") - insert_geno_resources(default_owner_id) + insert_geno_resources(cursor, default_owner_id) print("AFTER GENO") - insert_probeset_resources(default_owner_id) + insert_probeset_resources(cursor, default_owner_id) print("AFTER PROBESET") -def main(): +def main(cursor): """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" Redis.delete("resources") @@ -166,6 +165,6 @@ def main(): if __name__ == '__main__': - Conn = MySQLdb.Connect(**parse_db_uri()) - Cursor = Conn.cursor() - main() + with database_connection() as conn: + with conn.cursor() as cursor: + main(cursor) -- cgit v1.2.3 From e841dd524ee33386a47abb694dea90363de144b8 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 28 Jan 2022 22:31:01 +0000 Subject: Add in-progress gen_ind_genofiles.py gen_ind_genofiles.py is a command line script to generate genotype files for groups of individuals/samples, taking a source .geno or .json file and a target 'dummy' .geno file as input --- wqflask/maintenance/gen_ind_genofiles.py | 120 +++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 wqflask/maintenance/gen_ind_genofiles.py (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py new file mode 100644 index 00000000..546bc60d --- /dev/null +++ b/wqflask/maintenance/gen_ind_genofiles.py @@ -0,0 +1,120 @@ +# Example command: env GN2_PROFILE=/usr/local/guix-profiles/gn-latest-20220122 TMPDIR=/export/local/home/zas1024/gn2-zach/tmp WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG SERVER_PORT=5002 GENENETWORK_FILES=/export/local/home/zas1024/gn2-zach/genotype_files SQL_URI=mysql://webqtlout:webqtlout@localhost/db_webqtl ./bin/genenetwork2 ./etc/default_settings.py -c ./maintenance/gen_ind_genofiles.py + +import sys + +import MySQLdb + +#from flask import Blueprint + +from wqflask import app + +from gn3.db.datasets import retrieve_group_samples + +#gen_geno = Blueprint('gen_geno', __name__) + +def db_conn(): + return MySQLdb.Connect(db=app.config.get("DB_NAME"), + user=app.config.get("DB_USER"), + passwd=app.config.get("DB_PASS"), + host=app.config.get("DB_HOST")) + +def main(args): + + # The file of the "main" .geno file for the group in question + # For example: BXD.geno or BXD.6.geno if converting to BXD individual genofiles + strain_genofile = args[1] + + # Get genotypes from the source strain genofile + strain_genotypes(strain_genofile) + + # The target individuals/samples group(s) we're generating the .geno files for + # This can be passed as either a specific .geno file, or as a JSON file + # containing a set of .geno files (and their corresponding file names and sample lists) + if ".json" in args[2]: + target_groups = json.load(args[2])['genofile'] + else: + target_groups = [args[2]] + +def group_samples(target_group): + """ + Get the group samples from its "dummy" .geno file (which still contains the sample list) + """ + + # Allow for inputting the target group as either the group name or .geno file + file_location = app.config.get("GENENETWORK_FILES") + "/genotype/" + target_group\ + if ".geno" not in target_group: + file_location += ".geno" + + sample_list = [] + with open(file_location, "r") as target_geno: + for i, line in enumerate(target_geno): + # Skip header lines + if line[0] in ["#", "@"] or not len(line): + continue + + line_items = line.split("\t") + sample_list = [item for item in line_items if item not in ["Chr", "Locus", "Mb", "cM"]] + break + + return sample_list + +def strain_genotypes(strain_genofile: str) -> List: + """ + Read genotypes from source strain .geno file + + :param strain_genofile: string of genofile filename + :return: a list of dictionaries representing each marker's genotypes + + Example output: [ + { + 'Chr': '1', + 'Locus': 'marker1', + 'Mb': '10.0', + 'cM': '8.0', + 'genotypes': [('BXD1', 'B'), ('BXD2', 'D'), ('BXD3', 'H'), ...] + }, + ... + ] + """ + + file_location = app.config.get("GENENETWORK_FILES") + "/genotype/" + strain_genofile + + geno_start_col = None + header_columns = [] + sample_list = [] + marker_genotypes = [] + with open(file_location, "r") as source_geno: + for i, line in enumerate(source_geno): + # Skip header lines + if line[0] in ["#", "@"] or not len(line): + continue + + line_items = line.split("\t") + + if "Chr" in line_items: # Header row + # Get the first column index containing genotypes + header_columns = line_items + for j, item in enumerate(line_items): + if item not in ["Chr", "Locus", "Mb", "cM"]: + geno_start_col = j + break + + sample_list = line_items[geno_start_col:] + if not geno_start_col: + print("Check .geno file - expected columns not found") + sys.exit() + else: # Marker rows + this_marker = { + 'Chr': line_items[header_columns.index("Chr")], + 'Locus': line_items[header_columns.index("Locus")], + 'Mb': line_items[header_columns.index("Mb")], + 'cM': line_items[header_columns.index("cM")], + 'genotypes': zip(sample_list, line_items[geno_start_col:]) + } + marker_genotypes.append(this_marker) + + return marker_genotypes + +if __name__ == "__main__": + print("command line arguments:\n\t%s" % sys.argv) + main(sys.argv) -- cgit v1.2.3 From bdf0653adda955b93127a1ddb7e70f9ba490e8b8 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 28 Jan 2022 22:52:49 +0000 Subject: Minor changes/bug fixes - Removed some unused code - Strip marker genotype to avoid newline character at end - Convert zip to list for marker genotypes - Add typing to group_samples - Rename strain_genofile to source_genofile --- wqflask/maintenance/gen_ind_genofiles.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py index 546bc60d..ec0fcd55 100644 --- a/wqflask/maintenance/gen_ind_genofiles.py +++ b/wqflask/maintenance/gen_ind_genofiles.py @@ -1,17 +1,14 @@ # Example command: env GN2_PROFILE=/usr/local/guix-profiles/gn-latest-20220122 TMPDIR=/export/local/home/zas1024/gn2-zach/tmp WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG SERVER_PORT=5002 GENENETWORK_FILES=/export/local/home/zas1024/gn2-zach/genotype_files SQL_URI=mysql://webqtlout:webqtlout@localhost/db_webqtl ./bin/genenetwork2 ./etc/default_settings.py -c ./maintenance/gen_ind_genofiles.py import sys +from typing import List import MySQLdb -#from flask import Blueprint - from wqflask import app from gn3.db.datasets import retrieve_group_samples -#gen_geno = Blueprint('gen_geno', __name__) - def db_conn(): return MySQLdb.Connect(db=app.config.get("DB_NAME"), user=app.config.get("DB_USER"), @@ -22,10 +19,7 @@ def main(args): # The file of the "main" .geno file for the group in question # For example: BXD.geno or BXD.6.geno if converting to BXD individual genofiles - strain_genofile = args[1] - - # Get genotypes from the source strain genofile - strain_genotypes(strain_genofile) + source_genofile = args[1] # The target individuals/samples group(s) we're generating the .geno files for # This can be passed as either a specific .geno file, or as a JSON file @@ -35,13 +29,16 @@ def main(args): else: target_groups = [args[2]] -def group_samples(target_group): + # Generate the output .geno files + generate_new_genofiles(strain_genotypes(source_genofile), target_groups) + +def group_samples(target_group: str) -> List: """ Get the group samples from its "dummy" .geno file (which still contains the sample list) """ # Allow for inputting the target group as either the group name or .geno file - file_location = app.config.get("GENENETWORK_FILES") + "/genotype/" + target_group\ + file_location = app.config.get("GENENETWORK_FILES") + "/genotype/" + target_group if ".geno" not in target_group: file_location += ".geno" @@ -109,7 +106,7 @@ def strain_genotypes(strain_genofile: str) -> List: 'Locus': line_items[header_columns.index("Locus")], 'Mb': line_items[header_columns.index("Mb")], 'cM': line_items[header_columns.index("cM")], - 'genotypes': zip(sample_list, line_items[geno_start_col:]) + 'genotypes': list(zip(sample_list, [item.strip() for item in line_items][geno_start_col:])) } marker_genotypes.append(this_marker) -- cgit v1.2.3 From 45694eb46fa0c337d8b2f1be945bdb96c4a2af44 Mon Sep 17 00:00:00 2001 From: zsloan Date: Fri, 28 Jan 2022 22:54:29 +0000 Subject: Change EOL from CRLF to LF --- wqflask/maintenance/gen_ind_genofiles.py | 234 +++++++++++++++---------------- 1 file changed, 117 insertions(+), 117 deletions(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py index ec0fcd55..abca4a4a 100644 --- a/wqflask/maintenance/gen_ind_genofiles.py +++ b/wqflask/maintenance/gen_ind_genofiles.py @@ -1,117 +1,117 @@ -# Example command: env GN2_PROFILE=/usr/local/guix-profiles/gn-latest-20220122 TMPDIR=/export/local/home/zas1024/gn2-zach/tmp WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG SERVER_PORT=5002 GENENETWORK_FILES=/export/local/home/zas1024/gn2-zach/genotype_files SQL_URI=mysql://webqtlout:webqtlout@localhost/db_webqtl ./bin/genenetwork2 ./etc/default_settings.py -c ./maintenance/gen_ind_genofiles.py - -import sys -from typing import List - -import MySQLdb - -from wqflask import app - -from gn3.db.datasets import retrieve_group_samples - -def db_conn(): - return MySQLdb.Connect(db=app.config.get("DB_NAME"), - user=app.config.get("DB_USER"), - passwd=app.config.get("DB_PASS"), - host=app.config.get("DB_HOST")) - -def main(args): - - # The file of the "main" .geno file for the group in question - # For example: BXD.geno or BXD.6.geno if converting to BXD individual genofiles - source_genofile = args[1] - - # The target individuals/samples group(s) we're generating the .geno files for - # This can be passed as either a specific .geno file, or as a JSON file - # containing a set of .geno files (and their corresponding file names and sample lists) - if ".json" in args[2]: - target_groups = json.load(args[2])['genofile'] - else: - target_groups = [args[2]] - - # Generate the output .geno files - generate_new_genofiles(strain_genotypes(source_genofile), target_groups) - -def group_samples(target_group: str) -> List: - """ - Get the group samples from its "dummy" .geno file (which still contains the sample list) - """ - - # Allow for inputting the target group as either the group name or .geno file - file_location = app.config.get("GENENETWORK_FILES") + "/genotype/" + target_group - if ".geno" not in target_group: - file_location += ".geno" - - sample_list = [] - with open(file_location, "r") as target_geno: - for i, line in enumerate(target_geno): - # Skip header lines - if line[0] in ["#", "@"] or not len(line): - continue - - line_items = line.split("\t") - sample_list = [item for item in line_items if item not in ["Chr", "Locus", "Mb", "cM"]] - break - - return sample_list - -def strain_genotypes(strain_genofile: str) -> List: - """ - Read genotypes from source strain .geno file - - :param strain_genofile: string of genofile filename - :return: a list of dictionaries representing each marker's genotypes - - Example output: [ - { - 'Chr': '1', - 'Locus': 'marker1', - 'Mb': '10.0', - 'cM': '8.0', - 'genotypes': [('BXD1', 'B'), ('BXD2', 'D'), ('BXD3', 'H'), ...] - }, - ... - ] - """ - - file_location = app.config.get("GENENETWORK_FILES") + "/genotype/" + strain_genofile - - geno_start_col = None - header_columns = [] - sample_list = [] - marker_genotypes = [] - with open(file_location, "r") as source_geno: - for i, line in enumerate(source_geno): - # Skip header lines - if line[0] in ["#", "@"] or not len(line): - continue - - line_items = line.split("\t") - - if "Chr" in line_items: # Header row - # Get the first column index containing genotypes - header_columns = line_items - for j, item in enumerate(line_items): - if item not in ["Chr", "Locus", "Mb", "cM"]: - geno_start_col = j - break - - sample_list = line_items[geno_start_col:] - if not geno_start_col: - print("Check .geno file - expected columns not found") - sys.exit() - else: # Marker rows - this_marker = { - 'Chr': line_items[header_columns.index("Chr")], - 'Locus': line_items[header_columns.index("Locus")], - 'Mb': line_items[header_columns.index("Mb")], - 'cM': line_items[header_columns.index("cM")], - 'genotypes': list(zip(sample_list, [item.strip() for item in line_items][geno_start_col:])) - } - marker_genotypes.append(this_marker) - - return marker_genotypes - -if __name__ == "__main__": - print("command line arguments:\n\t%s" % sys.argv) - main(sys.argv) +# Example command: env GN2_PROFILE=/usr/local/guix-profiles/gn-latest-20220122 TMPDIR=/export/local/home/zas1024/gn2-zach/tmp WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG SERVER_PORT=5002 GENENETWORK_FILES=/export/local/home/zas1024/gn2-zach/genotype_files SQL_URI=mysql://webqtlout:webqtlout@localhost/db_webqtl ./bin/genenetwork2 ./etc/default_settings.py -c ./maintenance/gen_ind_genofiles.py + +import sys +from typing import List + +import MySQLdb + +from wqflask import app + +from gn3.db.datasets import retrieve_group_samples + +def db_conn(): + return MySQLdb.Connect(db=app.config.get("DB_NAME"), + user=app.config.get("DB_USER"), + passwd=app.config.get("DB_PASS"), + host=app.config.get("DB_HOST")) + +def main(args): + + # The file of the "main" .geno file for the group in question + # For example: BXD.geno or BXD.6.geno if converting to BXD individual genofiles + source_genofile = args[1] + + # The target individuals/samples group(s) we're generating the .geno files for + # This can be passed as either a specific .geno file, or as a JSON file + # containing a set of .geno files (and their corresponding file names and sample lists) + if ".json" in args[2]: + target_groups = json.load(args[2])['genofile'] + else: + target_groups = [args[2]] + + # Generate the output .geno files + generate_new_genofiles(strain_genotypes(source_genofile), target_groups) + +def group_samples(target_group: str) -> List: + """ + Get the group samples from its "dummy" .geno file (which still contains the sample list) + """ + + # Allow for inputting the target group as either the group name or .geno file + file_location = app.config.get("GENENETWORK_FILES") + "/genotype/" + target_group + if ".geno" not in target_group: + file_location += ".geno" + + sample_list = [] + with open(file_location, "r") as target_geno: + for i, line in enumerate(target_geno): + # Skip header lines + if line[0] in ["#", "@"] or not len(line): + continue + + line_items = line.split("\t") + sample_list = [item for item in line_items if item not in ["Chr", "Locus", "Mb", "cM"]] + break + + return sample_list + +def strain_genotypes(strain_genofile: str) -> List: + """ + Read genotypes from source strain .geno file + + :param strain_genofile: string of genofile filename + :return: a list of dictionaries representing each marker's genotypes + + Example output: [ + { + 'Chr': '1', + 'Locus': 'marker1', + 'Mb': '10.0', + 'cM': '8.0', + 'genotypes': [('BXD1', 'B'), ('BXD2', 'D'), ('BXD3', 'H'), ...] + }, + ... + ] + """ + + file_location = app.config.get("GENENETWORK_FILES") + "/genotype/" + strain_genofile + + geno_start_col = None + header_columns = [] + sample_list = [] + marker_genotypes = [] + with open(file_location, "r") as source_geno: + for i, line in enumerate(source_geno): + # Skip header lines + if line[0] in ["#", "@"] or not len(line): + continue + + line_items = line.split("\t") + + if "Chr" in line_items: # Header row + # Get the first column index containing genotypes + header_columns = line_items + for j, item in enumerate(line_items): + if item not in ["Chr", "Locus", "Mb", "cM"]: + geno_start_col = j + break + + sample_list = line_items[geno_start_col:] + if not geno_start_col: + print("Check .geno file - expected columns not found") + sys.exit() + else: # Marker rows + this_marker = { + 'Chr': line_items[header_columns.index("Chr")], + 'Locus': line_items[header_columns.index("Locus")], + 'Mb': line_items[header_columns.index("Mb")], + 'cM': line_items[header_columns.index("cM")], + 'genotypes': list(zip(sample_list, [item.strip() for item in line_items][geno_start_col:])) + } + marker_genotypes.append(this_marker) + + return marker_genotypes + +if __name__ == "__main__": + print("command line arguments:\n\t%s" % sys.argv) + main(sys.argv) -- cgit v1.2.3 From 743a4623c53d30779cb884a69d0cf2c7ff411f0a Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 9 Mar 2022 19:13:59 +0000 Subject: Add function for getting strain name from sample name --- wqflask/maintenance/gen_ind_genofiles.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py index abca4a4a..6e818945 100644 --- a/wqflask/maintenance/gen_ind_genofiles.py +++ b/wqflask/maintenance/gen_ind_genofiles.py @@ -7,9 +7,7 @@ import MySQLdb from wqflask import app -from gn3.db.datasets import retrieve_group_samples - -def db_conn(): +def conn(): return MySQLdb.Connect(db=app.config.get("DB_NAME"), user=app.config.get("DB_USER"), passwd=app.config.get("DB_PASS"), @@ -32,6 +30,17 @@ def main(args): # Generate the output .geno files generate_new_genofiles(strain_genotypes(source_genofile), target_groups) +def get_strain_for_sample(sample): + query = ( + "SELECT CaseAttributeXRefNew.Value " + "FROM CaseAttributeXRefNew, Strain " + "WHERE CaseAttributeXRefNew.CaseAttributeId=11 " + "AND CaseAttributeXRef.New.StrainId = Strain.Id " + "AND Strain.Name = %(name)s" ) + + with conn.cursor() as cursor: + return cursor.execute(query, {"name": name}).fetchone()[0] + def group_samples(target_group: str) -> List: """ Get the group samples from its "dummy" .geno file (which still contains the sample list) @@ -115,3 +124,4 @@ def strain_genotypes(strain_genofile: str) -> List: if __name__ == "__main__": print("command line arguments:\n\t%s" % sys.argv) main(sys.argv) + -- cgit v1.2.3 From 27530d5a59bded06f644e4704ef21cb6da491350 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 9 Mar 2022 19:41:55 +0000 Subject: Add function for mapping strain to sample pos + begin creating generate_new_genofiles function --- wqflask/maintenance/gen_ind_genofiles.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py index 6e818945..b91660a4 100644 --- a/wqflask/maintenance/gen_ind_genofiles.py +++ b/wqflask/maintenance/gen_ind_genofiles.py @@ -28,7 +28,7 @@ def main(args): target_groups = [args[2]] # Generate the output .geno files - generate_new_genofiles(strain_genotypes(source_genofile), target_groups) + generate_new_genofiles(source_genofile, strain_genotypes(source_genofile), target_groups) def get_strain_for_sample(sample): query = ( @@ -41,6 +41,33 @@ def get_strain_for_sample(sample): with conn.cursor() as cursor: return cursor.execute(query, {"name": name}).fetchone()[0] +def generate_new_genofiles(source_genofile, strain_genotypes, target_groups): + for group in target_groups: + base_samples = group_samples(source_genofile) + target_samples = group_samples(group) + strain_pos_map = map_strain_pos_to_target_group(base_samples, target_samples) + + new_genofile = app.config.get("GENENETWORK_FILES") + "/genotype/_" + group + + +def map_strain_pos_to_target_group(base_samples, target_samples): + """ + Retrieve corresponding strain position for each sample in the target group + + This is so the genotypes from the base genofile can be mapped to the samples in the target group + + For example: + Base strains: BXD1, BXD2, BXD3 + Target samples: BXD1_1, BXD1_2, BXD2_1, BXD3_1, BXD3_2, BXD3_3 + Returns: [0, 0, 1, 2, 2, 2] + """ + pos_map = [] + for i, sample in enumerate(target_samples): + sample_strain = get_strain_for_sample(sample) + pos_map.append(base_samples.index(sample_strain)) + + return pos_map + def group_samples(target_group: str) -> List: """ Get the group samples from its "dummy" .geno file (which still contains the sample list) -- cgit v1.2.3 From f72480dee99ee6ab107bb84c6f3b5c663a04cc86 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 9 Mar 2022 20:01:53 +0000 Subject: Fix the way target/source genofiles were being processed + some other changes - I was mixing up source/target genofiles previously; the JSON file is for the source genofiles - references to the app context are removed in favor of just taking input as arguments or environment variables - Updated example commands --- wqflask/maintenance/gen_ind_genofiles.py | 64 ++++++++++++++++---------------- 1 file changed, 31 insertions(+), 33 deletions(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py index b91660a4..b781d7d1 100644 --- a/wqflask/maintenance/gen_ind_genofiles.py +++ b/wqflask/maintenance/gen_ind_genofiles.py @@ -1,34 +1,41 @@ -# Example command: env GN2_PROFILE=/usr/local/guix-profiles/gn-latest-20220122 TMPDIR=/export/local/home/zas1024/gn2-zach/tmp WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG SERVER_PORT=5002 GENENETWORK_FILES=/export/local/home/zas1024/gn2-zach/genotype_files SQL_URI=mysql://webqtlout:webqtlout@localhost/db_webqtl ./bin/genenetwork2 ./etc/default_settings.py -c ./maintenance/gen_ind_genofiles.py +# Example commands: +# python3 gen_ind_genofiles.py /home/zas1024/gn2-zach/genotype_files/genotype/ /home/zas1024/gn2-zach/new_geno/ BXD-Micturition.geno BXD.json +# python3 gen_ind_genofiles.py /home/zas1024/gn2-zach/genotype_files/genotype/ /home/zas1024/gn2-zach/new_geno/ BXD-Micturition.geno BXD.2.geno BXD.4.geno BXD.5.geno +import os import sys from typing import List import MySQLdb -from wqflask import app - def conn(): - return MySQLdb.Connect(db=app.config.get("DB_NAME"), - user=app.config.get("DB_USER"), - passwd=app.config.get("DB_PASS"), - host=app.config.get("DB_HOST")) + return MySQLdb.Connect(db=os.environ.get("DB_NAME"), + user=os.environ.get("DB_USER"), + passwd=os.environ.get("DB_PASS"), + host=os.environ.get("DB_HOST")) def main(args): - # The file of the "main" .geno file for the group in question - # For example: BXD.geno or BXD.6.geno if converting to BXD individual genofiles - source_genofile = args[1] + # Directory in which .geno files are located + geno_dir = args[1] + + # Directory in which to output new files + out_dir = args[2] + + # The individuals group that we want to generate a .geno file for + target_file = geno_dir + args[3] - # The target individuals/samples group(s) we're generating the .geno files for - # This can be passed as either a specific .geno file, or as a JSON file - # containing a set of .geno files (and their corresponding file names and sample lists) - if ".json" in args[2]: - target_groups = json.load(args[2])['genofile'] + # The source group(s) we're generating the .geno files from + # This can be passed as either a specific .geno file (or set of files as multiple arguments), + # or as a JSON file containing a set of .geno files (and their corresponding file names and sample lists) + if ".json" in args[4]: + source_files = [geno_dir + genofile['location'] for genofile in json.load(args[4])['genofile']] else: - target_groups = [args[2]] + source_files = [geno_dir + group + ".geno" if ".geno" not in group else group for group in args[4:]] # Generate the output .geno files - generate_new_genofiles(source_genofile, strain_genotypes(source_genofile), target_groups) + for source_file in source_files: + generate_new_genofile(source_file, target_file) def get_strain_for_sample(sample): query = ( @@ -41,13 +48,11 @@ def get_strain_for_sample(sample): with conn.cursor() as cursor: return cursor.execute(query, {"name": name}).fetchone()[0] -def generate_new_genofiles(source_genofile, strain_genotypes, target_groups): - for group in target_groups: - base_samples = group_samples(source_genofile) - target_samples = group_samples(group) - strain_pos_map = map_strain_pos_to_target_group(base_samples, target_samples) - - new_genofile = app.config.get("GENENETWORK_FILES") + "/genotype/_" + group +def generate_new_genofiles(source_genofile, target_genofile): + base_samples = group_samples(source_genofile) + base_genotypes = strain_genotypes(source_genofile) + target_samples = group_samples(target_genofile) + strain_pos_map = map_strain_pos_to_target_group(base_samples, target_samples) def map_strain_pos_to_target_group(base_samples, target_samples): @@ -68,18 +73,13 @@ def map_strain_pos_to_target_group(base_samples, target_samples): return pos_map -def group_samples(target_group: str) -> List: +def group_samples(target_file: str) -> List: """ Get the group samples from its "dummy" .geno file (which still contains the sample list) """ - # Allow for inputting the target group as either the group name or .geno file - file_location = app.config.get("GENENETWORK_FILES") + "/genotype/" + target_group - if ".geno" not in target_group: - file_location += ".geno" - sample_list = [] - with open(file_location, "r") as target_geno: + with open(target_file, "r") as target_geno: for i, line in enumerate(target_geno): # Skip header lines if line[0] in ["#", "@"] or not len(line): @@ -110,8 +110,6 @@ def strain_genotypes(strain_genofile: str) -> List: ] """ - file_location = app.config.get("GENENETWORK_FILES") + "/genotype/" + strain_genofile - geno_start_col = None header_columns = [] sample_list = [] -- cgit v1.2.3 From 7e3b91d11ee59c34fc4d59c7ca94d6702ec7c5bd Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 9 Mar 2022 20:26:12 +0000 Subject: Generate JSON file for target genotypes Also store parents/type metadata from source genofiles --- wqflask/maintenance/gen_ind_genofiles.py | 41 +++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 6 deletions(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py index b781d7d1..9a97626d 100644 --- a/wqflask/maintenance/gen_ind_genofiles.py +++ b/wqflask/maintenance/gen_ind_genofiles.py @@ -33,9 +33,22 @@ def main(args): else: source_files = [geno_dir + group + ".geno" if ".geno" not in group else group for group in args[4:]] + if len(source_files) > 1: + # Generate a JSON file pointing to the new target genotype files, in situations where there are multiple source .geno files + target_json_loc = out_dir + args[3].split(".")[:-1] + ".json" + target_json = {'genofile': []} + # Generate the output .geno files for source_file in source_files: - generate_new_genofile(source_file, target_file) + filename, samples = generate_new_genofile(source_file, target_file) + + target_json['genofile'].append({ + 'location': filename.split("/")[-1], + 'title': filename.split("/")[-1], + 'sample_list': samples + }) + + json.dump(target_json, open(target_json_loc, "w")) def get_strain_for_sample(sample): query = ( @@ -67,7 +80,7 @@ def map_strain_pos_to_target_group(base_samples, target_samples): Returns: [0, 0, 1, 2, 2, 2] """ pos_map = [] - for i, sample in enumerate(target_samples): + for sample in target_samples: sample_strain = get_strain_for_sample(sample) pos_map.append(base_samples.index(sample_strain)) @@ -110,14 +123,28 @@ def strain_genotypes(strain_genofile: str) -> List: ] """ + geno_dict = {} + geno_start_col = None header_columns = [] sample_list = [] marker_genotypes = [] with open(file_location, "r") as source_geno: for i, line in enumerate(source_geno): - # Skip header lines - if line[0] in ["#", "@"] or not len(line): + if line[0] == "@": + if "@type" in line: + geno_dict['type'] = line.split(":")[1] + if "@mat" in line: + geno_dict['mat'] = line.split(":")[1] + elif "@pat" in line: + geno_dict['pat'] = line.split(":")[1] + elif "@het" in line: + geno_dict['het'] = line.split(":")[1] + elif "@unk" in line: + geno_dict['unk'] = line.split(":")[1] + + # Skip other header lines + if line[0] == "#" or not len(line): continue line_items = line.split("\t") @@ -140,11 +167,13 @@ def strain_genotypes(strain_genofile: str) -> List: 'Locus': line_items[header_columns.index("Locus")], 'Mb': line_items[header_columns.index("Mb")], 'cM': line_items[header_columns.index("cM")], - 'genotypes': list(zip(sample_list, [item.strip() for item in line_items][geno_start_col:])) + 'genotypes': [item.strip() for item in line_items][geno_start_col:] } marker_genotypes.append(this_marker) - return marker_genotypes + geno_dict['genotypes'] = marker_genotypes + + return geno_dict if __name__ == "__main__": print("command line arguments:\n\t%s" % sys.argv) -- cgit v1.2.3 From a51f95bea5fa9a3b767aaebf75adfa706cf7940f Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 10 Mar 2022 00:45:11 +0000 Subject: Add code generating the new genotype files Also made a large number of other fixes that proved necessary during testing --- wqflask/maintenance/gen_ind_genofiles.py | 114 +++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 29 deletions(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py index 9a97626d..e705119f 100644 --- a/wqflask/maintenance/gen_ind_genofiles.py +++ b/wqflask/maintenance/gen_ind_genofiles.py @@ -2,6 +2,7 @@ # python3 gen_ind_genofiles.py /home/zas1024/gn2-zach/genotype_files/genotype/ /home/zas1024/gn2-zach/new_geno/ BXD-Micturition.geno BXD.json # python3 gen_ind_genofiles.py /home/zas1024/gn2-zach/genotype_files/genotype/ /home/zas1024/gn2-zach/new_geno/ BXD-Micturition.geno BXD.2.geno BXD.4.geno BXD.5.geno +import json import os import sys from typing import List @@ -28,23 +29,37 @@ def main(args): # The source group(s) we're generating the .geno files from # This can be passed as either a specific .geno file (or set of files as multiple arguments), # or as a JSON file containing a set of .geno files (and their corresponding file names and sample lists) + geno_json = {} + source_files = [] if ".json" in args[4]: - source_files = [geno_dir + genofile['location'] for genofile in json.load(args[4])['genofile']] + geno_json = json.load(open(geno_dir + args[4], "r")) + par_f1s = { + "mat": geno_json['mat'], + "pat": geno_json['pat'], + "f1s": geno_json['f1s'] + } + + # List of file titles and locations from JSON + source_files = [{'title': genofile['title'], 'location': geno_dir + genofile['location']} for genofile in geno_json['genofile']] else: - source_files = [geno_dir + group + ".geno" if ".geno" not in group else group for group in args[4:]] + par_f1s = {} + # List of files directly taken from command line arguments, with titles just set to the filename + for group in args[4:]: + file_name = geno_dir + group + ".geno" if ".geno" not in group else group + source_files.append({'title': file_name[:-5], 'location': file_name}) if len(source_files) > 1: # Generate a JSON file pointing to the new target genotype files, in situations where there are multiple source .geno files - target_json_loc = out_dir + args[3].split(".")[:-1] + ".json" + target_json_loc = out_dir + ".".join(args[3].split(".")[:-1]) + ".json" target_json = {'genofile': []} # Generate the output .geno files for source_file in source_files: - filename, samples = generate_new_genofile(source_file, target_file) + filename, samples = generate_new_genofile(source_file['location'], target_file, par_f1s, out_dir) target_json['genofile'].append({ 'location': filename.split("/")[-1], - 'title': filename.split("/")[-1], + 'title': source_file['title'], 'sample_list': samples }) @@ -55,20 +70,59 @@ def get_strain_for_sample(sample): "SELECT CaseAttributeXRefNew.Value " "FROM CaseAttributeXRefNew, Strain " "WHERE CaseAttributeXRefNew.CaseAttributeId=11 " - "AND CaseAttributeXRef.New.StrainId = Strain.Id " + "AND CaseAttributeXRefNew.StrainId = Strain.Id " "AND Strain.Name = %(name)s" ) - with conn.cursor() as cursor: - return cursor.execute(query, {"name": name}).fetchone()[0] + with conn().cursor() as cursor: + cursor.execute(query, {"name": sample.strip()}) + return cursor.fetchone()[0] -def generate_new_genofiles(source_genofile, target_genofile): - base_samples = group_samples(source_genofile) - base_genotypes = strain_genotypes(source_genofile) +def generate_new_genofile(source_genofile, target_genofile, par_f1s, out_dir): + source_samples = group_samples(source_genofile) + source_genotypes = strain_genotypes(source_genofile) target_samples = group_samples(target_genofile) - strain_pos_map = map_strain_pos_to_target_group(base_samples, target_samples) + strain_pos_map = map_strain_pos_to_target_group(source_samples, target_samples, par_f1s) + if len(source_genofile.split("/")[-1].split(".")) > 2: + # The number in the source genofile; for example 4 in BXD.4.geno + source_num = source_genofile.split("/")[-1].split(".")[-2] + target_filename = ".".join(target_genofile.split("/")[-1].split(".")[:-1]) + "." + source_num + ".geno" + else: + target_filename = ".".join(target_genofile.split("/")[-1].split(".")[:-1]) + ".geno" + + file_location = out_dir + target_filename + + with open(file_location, "w") as fh: + for metadata in ["name", "type", "mat", "pat", "het", "unk"]: + fh.write("@" + metadata + ":" + source_genotypes[metadata] + "\n") + + header_line = ["Chr", "Locus", "cM", "Mb"] + target_samples + fh.write("\t".join(header_line)) + + for marker in source_genotypes['markers']: + line_items = [ + marker['Chr'], + marker['Locus'], + marker['cM'], + marker['Mb'] + ] + + for pos in strain_pos_map: + if isinstance(pos, int): + line_items.append(marker['genotypes'][pos]) + else: + if pos in ["mat", "pat"]: + line_items.append(source_genotypes[pos]) + elif pos == "f1s": + line_items.append("H") + else: + line_items.append("U") -def map_strain_pos_to_target_group(base_samples, target_samples): + fh.write("\t".join(line_items) + "\n") + + return file_location, target_samples + +def map_strain_pos_to_target_group(source_samples, target_samples, par_f1s): """ Retrieve corresponding strain position for each sample in the target group @@ -82,7 +136,14 @@ def map_strain_pos_to_target_group(base_samples, target_samples): pos_map = [] for sample in target_samples: sample_strain = get_strain_for_sample(sample) - pos_map.append(base_samples.index(sample_strain)) + if sample_strain in source_samples: + pos_map.append(source_samples.index(sample_strain)) + else: + val = "U" + for key in par_f1s.keys(): + if sample_strain in par_f1s[key]: + val = key + pos_map.append(val) return pos_map @@ -128,27 +189,21 @@ def strain_genotypes(strain_genofile: str) -> List: geno_start_col = None header_columns = [] sample_list = [] - marker_genotypes = [] - with open(file_location, "r") as source_geno: + markers = [] + with open(strain_genofile, "r") as source_geno: for i, line in enumerate(source_geno): if line[0] == "@": - if "@type" in line: - geno_dict['type'] = line.split(":")[1] - if "@mat" in line: - geno_dict['mat'] = line.split(":")[1] - elif "@pat" in line: - geno_dict['pat'] = line.split(":")[1] - elif "@het" in line: - geno_dict['het'] = line.split(":")[1] - elif "@unk" in line: - geno_dict['unk'] = line.split(":")[1] + metadata_type = line[1:].split(":")[0] + if metadata_type in ['name', 'type', 'mat', 'pat', 'het', 'unk']: + geno_dict[metadata_type] = line.split(":")[1].strip() + + continue # Skip other header lines if line[0] == "#" or not len(line): continue line_items = line.split("\t") - if "Chr" in line_items: # Header row # Get the first column index containing genotypes header_columns = line_items @@ -169,9 +224,10 @@ def strain_genotypes(strain_genofile: str) -> List: 'cM': line_items[header_columns.index("cM")], 'genotypes': [item.strip() for item in line_items][geno_start_col:] } - marker_genotypes.append(this_marker) - geno_dict['genotypes'] = marker_genotypes + markers.append(this_marker) + + geno_dict['markers'] = markers return geno_dict -- cgit v1.2.3 From f76bca81639027a87e2d2cc5697258714d7bf7d9 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 10 Mar 2022 00:50:14 +0000 Subject: Replace top comment with docstring --- wqflask/maintenance/gen_ind_genofiles.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py index e705119f..0c4efba0 100644 --- a/wqflask/maintenance/gen_ind_genofiles.py +++ b/wqflask/maintenance/gen_ind_genofiles.py @@ -1,6 +1,19 @@ -# Example commands: -# python3 gen_ind_genofiles.py /home/zas1024/gn2-zach/genotype_files/genotype/ /home/zas1024/gn2-zach/new_geno/ BXD-Micturition.geno BXD.json -# python3 gen_ind_genofiles.py /home/zas1024/gn2-zach/genotype_files/genotype/ /home/zas1024/gn2-zach/new_geno/ BXD-Micturition.geno BXD.2.geno BXD.4.geno BXD.5.geno +#!/usr/bin/env python3 +"""A script that generates the genotype files for groups of individuals, using an existing strain genotype file as a basis + +Example commands: +python3 gen_ind_genofiles.py + /home/zas1024/gn2-zach/genotype_files/genotype/ + /home/zas1024/gn2-zach/new_geno/ + BXD-Micturition.geno + BXD.json +python3 gen_ind_genofiles.py + /home/zas1024/gn2-zach/genotype_files/genotype + /home/zas1024/gn2-zach/new_geno/ + BXD-Micturition.geno + BXD.2.geno BXD.4.geno BXD.5.geno + +""" import json import os -- cgit v1.2.3 From 9126eda6159c5d605c41aae276f5dd9ba8df3f01 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 10 Mar 2022 00:51:07 +0000 Subject: Remove unnecessary print statement --- wqflask/maintenance/gen_ind_genofiles.py | 1 - 1 file changed, 1 deletion(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/gen_ind_genofiles.py b/wqflask/maintenance/gen_ind_genofiles.py index 0c4efba0..8b958efa 100644 --- a/wqflask/maintenance/gen_ind_genofiles.py +++ b/wqflask/maintenance/gen_ind_genofiles.py @@ -245,6 +245,5 @@ def strain_genotypes(strain_genofile: str) -> List: return geno_dict if __name__ == "__main__": - print("command line arguments:\n\t%s" % sys.argv) main(sys.argv) -- cgit v1.2.3 From ec1ca101b15421c83de6094984dcec985a395d71 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 17 Mar 2022 16:20:32 +0300 Subject: Create a db connection correctly * wqflask/maintenance/quantile_normalize.py: Fix how the cursor is created. --- wqflask/maintenance/quantile_normalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wqflask/maintenance') diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py index 2e2b0ec3..90ec72de 100644 --- a/wqflask/maintenance/quantile_normalize.py +++ b/wqflask/maintenance/quantile_normalize.py @@ -100,7 +100,7 @@ def set_data(cursor, dataset_name): if __name__ == '__main__': with database_connection as conn: - with conn.cursor as cursor: + with conn.cursor() as cursor: success, _ = bulk(es, set_data(cursor, sys.argv[1])) response = es.search( -- cgit v1.2.3