From 983acfdfc523677b4d7501287a000b7fd52a2c39 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Mon, 30 Aug 2021 07:00:38 +0300 Subject: Implement module for interfacing with rust-qtlreaper Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/computations/heatmap.py: move `generate_traits_file` function to new module * gn3/computations/qtlreaper.py: new module to interface with the `rust-qtlreaper` utility. * gn3/settings.py: Provide setting for the path to the `rust-qtlreaper` utility * qtlfilesexport.py: Move `random_string` function to new module. Update to use functions in new module. Provide a module with functions to be used to interface with `rust-qtlreaper`. This module essentially contains all the functions that are needed to build the files needed for, and to run the qtlreaper utility. --- gn3/settings.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'gn3/settings.py') diff --git a/gn3/settings.py b/gn3/settings.py index f4866d5..d137370 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -24,3 +24,6 @@ GN2_BASE_URL = "http://www.genenetwork.org/" # biweight script BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R" + +# qtlreaper command +REAPER_COMMAND = "{}/bin/qtlreaper".format(os.environ.get("GUIX_ENVIRONMENT")) -- cgit v1.2.3 From 6ab866183aeac8553fdcda9217e4445da2b4836b Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Tue, 31 Aug 2021 06:51:18 +0300 Subject: Provide utilities for genotype files Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/genotypes.py: New module * gn3/settings.py: Add new configuration variable * qtlfilesexport.py: Test out new code Add a module containing functions dealing with the genotype files. Add a configuration variable to point to the location of the genotype files. --- gn3/db/genotypes.py | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++ gn3/settings.py | 4 ++++ qtlfilesexport.py | 33 +++---------------------- 3 files changed, 76 insertions(+), 30 deletions(-) create mode 100644 gn3/db/genotypes.py (limited to 'gn3/settings.py') diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py new file mode 100644 index 0000000..610ddde --- /dev/null +++ b/gn3/db/genotypes.py @@ -0,0 +1,69 @@ +"""Genotype utilities""" + +import os +import gzip +from gn3.settings import GENOTYPE_FILES + +def build_genotype_file( + geno_name: str, base_dir: str = GENOTYPE_FILES, + extension: str = "geno"): + """Build the absolute path for the genotype file.""" + return "{}/{}.{}".format(os.path.abspath(base_dir), geno_name, extension) + +def load_genotype_samples(genotype_filename: str, file_type: str = "geno"): + """ + Load sample of strains from genotype files. + + DESCRIPTION: + Traits can contain a varied number of strains, some of which do not exist in + certain genotypes. In order to compute QTLs, GEMMAs, etc, we need to ensure + to pick only those strains that exist in the genotype under consideration + for the traits used in the computation. + + This function loads a list of samples from the genotype files for use in + filtering out unusable strains. + + + PARAMETERS: + genotype_filename: The absolute path to the genotype file to load the + samples from. + file_type: The type of file. Currently supported values are 'geno' and + 'plink'. + """ + file_type_fns = { + "geno": __load_genotype_samples_from_geno, + "plink": __load_genotype_samples_from_plink + } + return file_type_fns[file_type](genotype_filename) + +def __load_genotype_samples_from_geno(genotype_filename: str): + """ + Helper function for `load_genotype_samples` function. + + Loads samples from '.geno' files. + """ + gzipped_filename = "{}.gz".format(genotype_filename) + if os.path.isfile(gzipped_filename): + genofile = gzip.open(gzipped_filename) + else: + genofile = open(genotype_filename) + + for row in genofile: + line = row.strip() + if (not line) or (line.startswith(("#", "@"))): + continue + break + + headers = line.split("\t") + if headers[3] == "Mb": + return headers[4:] + return headers[3:] + +def __load_genotype_samples_from_plink(genotype_filename: str): + """ + Helper function for `load_genotype_samples` function. + + Loads samples from '.plink' files. + """ + genofile = open(genotype_filename) + return [line.split(" ")[1] for line in genofile] diff --git a/gn3/settings.py b/gn3/settings.py index d137370..a08f846 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -27,3 +27,7 @@ BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R" # qtlreaper command REAPER_COMMAND = "{}/bin/qtlreaper".format(os.environ.get("GUIX_ENVIRONMENT")) + +# genotype files +GENOTYPE_FILES = os.environ.get( + "GENOTYPE_FILES", "{}/genotype_files/genotype".format(os.environ.get("HOME"))) diff --git a/qtlfilesexport.py b/qtlfilesexport.py index adc5e77..1db4ab6 100644 --- a/qtlfilesexport.py +++ b/qtlfilesexport.py @@ -11,6 +11,7 @@ from gn3.computations.slink import slink from gn3.db_utils import database_connector from gn3.computations.heatmap import export_trait_data from gn3.db.traits import retrieve_trait_data, retrieve_trait_info +from gn3.db.genotypes import build_genotype_file, load_genotype_samples from gn3.computations.qtlreaper import random_string, generate_traits_file from gn3.computations.heatmap import ( cluster_traits, @@ -41,36 +42,8 @@ def main(): retrieve_trait_info(threshold, fullname, conn) for fullname in trait_fullnames()] traits_data_list = [retrieve_trait_data(t, conn) for t in traits] - # strains = list(set([k for td in traits_data_list for k in td["data"].keys()])) - strains = [# Use only the strains in the BXD.geno genotype file - "BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11", "BXD12", - "BXD13", "BXD14", "BXD15", "BXD16", "BXD18", "BXD19", "BXD20", "BXD21", - "BXD22", "BXD23", "BXD24", "BXD24a", "BXD25", "BXD27", "BXD28", "BXD29", - "BXD30", "BXD31", "BXD32", "BXD33", "BXD34", "BXD35", "BXD36", "BXD37", - "BXD38", "BXD39", "BXD40", "BXD41", "BXD42", "BXD43", "BXD44", "BXD45", - "BXD48", "BXD48a", "BXD49", "BXD50", "BXD51", "BXD52", "BXD53", "BXD54", - "BXD55", "BXD56", "BXD59", "BXD60", "BXD61", "BXD62", "BXD63", "BXD64", - "BXD65", "BXD65a", "BXD65b", "BXD66", "BXD67", "BXD68", "BXD69", - "BXD70", "BXD71", "BXD72", "BXD73", "BXD73a", "BXD73b", "BXD74", - "BXD75", "BXD76", "BXD77", "BXD78", "BXD79", "BXD81", "BXD83", "BXD84", - "BXD85", "BXD86", "BXD87", "BXD88", "BXD89", "BXD90", "BXD91", "BXD93", - "BXD94", "BXD95", "BXD98", "BXD99", "BXD100", "BXD101", "BXD102", - "BXD104", "BXD105", "BXD106", "BXD107", "BXD108", "BXD109", "BXD110", - "BXD111", "BXD112", "BXD113", "BXD114", "BXD115", "BXD116", "BXD117", - "BXD119", "BXD120", "BXD121", "BXD122", "BXD123", "BXD124", "BXD125", - "BXD126", "BXD127", "BXD128", "BXD128a", "BXD130", "BXD131", "BXD132", - "BXD133", "BXD134", "BXD135", "BXD136", "BXD137", "BXD138", "BXD139", - "BXD141", "BXD142", "BXD144", "BXD145", "BXD146", "BXD147", "BXD148", - "BXD149", "BXD150", "BXD151", "BXD152", "BXD153", "BXD154", "BXD155", - "BXD156", "BXD157", "BXD160", "BXD161", "BXD162", "BXD165", "BXD168", - "BXD169", "BXD170", "BXD171", "BXD172", "BXD173", "BXD174", "BXD175", - "BXD176", "BXD177", "BXD178", "BXD180", "BXD181", "BXD183", "BXD184", - "BXD186", "BXD187", "BXD188", "BXD189", "BXD190", "BXD191", "BXD192", - "BXD193", "BXD194", "BXD195", "BXD196", "BXD197", "BXD198", "BXD199", - "BXD200", "BXD201", "BXD202", "BXD203", "BXD204", "BXD205", "BXD206", - "BXD207", "BXD208", "BXD209", "BXD210", "BXD211", "BXD212", "BXD213", - "BXD214", "BXD215", "BXD216", "BXD217", "BXD218", "BXD219", "BXD220" - ] + genotype_filename = build_genotype_file(traits[0]["riset"]) + strains = load_genotype_samples(genotype_filename) exported_traits_data_list = [ export_trait_data(td, strains) for td in traits_data_list] slinked = slink(cluster_traits(exported_traits_data_list)) -- cgit v1.2.3 From 8442204492a28153e995f3147e06c9758cd3bd28 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 20 Sep 2021 08:43:38 +0300 Subject: Enable Cross-Origin Resource Sharing Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/api/heatmaps.py: Fix bugs in data parsing * gn3/app.py: enable CORS * gn3/settings.py: add flask-cors configurations * guix.scm: Add flask-cors dependency For easier testing of the heatmaps generation feature, this commit activates the cross-origin resource sharing for all "localhost" origins. --- gn3/api/heatmaps.py | 4 ++-- gn3/app.py | 7 +++++++ gn3/settings.py | 12 ++++++++++++ guix.scm | 3 ++- 4 files changed, 23 insertions(+), 3 deletions(-) (limited to 'gn3/settings.py') diff --git a/gn3/api/heatmaps.py b/gn3/api/heatmaps.py index eea3ebe..43ac580 100644 --- a/gn3/api/heatmaps.py +++ b/gn3/api/heatmaps.py @@ -15,9 +15,9 @@ def clustered_heatmaps(): "message": "You need to provide at least one trait name." }), 400 conn, _cursor = database_connector() - def setup_trait_fullname(trait): + def parse_trait_fullname(trait): name_parts = trait.split(":") return "{dataset_name}::{trait_name}".format( - dataset_name=trait[1], trait_name=trait[0]) + dataset_name=name_parts[1], trait_name=name_parts[0]) traits_fullnames = [parse_trait_fullname(trait) for trait in traits_names] return jsonify(build_heatmap(traits_fullnames, conn)), 200 diff --git a/gn3/app.py b/gn3/app.py index b4b08d0..6b4c57e 100644 --- a/gn3/app.py +++ b/gn3/app.py @@ -11,6 +11,7 @@ from gn3.api.heatmaps import heatmaps from gn3.api.correlation import correlation from gn3.api.data_entry import data_entry +from flask_cors import CORS def create_app(config: Union[Dict, str, None] = None) -> Flask: """Create a new flask object""" @@ -18,6 +19,12 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask: # Load default configuration app.config.from_object("gn3.settings") + CORS( + app, + origins=app.config["CORS_ORIGINS"], + allow_headers=app.config["CORS_HEADERS"], + supports_credentials=True, intercept_exceptions=False) + # Load environment configuration if "GN3_CONF" in os.environ: app.config.from_envvar('GN3_CONF') diff --git a/gn3/settings.py b/gn3/settings.py index a08f846..9d7bba3 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -31,3 +31,15 @@ REAPER_COMMAND = "{}/bin/qtlreaper".format(os.environ.get("GUIX_ENVIRONMENT")) # genotype files GENOTYPE_FILES = os.environ.get( "GENOTYPE_FILES", "{}/genotype_files/genotype".format(os.environ.get("HOME"))) + +# CROSS-ORIGIN SETUP +CORS_ORIGINS = [ + "http://localhost:*", + "http://127.0.0.1:*" +] + +CORS_HEADERS = [ + "Content-Type", + "Authorization", + "Access-Control-Allow-Credentials" +] diff --git a/guix.scm b/guix.scm index 8e1cf79..fb97142 100644 --- a/guix.scm +++ b/guix.scm @@ -106,7 +106,8 @@ ("r-stringi" ,r-stringi) ("python-plotly" ,python-plotly) ("python-pandas" ,python-pandas) - ("rust-qtlreaper" ,rust-qtlreaper))) + ("rust-qtlreaper" ,rust-qtlreaper) + ("python-flask-cors" ,python-flask-cors))) (build-system python-build-system) (home-page "https://github.com/genenetwork/genenetwork3") (synopsis "GeneNetwork3 API for data science and machine learning.") -- cgit v1.2.3