diff options
-rw-r--r-- | gn3/api/correlation.py | 3
-rw-r--r-- | gn3/api/heatmaps.py | 3
-rw-r--r-- | gn3/computations/partial_correlations.py | 15
-rw-r--r-- | gn3/computations/qtlreaper.py | 16
-rw-r--r-- | gn3/data_helpers.py | 13
-rw-r--r-- | gn3/db/correlations.py | 20
-rw-r--r-- | gn3/db/datasets.py | 10
-rw-r--r-- | gn3/db/genotypes.py | 44
-rw-r--r-- | gn3/db/partial_correlations.py | 145
-rw-r--r-- | gn3/db/traits.py | 101
-rw-r--r-- | gn3/fs_helpers.py | 5
-rw-r--r-- | gn3/heatmaps.py | 30
-rw-r--r-- | gn3/settings.py | 4
-rw-r--r-- | pytest.ini | 3
-rw-r--r-- | tests/performance/perf_query.py | 6
-rw-r--r-- | tests/unit/computations/test_correlation.py | 8
-rw-r--r-- | tests/unit/computations/test_dictify_by_samples.py | 26
-rw-r--r-- | tests/unit/db/test_datasets.py | 17
-rw-r--r-- | tests/unit/test_data_helpers.py | 2
-rw-r--r-- | tests/unit/test_db_utils.py | 2
-rw-r--r-- | tests/unit/test_file_utils.py | 3 |
21 files changed, 222 insertions, 254 deletions
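A recurring change below is replacing str.format()-built SQL with queries where f-strings inject only "%s" placeholders, while the actual values are passed separately to cursor.execute(). A minimal sketch of that pattern, assuming a DB-API connection like the ones used here (the function name and column list are illustrative, not part of this changeset):

    # Build one "%s" placeholder per value; the values themselves never
    # enter the query string, so the driver handles quoting/escaping.
    def fetch_strains_by_name(conn, names):
        placeholders = ", ".join(["%s"] * len(names))
        query = f"SELECT Id, Name FROM Strain WHERE Name IN ({placeholders})"
        with conn.cursor() as cursor:
            cursor.execute(query, tuple(names))
            return cursor.fetchall()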
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py index cbe01d8..00b3ad5 100644 --- a/gn3/api/correlation.py +++ b/gn3/api/correlation.py @@ -1,9 +1,10 @@ """Endpoints for running correlations""" import json +from functools import reduce + from flask import jsonify from flask import Blueprint from flask import request -from functools import reduce from flask import make_response from gn3.computations.correlations import compute_all_sample_correlation diff --git a/gn3/api/heatmaps.py b/gn3/api/heatmaps.py index 633a061..b2511c3 100644 --- a/gn3/api/heatmaps.py +++ b/gn3/api/heatmaps.py @@ -27,8 +27,7 @@ def clustered_heatmaps(): conn, _cursor = database_connector() def parse_trait_fullname(trait): name_parts = trait.split(":") - return "{dataset_name}::{trait_name}".format( - dataset_name=name_parts[1], trait_name=name_parts[0]) + return f"{name_parts[1]}::{name_parts[0]}" traits_fullnames = [parse_trait_fullname(trait) for trait in traits_names] with io.StringIO() as io_str: diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 3633a59..f7ddfd0 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -141,7 +141,7 @@ def find_identical_traits( return acc + ident[1] def __dictify_controls__(acc, control_item): - ckey = tuple("{:.3f}".format(item) for item in control_item[0]) + ckey = tuple(f"{item:.3f}" for item in control_item[0]) return {**acc, ckey: acc.get(ckey, tuple()) + (control_item[1],)} return (reduce(## for identical control traits @@ -181,8 +181,8 @@ def tissue_correlation( assert len(primary_trait_values) == len(target_trait_values), ( "The lengths of the `primary_trait_values` and `target_trait_values` " "must be equal") - assert method in method_fns.keys(), ( - "Method must be one of: {}".format(",".join(method_fns.keys()))) + assert method in method_fns, ( + f"Method must be one of: {','.join(method_fns.keys())}") corr, pvalue = method_fns[method](primary_trait_values, target_trait_values) return (corr, pvalue) @@ -241,7 +241,7 @@ def partial_correlations_fast(# pylint: disable=[R0913, R0914] function in GeneNetwork1. """ assert method in ("spearman", "pearson") - with open(database_filename, "r") as dataset_file: + with open(database_filename, "r") as dataset_file: # pylint: disable=[W1514] dataset = tuple(dataset_file.readlines()) good_dataset_samples = good_dataset_samples_indexes( @@ -290,12 +290,15 @@ def build_data_frame( if isinstance(zdata[0], float): return x_y_df.join(pandas.DataFrame({"z": zdata})) interm_df = x_y_df.join(pandas.DataFrame( - {"z{}".format(i): val for i, val in enumerate(zdata)})) + {f"z{i}": val for i, val in enumerate(zdata)})) if interm_df.shape[1] == 3: return interm_df.rename(columns={"z0": "z"}) return interm_df def compute_trait_info(primary_vals, control_vals, target, method): + """ + Compute the correlation values for the given arguments. 
+ """ targ_vals = target[0] targ_name = target[1] primary = [ @@ -629,7 +632,7 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911] "status": "not-found", "message": "None of the requested control traits were found."} for trait in cntrl_traits: - if trait["haveinfo"] == False: + if trait["haveinfo"] is False: warnings.warn( (f"Control traits {trait['trait_fullname']} was not found " "- continuing without it."), diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py index d1ff4ac..b61bdae 100644 --- a/gn3/computations/qtlreaper.py +++ b/gn3/computations/qtlreaper.py @@ -27,7 +27,7 @@ def generate_traits_file(samples, trait_values, traits_filename): ["{}\t{}".format( len(trait_values), "\t".join([str(i) for i in t])) for t in trait_values[-1:]]) - with open(traits_filename, "w") as outfile: + with open(traits_filename, "w", encoding="utf8") as outfile: outfile.writelines(data) def create_output_directory(path: str): @@ -68,13 +68,13 @@ def run_reaper( The function will raise a `subprocess.CalledProcessError` exception in case of any errors running the `qtlreaper` command. """ - create_output_directory("{}/qtlreaper".format(output_dir)) - output_filename = "{}/qtlreaper/main_output_{}.txt".format( - output_dir, random_string(10)) + create_output_directory(f"{output_dir}/qtlreaper") + output_filename = ( + f"{output_dir}/qtlreaper/main_output_{random_string(10)}.txt") output_list = ["--main_output", output_filename] if separate_nperm_output: - permu_output_filename: Union[None, str] = "{}/qtlreaper/permu_output_{}.txt".format( - output_dir, random_string(10)) + permu_output_filename: Union[None, str] = ( + f"{output_dir}/qtlreaper/permu_output_{random_string(10)}.txt") output_list = output_list + [ "--permu_output", permu_output_filename] # type: ignore[list-item] else: @@ -135,7 +135,7 @@ def parse_reaper_main_results(results_file): """ Parse the results file of running QTLReaper into a list of dicts. """ - with open(results_file, "r") as infile: + with open(results_file, "r", encoding="utf8") as infile: lines = infile.readlines() def __parse_column_float_value(value): @@ -164,7 +164,7 @@ def parse_reaper_permutation_results(results_file): """ Parse the results QTLReaper permutations into a list of values. """ - with open(results_file, "r") as infile: + with open(results_file, "r", encoding="utf8") as infile: lines = infile.readlines() return [float(line.strip()) for line in lines] diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py index b72fbc5..268a0bb 100644 --- a/gn3/data_helpers.py +++ b/gn3/data_helpers.py @@ -5,9 +5,9 @@ data structures. from math import ceil from functools import reduce -from typing import Any, Tuple, Sequence, Optional +from typing import Any, Tuple, Sequence, Optional, Generator -def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]: +def partition_all(num: int, items: Sequence[Any]) -> Generator: """ Given a sequence `items`, return a new sequence of the same type as `items` with the data partitioned into sections of `n` items per partition. @@ -19,10 +19,9 @@ def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...] 
return acc + ((start, start + num),) iterations = range(ceil(len(items) / num)) - return tuple([# type: ignore[misc] - tuple(items[start:stop]) for start, stop # type: ignore[has-type] - in reduce( - __compute_start_stop__, iterations, tuple())]) + for start, stop in reduce(# type: ignore[misc] + __compute_start_stop__, iterations, tuple()): + yield tuple(items[start:stop]) # type: ignore[has-type] def partition_by(partition_fn, items): """ @@ -49,4 +48,4 @@ def parse_csv_line( function in GeneNetwork1. """ return tuple( - col.strip("{} \t\n".format(quoting)) for col in line.split(delimiter)) + col.strip(f"{quoting} \t\n") for col in line.split(delimiter)) diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index d372607..3ae66ca 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -26,9 +26,9 @@ def get_filename(conn: Any, target_db_name: str, text_files_dir: str) -> Union[ (target_db_name,)) result = cursor.fetchone() if result: - filename = "ProbeSetFreezeId_{tid}_FullName_{fname}.txt".format( - tid=result[0], - fname=result[1].replace(' ', '_').replace('/', '_')) + filename = ( + f"ProbeSetFreezeId_{result[0]}_FullName_" + f"{result[1].replace(' ', '_').replace('/', '_')}.txt") full_filename = f"{text_files_dir}/{filename}" return ( os.path.exists(full_filename) and @@ -53,7 +53,7 @@ def build_temporary_literature_table( query = { "rat": "SELECT rat FROM GeneIDXRef WHERE mouse=%s", "human": "SELECT human FROM GeneIDXRef WHERE mouse=%d"} - if species in query.keys(): + if species in query: cursor.execute(query[species], row[1]) record = cursor.fetchone() if record: @@ -160,8 +160,10 @@ def fetch_symbol_value_pair_dict( symbol: data_id_dict.get(symbol) for symbol in symbol_list if data_id_dict.get(symbol) is not None } - query = "SELECT Id, value FROM TissueProbeSetData WHERE Id IN ({})".format( - ",".join(f"%(id{i})s" for i in range(len(data_ids.values())))) + data_ids_fields = (f"%(id{i})s" for i in range(len(data_ids.values()))) + query = ( + "SELECT Id, value FROM TissueProbeSetData " + f"WHERE Id IN ({','.join(data_ids_fields)})") with conn.cursor() as cursor: cursor.execute( query, @@ -408,12 +410,12 @@ def fetch_sample_ids( `web.webqtl.correlation.CorrelationPage.fetchAllDatabaseData` function in GeneNetwork1. 
""" + samples_fields = (f"%(s{i})s" for i in range(len(sample_names))) query = ( "SELECT Strain.Id FROM Strain, Species " - "WHERE Strain.Name IN ({}) " + f"WHERE Strain.Name IN ({','.join(samples_fields)}) " "AND Strain.SpeciesId=Species.Id " - "AND Species.name=%(species_name)s").format( - ",".join(f"%(s{i})s" for i in range(len(sample_names)))) + "AND Species.name=%(species_name)s") with conn.cursor() as cursor: cursor.execute( query, diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py index 1d6cdf8..b19db53 100644 --- a/gn3/db/datasets.py +++ b/gn3/db/datasets.py @@ -83,8 +83,7 @@ def retrieve_geno_trait_dataset_name( cursor.fetchone())) def retrieve_dataset_name( - trait_type: str, threshold: int, trait_name: str, dataset_name: str, - conn: Any): + trait_type: str, threshold: int, dataset_name: str, conn: Any): """ Retrieve the name of a trait given the trait's name @@ -184,7 +183,6 @@ def retrieve_temp_trait_dataset(): """ Retrieve the dataset that relates to `Temp` traits """ - # pylint: disable=[C0330] return { "searchfield": ["name", "description"], "disfield": ["name", "description"], @@ -198,7 +196,6 @@ def retrieve_geno_trait_dataset(): """ Retrieve the dataset that relates to `Geno` traits """ - # pylint: disable=[C0330] return { "searchfield": ["name", "chr"], "disfield": ["name", "chr", "mb", "source2", "sequence"], @@ -209,7 +206,6 @@ def retrieve_publish_trait_dataset(): """ Retrieve the dataset that relates to `Publish` traits """ - # pylint: disable=[C0330] return { "searchfield": [ "name", "post_publication_description", "abstract", "title", @@ -228,7 +224,6 @@ def retrieve_probeset_trait_dataset(): """ Retrieve the dataset that relates to `ProbeSet` traits """ - # pylint: disable=[C0330] return { "searchfield": [ "name", "description", "probe_target_description", "symbol", @@ -259,8 +254,7 @@ def retrieve_trait_dataset(trait_type, trait, threshold, conn): "dataset_id": None, "dataset_name": trait["db"]["dataset_name"], **retrieve_dataset_name( - trait_type, threshold, trait["trait_name"], - trait["db"]["dataset_name"], conn) + trait_type, threshold, trait["db"]["dataset_name"], conn) } group = retrieve_group_fields( trait_type, trait["trait_name"], dataset_name_info, conn) diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py index 8f18cac..0e19a5f 100644 --- a/gn3/db/genotypes.py +++ b/gn3/db/genotypes.py @@ -2,7 +2,6 @@ import os import gzip -from typing import Union, TextIO from gn3.settings import GENOTYPE_FILES @@ -10,7 +9,7 @@ def build_genotype_file( geno_name: str, base_dir: str = GENOTYPE_FILES, extension: str = "geno"): """Build the absolute path for the genotype file.""" - return "{}/{}.{}".format(os.path.abspath(base_dir), geno_name, extension) + return f"{os.path.abspath(base_dir)}/{geno_name}.{extension}" def load_genotype_samples(genotype_filename: str, file_type: str = "geno"): """ @@ -44,22 +43,23 @@ def __load_genotype_samples_from_geno(genotype_filename: str): Loads samples from '.geno' files. 
""" - gzipped_filename = "{}.gz".format(genotype_filename) + def __remove_comments_and_empty_lines__(rows): + return( + line for line in rows + if line and not line.startswith(("#", "@"))) + + gzipped_filename = f"{genotype_filename}.gz" if os.path.isfile(gzipped_filename): - genofile: Union[TextIO, gzip.GzipFile] = gzip.open(gzipped_filename) + with gzip.open(gzipped_filename) as genofile: + rows = __remove_comments_and_empty_lines__(genofile.readlines()) else: - genofile = open(genotype_filename) - - for row in genofile: - line = row.strip() - if (not line) or (line.startswith(("#", "@"))): # type: ignore[arg-type] - continue - break + with open(genotype_filename, encoding="utf8") as genofile: + rows = __remove_comments_and_empty_lines__(genofile.readlines()) - headers = line.split("\t") # type: ignore[arg-type] + headers = next(rows).split() # type: ignore[arg-type] if headers[3] == "Mb": - return headers[4:] - return headers[3:] + return tuple(headers[4:]) + return tuple(headers[3:]) def __load_genotype_samples_from_plink(genotype_filename: str): """ @@ -67,8 +67,8 @@ def __load_genotype_samples_from_plink(genotype_filename: str): Loads samples from '.plink' files. """ - genofile = open(genotype_filename) - return [line.split(" ")[1] for line in genofile] + with open(genotype_filename, encoding="utf8") as genofile: + return tuple(line.split()[1] for line in genofile) def parse_genotype_labels(lines: list): """ @@ -129,7 +129,7 @@ def parse_genotype_marker(line: str, geno_obj: dict, parlist: tuple): alleles = marker_row[start_pos:] genotype = tuple( - (geno_table[allele] if allele in geno_table.keys() else "U") + (geno_table[allele] if allele in geno_table else "U") for allele in alleles) if len(parlist) > 0: genotype = (-1, 1) + genotype @@ -164,7 +164,7 @@ def parse_genotype_file(filename: str, parlist: tuple = tuple()): """ Parse the provided genotype file into a usable pytho3 data structure. 
""" - with open(filename, "r") as infile: + with open(filename, "r", encoding="utf8") as infile: contents = infile.readlines() lines = tuple(line for line in contents if @@ -175,10 +175,10 @@ def parse_genotype_file(filename: str, parlist: tuple = tuple()): data_lines = tuple(line for line in lines if not line.startswith("@")) header = parse_genotype_header(data_lines[0], parlist) geno_obj = dict(labels + header) - markers = tuple( - [parse_genotype_marker(line, geno_obj, parlist) - for line in data_lines[1:]]) + markers = ( + parse_genotype_marker(line, geno_obj, parlist) + for line in data_lines[1:]) chromosomes = tuple( dict(chromosome) for chromosome in - build_genotype_chromosomes(geno_obj, markers)) + build_genotype_chromosomes(geno_obj, tuple(markers))) return {**geno_obj, "chromosomes": chromosomes} diff --git a/gn3/db/partial_correlations.py b/gn3/db/partial_correlations.py index 0075cad..a28b111 100644 --- a/gn3/db/partial_correlations.py +++ b/gn3/db/partial_correlations.py @@ -48,9 +48,8 @@ def temp_traits_data(conn, traits): "FROM TempData, Temp, Strain " "WHERE TempData.StrainId = Strain.Id " "AND TempData.Id = Temp.DataId " - "AND Temp.name IN ({}) " - "ORDER BY Strain.Name").format( - ", ".join(["%s"] * len(traits))) + "AND Temp.name IN ({', '.join(['%s'] * len(traits))}) " + "ORDER BY Strain.Name") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -79,12 +78,11 @@ def publish_traits_data(conn, traits): "AND NStrain.StrainId = PublishData.StrainId) " "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId " "AND PublishData.Id = PublishXRef.DataId " - "AND PublishXRef.Id IN ({trait_names}) " - "AND PublishFreeze.Id IN ({dataset_ids}) " + f"AND PublishXRef.Id IN ({', '.join(['%s'] * len(traits))}) " + "AND PublishFreeze.Id IN " + f"({', '.join(['%s'] * len(dataset_ids))}) " "AND PublishData.StrainId = Strain.Id " - "ORDER BY Strain.Name").format( - trait_names=", ".join(["%s"] * len(traits)), - dataset_ids=", ".join(["%s"] * len(dataset_ids))) + "ORDER BY Strain.Name") if len(dataset_ids) > 0: with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( @@ -109,19 +107,16 @@ def cellid_traits_data(conn, traits): "LEFT JOIN ProbeSE " "ON (ProbeSE.DataId = ProbeData.Id " "AND ProbeSE.StrainId = ProbeData.StrainId) " - "WHERE Probe.Name IN ({cellids}) " - "AND ProbeSet.Name IN ({trait_names}) " + f"WHERE Probe.Name IN ({', '.join(['%s'] * len(cellids))}) " + f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))}) " "AND Probe.ProbeSetId = ProbeSet.Id " "AND ProbeXRef.ProbeId = Probe.Id " "AND ProbeXRef.ProbeFreezeId = ProbeFreeze.Id " "AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " - "AND ProbeSetFreeze.Name IN ({dataset_names}) " + f"AND ProbeSetFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))}) " "AND ProbeXRef.DataId = ProbeData.Id " "AND ProbeData.StrainId = Strain.Id " - "ORDER BY Strain.Name").format( - cellids=", ".join(["%s"] * len(cellids)), - trait_names=", ".join(["%s"] * len(traits)), - dataset_names=", ".join(["%s"] * len(dataset_names))) + "ORDER BY Strain.Name") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -143,15 +138,13 @@ def probeset_traits_data(conn, traits): "LEFT JOIN ProbeSetSE ON " "(ProbeSetSE.DataId = ProbeSetData.Id " "AND ProbeSetSE.StrainId = ProbeSetData.StrainId) " - "WHERE ProbeSet.Name IN ({trait_names}) " + f"WHERE ProbeSet.Name IN ({', '.join(['%s'] * len(traits))})" "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id " "AND ProbeSetXRef.ProbeSetFreezeId = 
ProbeSetFreeze.Id " - "AND ProbeSetFreeze.Name IN ({dataset_names}) " + f"AND ProbeSetFreeze.Name IN ({', '.join(['%s']*len(dataset_names))}) " "AND ProbeSetXRef.DataId = ProbeSetData.Id " "AND ProbeSetData.StrainId = Strain.Id " - "ORDER BY Strain.Name").format( - trait_names=", ".join(["%s"] * len(traits)), - dataset_names=", ".join(["%s"] * len(dataset_names))) + "ORDER BY Strain.Name") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -170,8 +163,7 @@ def species_ids(conn, traits): query = ( "SELECT Name AS `group`, SpeciesId AS species_id " "FROM InbredSet " - "WHERE Name IN ({groups})").format( - groups=", ".join(["%s"] * len(groups))) + f"WHERE Name IN ({', '.join(['%s'] * len(groups))})") if len(groups) > 0: with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, groups) @@ -190,16 +182,14 @@ def geno_traits_data(conn, traits): "FROM (GenoData, GenoFreeze, Strain, Geno, GenoXRef) " "LEFT JOIN GenoSE ON " "(GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) " - "WHERE Geno.SpeciesId IN ({species_ids}) " - "AND Geno.Name IN ({trait_names}) AND GenoXRef.GenoId = Geno.Id " + f"WHERE Geno.SpeciesId IN ({', '.join(['%s'] * len(sp_ids))}) " + f"AND Geno.Name IN ({', '.join(['%s'] * len(traits))}) " + "AND GenoXRef.GenoId = Geno.Id " "AND GenoXRef.GenoFreezeId = GenoFreeze.Id " - "AND GenoFreeze.Name IN ({dataset_names}) " + f"AND GenoFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))}) " "AND GenoXRef.DataId = GenoData.Id " "AND GenoData.StrainId = Strain.Id " - "ORDER BY Strain.Name").format( - species_ids=", ".join(["%s"] * len(sp_ids)), - trait_names=", ".join(["%s"] * len(traits)), - dataset_names=", ".join(["%s"] * len(dataset_names))) + "ORDER BY Strain.Name") if len(sp_ids) > 0 and len(dataset_names) > 0: with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( @@ -309,18 +299,16 @@ def publish_traits_info( "PublishXRef.Sequence, Phenotype.Units, PublishXRef.comments") query = ( "SELECT " - "PublishXRef.Id AS trait_name, {columns} " + f"PublishXRef.Id AS trait_name, {columns} " "FROM " "PublishXRef, Publication, Phenotype, PublishFreeze " "WHERE " - "PublishXRef.Id IN ({trait_names}) " + f"PublishXRef.Id IN ({', '.join(['%s'] * len(traits))}) " "AND Phenotype.Id = PublishXRef.PhenotypeId " "AND Publication.Id = PublishXRef.PublicationId " "AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId " - "AND PublishFreeze.Id IN ({trait_dataset_ids})").format( - columns=columns, - trait_names=", ".join(["%s"] * len(traits)), - trait_dataset_ids=", ".join(["%s"] * len(trait_dataset_ids))) + "AND PublishFreeze.Id IN " + f"({', '.join(['%s'] * len(trait_dataset_ids))})") if trait_dataset_ids: with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( @@ -337,25 +325,24 @@ def probeset_traits_info( Retrieve information for the probeset traits """ dataset_names = set(trait["db"]["dataset_name"] for trait in traits) - keys = ( - "name", "symbol", "description", "probe_target_description", "chr", - "mb", "alias", "geneid", "genbankid", "unigeneid", "omim", - "refseq_transcriptid", "blatseq", "targetseq", "chipid", "comments", - "strand_probe", "strand_gene", "probe_set_target_region", "proteinid", - "probe_set_specificity", "probe_set_blat_score", - "probe_set_blat_mb_start", "probe_set_blat_mb_end", "probe_set_strand", - "probe_set_note_by_rw", "flag") + columns = ", ".join( + [f"ProbeSet.{x}" for x in + ("name", "symbol", "description", "probe_target_description", "chr", + "mb", "alias", "geneid", 
"genbankid", "unigeneid", "omim", + "refseq_transcriptid", "blatseq", "targetseq", "chipid", "comments", + "strand_probe", "strand_gene", "probe_set_target_region", "proteinid", + "probe_set_specificity", "probe_set_blat_score", + "probe_set_blat_mb_start", "probe_set_blat_mb_end", + "probe_set_strand", "probe_set_note_by_rw", "flag")]) query = ( - "SELECT ProbeSet.Name AS trait_name, {columns} " + f"SELECT ProbeSet.Name AS trait_name, {columns} " "FROM ProbeSet INNER JOIN ProbeSetXRef " "ON ProbeSetXRef.ProbeSetId = ProbeSet.Id " "INNER JOIN ProbeSetFreeze " "ON ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId " - "WHERE ProbeSetFreeze.Name IN ({dataset_names}) " - "AND ProbeSet.Name IN ({trait_names})").format( - columns=", ".join(["ProbeSet.{}".format(x) for x in keys]), - dataset_names=", ".join(["%s"] * len(dataset_names)), - trait_names=", ".join(["%s"] * len(traits))) + "WHERE ProbeSetFreeze.Name IN " + f"({', '.join(['%s'] * len(dataset_names))}) " + f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -372,18 +359,16 @@ def geno_traits_info( This is a rework of the `gn3.db.traits.retrieve_geno_trait_info` function. """ dataset_names = set(trait["db"]["dataset_name"] for trait in traits) - keys = ("name", "chr", "mb", "source2", "sequence") + columns = ", ".join([ + f"Geno.{x}" for x in ("name", "chr", "mb", "source2", "sequence")]) query = ( "SELECT " - "Geno.Name AS trait_name, {columns} " + f"Geno.Name AS trait_name, {columns} " "FROM " "Geno INNER JOIN GenoXRef ON GenoXRef.GenoId = Geno.Id " "INNER JOIN GenoFreeze ON GenoFreeze.Id = GenoXRef.GenoFreezeId " - "WHERE GenoFreeze.Name IN ({dataset_names}) " - "AND Geno.Name IN ({trait_names})").format( - columns=", ".join(["Geno.{}".format(x) for x in keys]), - dataset_names=", ".join(["%s"] * len(dataset_names)), - trait_names=", ".join(["%s"] * len(traits))) + f"WHERE GenoFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))}) " + f"AND Geno.Name IN ({', '.join(['%s'] * len(traits))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -399,12 +384,9 @@ def temp_traits_info( A rework of the `gn3.db.traits.retrieve_temp_trait_info` function. 
""" - keys = ("name", "description") query = ( - "SELECT Name as trait_name, {columns} FROM Temp " - "WHERE Name = ({trait_names})").format( - columns=", ".join(keys), - trait_names=", ".join(["%s"] * len(traits))) + "SELECT Name as trait_name, name, description FROM Temp " + "WHERE Name IN ({', '.join(['%s'] * len(traits))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -468,8 +450,7 @@ def publish_datasets_groups(conn: Any, dataset_names: Tuple[str]): "InbredSet.Id " "FROM InbredSet, PublishFreeze " "WHERE PublishFreeze.InbredSetId = InbredSet.Id " - "AND PublishFreeze.Name IN ({dataset_names})").format( - dataset_names=", ".join(["%s"] * len(dataset_names))) + "AND PublishFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, tuple(dataset_names)) return organise_groups_by_dataset(cursor.fetchall()) @@ -519,8 +500,7 @@ def probeset_datasets_groups(conn, dataset_names): "FROM InbredSet, ProbeSetFreeze, ProbeFreeze " "WHERE ProbeFreeze.InbredSetId = InbredSet.Id " "AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId " - "AND ProbeSetFreeze.Name IN ({names})").format( - names=", ".join(["%s"] * len(dataset_names))) + "AND ProbeSetFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, tuple(dataset_names)) return organise_groups_by_dataset(cursor.fetchall()) @@ -567,8 +547,7 @@ def geno_datasets_groups(conn, dataset_names): "SELECT GenoFreeze.Name AS dataset_name, InbredSet.Name, InbredSet.Id " "FROM InbredSet, GenoFreeze " "WHERE GenoFreeze.InbredSetId = InbredSet.Id " - "AND GenoFreeze.Name IN ({names})").format( - names=", ".join(["%s"] * len(dataset_names))) + "AND GenoFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, tuple(dataset_names)) return organise_groups_by_dataset(cursor.fetchall()) @@ -596,14 +575,13 @@ def temp_datasets_groups(conn, dataset_names): "SELECT Temp.Name AS dataset_name, InbredSet.Name, InbredSet.Id " "FROM InbredSet, Temp " "WHERE Temp.InbredSetId = InbredSet.Id " - "AND Temp.Name IN ({names})").format( - names=", ".join(["%s"] * len(dataset_names))) + "AND Temp.Name IN ({', '.join(['%s'] * len(dataset_names))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, tuple(dataset_names)) return organise_groups_by_dataset(cursor.fetchall()) return {} -def temp_traits_datasets(conn: Any, threshold: int, traits: Tuple[Dict]): +def temp_traits_datasets(conn: Any, threshold: int, traits: Tuple[Dict]): #pylint: disable=[W0613] """ Retrieve datasets for 'Temp' traits. 
""" @@ -657,11 +635,9 @@ def set_publish_qtl_info(conn, qtl, traits): "SELECT PublishXRef.Id AS trait_name, PublishXRef.Locus, " "PublishXRef.LRS, PublishXRef.additive " "FROM PublishXRef, PublishFreeze " - "WHERE PublishXRef.Id IN ({trait_names}) " + f"WHERE PublishXRef.Id IN ({', '.join(['%s'] * len(traits))}) " "AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId " - "AND PublishFreeze.Id IN ({dataset_ids})").format( - trait_names=", ".join(["%s"] * len(traits)), - dataset_ids=", ".join(["%s"] * len(dataset_ids))) + f"AND PublishFreeze.Id IN ({', '.join(['%s'] * len(dataset_ids))})") return query_qtl_info(conn, query, traits, tuple(dataset_ids)) return traits @@ -677,10 +653,9 @@ def set_probeset_qtl_info(conn, qtl, traits): "ProbeSetXRef.mean, ProbeSetXRef.additive " "FROM ProbeSetXRef, ProbeSet " "WHERE ProbeSetXRef.ProbeSetId = ProbeSet.Id " - " AND ProbeSet.Name IN ({trait_names}) " - "AND ProbeSetXRef.ProbeSetFreezeId IN ({dataset_ids})").format( - trait_names=", ".join(["%s"] * len(traits)), - dataset_ids=", ".join(["%s"] * len(dataset_ids))) + f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))}) " + "AND ProbeSetXRef.ProbeSetFreezeId IN " + f"({', '.join(['%s'] * len(dataset_ids))})") return query_qtl_info(conn, query, traits, tuple(dataset_ids)) return traits @@ -694,10 +669,8 @@ def set_sequence(conn, traits): "FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef " "WHERE ProbeSet.Id=ProbeSetXRef.ProbeSetId " "AND ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId " - "AND ProbeSet.Name IN ({trait_names}) " - "AND ProbeSetFreeze.Name IN ({dataset_names})").format( - trait_names=", ".join(["%s"] * len(traits)), - dataset_names=", ".join(["%s"] * len(dataset_names))) + f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))}) " + f"AND ProbeSetFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -727,12 +700,10 @@ def set_homologene_id(conn, traits): "SELECT InbredSet.Name AS `group`, Homologene.GeneId AS geneid, " "HomologeneId " "FROM Homologene, Species, InbredSet " - "WHERE Homologene.GeneId IN ({geneids}) " - "AND InbredSet.Name IN ({groups}) " + f"WHERE Homologene.GeneId IN ({', '.join(['%s'] * len(geneids))}) " + f"AND InbredSet.Name IN ({', '.join(['%s'] * len(groups))}) " "AND InbredSet.SpeciesId = Species.Id " - "AND Species.TaxonomyId = Homologene.TaxonomyId").format( - geneids=", ".join(["%s"] * len(geneids)), - groups=", ".join(["%s"] * len(groups))) + "AND Species.TaxonomyId = Homologene.TaxonomyId") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, (tuple(geneids) + tuple(groups))) results = { diff --git a/gn3/db/traits.py b/gn3/db/traits.py index a7e7e7c..90d1e9d 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -69,7 +69,7 @@ def export_trait_data( return accumulator + (trait_data["data"][sample]["ndata"], ) if dtype == "all": return accumulator + __export_all_types(trait_data["data"], sample) - raise KeyError("Type `%s` is incorrect" % dtype) + raise KeyError(f"Type `{dtype}` is incorrect") if var_exists and n_exists: return accumulator + (None, None, None) if var_exists or n_exists: @@ -125,9 +125,8 @@ def update_sample_data(conn: Any, #pylint: disable=[R0913] "PublishXRef.Id = %s AND " "PublishXRef.PhenotypeId = %s " "AND PublishData.StrainId = Strain.Id " - "AND Strain.Name = \"%s\"") % (trait_name, - phenotype_id, - str(strain_name))) + "AND Strain.Name = %s"), + (trait_name, phenotype_id, str(strain_name))) strain_id, data_id = 
cursor.fetchone() updated_published_data: int = 0 updated_se_data: int = 0 @@ -137,8 +136,8 @@ def update_sample_data(conn: Any, #pylint: disable=[R0913] # Update the PublishData table if value == "x": cursor.execute(("DELETE FROM PublishData " - "WHERE StrainId = %s AND Id = %s") - % (strain_id, data_id)) + "WHERE StrainId = %s AND Id = %s"), + (strain_id, data_id)) updated_published_data = cursor.rowcount else: cursor.execute(("UPDATE PublishData SET value = %s " @@ -148,19 +147,20 @@ def update_sample_data(conn: Any, #pylint: disable=[R0913] if not updated_published_data: cursor.execute( - "SELECT * FROM " - "PublishData WHERE StrainId = " - "%s AND Id = %s" % (strain_id, data_id)) + ("SELECT * FROM " + "PublishData WHERE StrainId = " + "%s AND Id = %s"), + (strain_id, data_id)) if not cursor.fetchone(): cursor.execute(("INSERT INTO PublishData (Id, StrainId, " - " value) VALUES (%s, %s, %s)") % + " value) VALUES (%s, %s, %s)"), (data_id, strain_id, value)) updated_published_data = cursor.rowcount # Update the PublishSE table if error == "x": cursor.execute(("DELETE FROM PublishSE " - "WHERE StrainId = %s AND DataId = %s") % + "WHERE StrainId = %s AND DataId = %s"), (strain_id, data_id)) updated_se_data = cursor.rowcount else: @@ -171,21 +171,22 @@ def update_sample_data(conn: Any, #pylint: disable=[R0913] updated_se_data = cursor.rowcount if not updated_se_data: cursor.execute( - "SELECT * FROM " - "PublishSE WHERE StrainId = " - "%s AND DataId = %s" % (strain_id, data_id)) + ("SELECT * FROM " + "PublishSE WHERE StrainId = " + "%s AND DataId = %s"), + (strain_id, data_id)) if not cursor.fetchone(): - cursor.execute(("INSERT INTO PublishSE (StrainId, DataId, " - " error) VALUES (%s, %s, %s)") % - (strain_id, data_id, - None if error == "x" else error)) + cursor.execute( + ("INSERT INTO PublishSE (StrainId, DataId, " + " error) VALUES (%s, %s, %s)"), + (strain_id, data_id, None if error == "x" else error)) updated_se_data = cursor.rowcount # Update the NStrain table if count == "x": cursor.execute(("DELETE FROM NStrain " - "WHERE StrainId = %s AND DataId = %s" % - (strain_id, data_id))) + "WHERE StrainId = %s AND DataId = %s"), + (strain_id, data_id)) updated_n_strains = cursor.rowcount else: cursor.execute(("UPDATE NStrain SET count = %s " @@ -194,13 +195,14 @@ def update_sample_data(conn: Any, #pylint: disable=[R0913] updated_n_strains = cursor.rowcount if not updated_n_strains: cursor.execute( - "SELECT * FROM " - "NStrain WHERE StrainId = " - "%s AND DataId = %s" % (strain_id, data_id)) + ("SELECT * FROM " + "NStrain WHERE StrainId = " + "%s AND DataId = %s"), + (strain_id, data_id)) if not cursor.fetchone(): cursor.execute(("INSERT INTO NStrain " "(StrainId, DataId, count) " - "VALUES (%s, %s, %s)") % + "VALUES (%s, %s, %s)"), (strain_id, data_id, count)) updated_n_strains = cursor.rowcount return (updated_published_data, @@ -237,9 +239,8 @@ def delete_sample_data(conn: Any, "PublishXRef.Id = %s AND " "PublishXRef.PhenotypeId = %s " "AND PublishData.StrainId = Strain.Id " - "AND Strain.Name = \"%s\"") % (trait_name, - phenotype_id, - str(strain_name))) + "AND Strain.Name = %s"), + (trait_name, phenotype_id, str(strain_name))) # Check if it exists if the data was already deleted: if _result := cursor.fetchone(): @@ -248,20 +249,20 @@ def delete_sample_data(conn: Any, # Only run if the strain_id and data_id exist if strain_id and data_id: cursor.execute(("DELETE FROM PublishData " - "WHERE StrainId = %s AND Id = %s") - % (strain_id, data_id)) + "WHERE StrainId = %s AND Id = %s"), + 
(strain_id, data_id)) deleted_published_data = cursor.rowcount # Delete the PublishSE table cursor.execute(("DELETE FROM PublishSE " - "WHERE StrainId = %s AND DataId = %s") % + "WHERE StrainId = %s AND DataId = %s"), (strain_id, data_id)) deleted_se_data = cursor.rowcount # Delete the NStrain table cursor.execute(("DELETE FROM NStrain " - "WHERE StrainId = %s AND DataId = %s" % - (strain_id, data_id))) + "WHERE StrainId = %s AND DataId = %s"), + (strain_id, data_id)) deleted_n_strains = cursor.rowcount except Exception as e: #pylint: disable=[C0103, W0612] conn.rollback() @@ -312,7 +313,7 @@ def insert_sample_data(conn: Any, #pylint: disable=[R0913] # Insert into the PublishSE table if error is specified if error and error != "x": cursor.execute(("INSERT INTO PublishSE (StrainId, DataId, " - " error) VALUES (%s, %s, %s)") % + " error) VALUES (%s, %s, %s)"), (strain_id, data_id, error)) inserted_se_data = cursor.rowcount @@ -320,7 +321,7 @@ def insert_sample_data(conn: Any, #pylint: disable=[R0913] if count and count != "x": cursor.execute(("INSERT INTO NStrain " "(StrainId, DataId, count) " - "VALUES (%s, %s, %s)") % + "VALUES (%s, %s, %s)"), (strain_id, data_id, count)) inserted_n_strains = cursor.rowcount except Exception as e: #pylint: disable=[C0103, W0612] @@ -356,14 +357,14 @@ def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any): "PublishXRef.comments") query = ( "SELECT " - "{columns} " + f"{columns} " "FROM " "PublishXRef, Publication, Phenotype " "WHERE " "PublishXRef.Id = %(trait_name)s AND " "Phenotype.Id = PublishXRef.PhenotypeId AND " "Publication.Id = PublishXRef.PublicationId AND " - "PublishXRef.InbredSetId = %(trait_dataset_id)s").format(columns=columns) + "PublishXRef.InbredSetId = %(trait_dataset_id)s") with conn.cursor() as cursor: cursor.execute( query, @@ -399,17 +400,16 @@ def retrieve_probeset_trait_info(trait_data_source: Dict[str, Any], conn: Any): "probe_set_specificity", "probe_set_blat_score", "probe_set_blat_mb_start", "probe_set_blat_mb_end", "probe_set_strand", "probe_set_note_by_rw", "flag") + columns = (f"ProbeSet.{x}" for x in keys) query = ( - "SELECT " - "{columns} " + f"SELECT {','.join(columns)} " "FROM " "ProbeSet, ProbeSetFreeze, ProbeSetXRef " "WHERE " "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " - "ProbeSet.Name = %(trait_name)s").format( - columns=", ".join(["ProbeSet.{}".format(x) for x in keys])) + "ProbeSet.Name = %(trait_name)s") with conn.cursor() as cursor: cursor.execute( query, @@ -425,16 +425,15 @@ def retrieve_geno_trait_info(trait_data_source: Dict[str, Any], conn: Any): https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" keys = ("name", "chr", "mb", "source2", "sequence") + columns = ", ".join(f"Geno.{x}" for x in keys) query = ( - "SELECT " - "{columns} " + f"SELECT {columns} " "FROM " - "Geno, GenoFreeze, GenoXRef " + "Geno INNER JOIN GenoXRef ON GenoXRef.GenoId = Geno.Id " + "INNER JOIN GenoFreeze ON GenoFreeze.Id = GenoXRef.GenoFreezeId " "WHERE " - "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " "GenoFreeze.Name = %(trait_dataset_name)s AND " - "Geno.Name = %(trait_name)s").format( - columns=", ".join(["Geno.{}".format(x) for x in keys])) + "Geno.Name = %(trait_name)s") with conn.cursor() as cursor: cursor.execute( query, @@ -451,8 +450,8 @@ def retrieve_temp_trait_info(trait_data_source: Dict[str, Any], conn: Any): 
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452""" keys = ("name", "description") query = ( - "SELECT {columns} FROM Temp " - "WHERE Name = %(trait_name)s").format(columns=", ".join(keys)) + f"SELECT {', '.join(keys)} FROM Temp " + "WHERE Name = %(trait_name)s") with conn.cursor() as cursor: cursor.execute( query, @@ -577,7 +576,7 @@ def load_qtl_info(qtl, trait_type, trait_info, conn): "Publish": load_publish_qtl_info, "ProbeSet": load_probeset_qtl_info } - if trait_info["name"] not in qtl_info_functions.keys(): + if trait_info["name"] not in qtl_info_functions: return trait_info return qtl_info_functions[trait_type](trait_info, conn) @@ -947,8 +946,8 @@ def retrieve_trait_data(trait: dict, conn: Any, samplelist: Sequence[str] = tupl def generate_traits_filename(base_path: str = TMPDIR): """Generate a unique filename for use with generated traits files.""" - return "{}/traits_test_file_{}.txt".format( - os.path.abspath(base_path), random_string(10)) + return ( + f"{os.path.abspath(base_path)}/traits_test_file_{random_string(10)}.txt") def export_informative(trait_data: dict, inc_var: bool = False) -> tuple: diff --git a/gn3/fs_helpers.py b/gn3/fs_helpers.py index 73f6567..e2f7ee2 100644 --- a/gn3/fs_helpers.py +++ b/gn3/fs_helpers.py @@ -71,9 +71,8 @@ contents to TARGET_DIR/<dir-hash>. os.mkdir(os.path.join(target_dir, token)) gzipped_file.save(tar_target_loc) # Extract to "tar_target_loc/token" - tar = tarfile.open(tar_target_loc) - tar.extractall(path=os.path.join(target_dir, token)) - tar.close() + with tarfile.open(tar_target_loc) as tar: + tar.extractall(path=os.path.join(target_dir, token)) # pylint: disable=W0703 except Exception: return {"status": 128, "error": "gzip failed to unpack file"} diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py index f0af409..91437bb 100644 --- a/gn3/heatmaps.py +++ b/gn3/heatmaps.py @@ -40,16 +40,15 @@ def trait_display_name(trait: Dict): if trait["db"]["dataset_type"] == "Temp": desc = trait["description"] if desc.find("PCA") >= 0: - return "%s::%s" % ( - trait["db"]["displayname"], - desc[desc.rindex(':')+1:].strip()) - return "%s::%s" % ( - trait["db"]["displayname"], - desc[:desc.index('entered')].strip()) - prefix = "%s::%s" % ( - trait["db"]["dataset_name"], trait["trait_name"]) + return ( + f'{trait["db"]["displayname"]}::' + f'{desc[desc.rindex(":")+1:].strip()}') + return ( + f'{trait["db"]["displayname"]}::' + f'{desc[:desc.index("entered")].strip()}') + prefix = f'{trait["db"]["dataset_name"]}::{trait["trait_name"]}' if trait["cellid"]: - return "%s::%s" % (prefix, trait["cellid"]) + return f'{prefix}::{trait["cellid"]}' return prefix return trait["description"] @@ -132,8 +131,7 @@ def build_heatmap( traits_order = compute_traits_order(slinked) samples_and_values = retrieve_samples_and_values( traits_order, samples, exported_traits_data_list) - traits_filename = "{}/traits_test_file_{}.txt".format( - TMPDIR, random_string(10)) + traits_filename = f"{TMPDIR}/traits_test_file_{random_string(10)}.txt" generate_traits_file( samples_and_values[0][1], [t[2] for t in samples_and_values], @@ -310,7 +308,7 @@ def clustered_heatmap( vertical_spacing=0.010, horizontal_spacing=0.001, subplot_titles=["" if vertical else x_axis["label"]] + [ - "Chromosome: {}".format(chromo) if vertical else chromo + f"Chromosome: {chromo}" if vertical else chromo for chromo in x_axis_data],#+ x_axis_data, figure=ff.create_dendrogram( np.array(clustering_data), @@ -332,7 +330,7 @@ def clustered_heatmap( col=(1 if vertical else 
(i + 2))) axes_layouts = { - "{axis}axis{count}".format( + "{axis}axis{count}".format( # pylint: disable=[C0209] axis=("y" if vertical else "x"), count=(i+1 if i > 0 else "")): { "mirror": False, @@ -341,12 +339,10 @@ def clustered_heatmap( } for i in range(num_plots)} - print("vertical?: {} ==> {}".format("T" if vertical else "F", axes_layouts)) - fig.update_layout({ "width": 800 if vertical else 4000, "height": 4000 if vertical else 800, - "{}axis".format("x" if vertical else "y"): { + "{}axis".format("x" if vertical else "y"): { # pylint: disable=[C0209] "mirror": False, "ticks": "", "side": "top" if vertical else "left", @@ -354,7 +350,7 @@ def clustered_heatmap( "tickangle": 90 if vertical else 0, "ticklabelposition": "outside top" if vertical else "outside left" }, - "{}axis".format("y" if vertical else "x"): { + "{}axis".format("y" if vertical else "x"): { # pylint: disable=[C0209] "mirror": False, "showgrid": True, "title": "Distance", diff --git a/gn3/settings.py b/gn3/settings.py index 87e8f4b..6eec2a1 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -27,11 +27,11 @@ GN2_BASE_URL = "http://www.genenetwork.org/" # wgcna script WGCNA_RSCRIPT = "wgcna_analysis.R" # qtlreaper command -REAPER_COMMAND = "{}/bin/qtlreaper".format(os.environ.get("GUIX_ENVIRONMENT")) +REAPER_COMMAND = f"{os.environ.get('GUIX_ENVIRONMENT')}/bin/qtlreaper" # genotype files GENOTYPE_FILES = os.environ.get( - "GENOTYPE_FILES", "{}/genotype_files/genotype".format(os.environ.get("HOME"))) + "GENOTYPE_FILES", f"{os.environ.get('HOME')}/genotype_files/genotype") # CROSS-ORIGIN SETUP def parse_env_cors(default): @@ -4,4 +4,5 @@ markers = slow unit_test integration_test - performance_test
\ No newline at end of file + performance_test + under_dev
\ No newline at end of file diff --git a/tests/performance/perf_query.py b/tests/performance/perf_query.py index 12cb944..c22dcf5 100644 --- a/tests/performance/perf_query.py +++ b/tests/performance/perf_query.py @@ -42,7 +42,7 @@ def query_executor(query: str, def fetch_probeset_query(dataset_name: str): """contains queries for datasets""" - query = """SELECT * from ProbeSetData + query = f"""SELECT * from ProbeSetData where StrainID in (4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, 35, 36, 37, 39, 98, 99, 100, 103, @@ -53,8 +53,8 @@ def fetch_probeset_query(dataset_name: str): and id in (SELECT ProbeSetXRef.DataId FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id - and ProbeSetFreeze.Name = '{}' - and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(dataset_name) + and ProbeSetFreeze.Name = '{dataset_name}' + and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""" return query diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py index 69d4c52..267ced3 100644 --- a/tests/unit/computations/test_correlation.py +++ b/tests/unit/computations/test_correlation.py @@ -1,10 +1,10 @@ """Module contains the tests for correlation""" +import math from unittest import TestCase from unittest import mock +from collections import namedtuple import pytest -from collections import namedtuple -import math from numpy.testing import assert_almost_equal from gn3.computations.correlations import normalize_values @@ -58,12 +58,12 @@ class DataBase(QueryableMixin): """expects the expected results value to be an array""" self.password = password self.db_name = db_name - self.__query_options = None + self.__query_options = None # pylint: disable=[W0238] self.results_generator(expected_results) def execute(self, query_options): """method to execute an sql query""" - self.__query_options = query_options + self.__query_options = query_options # pylint: disable=[W0238] return 1 def cursor(self): diff --git a/tests/unit/computations/test_dictify_by_samples.py b/tests/unit/computations/test_dictify_by_samples.py index 8a1332f..5cd3eca 100644 --- a/tests/unit/computations/test_dictify_by_samples.py +++ b/tests/unit/computations/test_dictify_by_samples.py @@ -1,7 +1,11 @@ +"""Property tests for `gn3.computations.partial_correlations.dictify_by_samples` + function""" from math import isnan -import pytest from collections.abc import Sequence + +import pytest from hypothesis import given, strategies as st + from gn3.computations.partial_correlations import dictify_by_samples @@ -53,22 +57,22 @@ def check_values(samples, values, variances, row): for smp, val, var in zip(samples, values, variances) if smp != "") -non_empty_samples = st.lists( +generated_non_empty_samples = st.lists( st.text(min_size=1, max_size=15).map( lambda s: s.strip())) -empty_samples = st.text( +generated_empty_samples = st.text( alphabet=" \t\n\r\f\v", min_size=1, max_size=15).filter( lambda s: len(s.strip()) == 0) -values = st.lists(st.floats()) -variances = st.lists(st.one_of(st.none(), st.floats())) -other = st.lists(st.integers()) +generated_values = st.lists(st.floats()) +generated_variances = st.lists(st.one_of(st.none(), st.floats())) +generated_other = st.lists(st.integers()) @pytest.mark.unit_test @given(svv=st.tuples( - st.lists(non_empty_samples), - st.lists(values), - st.lists(variances), - st.lists(other))) + st.lists(generated_non_empty_samples), + st.lists(generated_values), + 
st.lists(generated_variances), + st.lists(generated_other))) def test_dictifify_by_samples_with_nonempty_samples_strings(svv): """ Test for `dictify_by_samples`. @@ -94,7 +98,7 @@ def test_dictifify_by_samples_with_nonempty_samples_strings(svv): @pytest.mark.unit_test @given(svv=st.tuples( st.lists( - st.lists(empty_samples,min_size=1), + st.lists(generated_empty_samples,min_size=1), min_size=1), st.lists(st.lists(st.floats(), min_size=1), min_size=1), st.lists( diff --git a/tests/unit/db/test_datasets.py b/tests/unit/db/test_datasets.py index 0b24489..e4abd2f 100644 --- a/tests/unit/db/test_datasets.py +++ b/tests/unit/db/test_datasets.py @@ -15,14 +15,14 @@ class TestDatasetsDBFunctions(TestCase): @pytest.mark.unit_test def test_retrieve_dataset_name(self): """Test that the function is called correctly.""" - for trait_type, thresh, trait_name, dataset_name, columns, table, expected in [ - ["ProbeSet", 9, "probesetTraitName", "probesetDatasetName", + for trait_type, thresh, dataset_name, columns, table, expected in [ + ["ProbeSet", 9, "probesetDatasetName", "Id, Name, FullName, ShortName, DataScale", "ProbeSetFreeze", {"dataset_id": None, "dataset_name": "probesetDatasetName", "dataset_fullname": "probesetDatasetName"}], - ["Geno", 3, "genoTraitName", "genoDatasetName", + ["Geno", 3, "genoDatasetName", "Id, Name, FullName, ShortName", "GenoFreeze", {}], - ["Publish", 6, "publishTraitName", "publishDatasetName", + ["Publish", 6, "publishDatasetName", "Id, Name, FullName, ShortName", "PublishFreeze", {}]]: db_mock = mock.MagicMock() with self.subTest(trait_type=trait_type): @@ -30,16 +30,15 @@ class TestDatasetsDBFunctions(TestCase): cursor.fetchone.return_value = {} self.assertEqual( retrieve_dataset_name( - trait_type, thresh, trait_name, dataset_name, db_mock), + trait_type, thresh, dataset_name, db_mock), expected) cursor.execute.assert_called_once_with( - "SELECT {cols} " - "FROM {table} " + f"SELECT {columns} " + f"FROM {table} " "WHERE public > %(threshold)s AND " "(Name = %(name)s " "OR FullName = %(name)s " - "OR ShortName = %(name)s)".format( - table=table, cols=columns), + "OR ShortName = %(name)s)", {"threshold": thresh, "name": dataset_name}) @pytest.mark.unit_test diff --git a/tests/unit/test_data_helpers.py b/tests/unit/test_data_helpers.py index b6de42e..e7c3ae9 100644 --- a/tests/unit/test_data_helpers.py +++ b/tests/unit/test_data_helpers.py @@ -37,7 +37,7 @@ class TestDataHelpers(TestCase): (13, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ((0, 1, 2, 3, 4, 5, 6, 7, 8, 9), ))): with self.subTest(n=count, items=items): - self.assertEqual(partition_all(count, items), expected) + self.assertEqual(tuple(partition_all(count, items)), expected) @pytest.mark.unit_test def test_parse_csv_line(self): diff --git a/tests/unit/test_db_utils.py b/tests/unit/test_db_utils.py index dd0cd5d..96ee68f 100644 --- a/tests/unit/test_db_utils.py +++ b/tests/unit/test_db_utils.py @@ -2,9 +2,9 @@ from unittest import TestCase from unittest import mock +from types import SimpleNamespace import pytest -from types import SimpleNamespace from gn3.db_utils import database_connector from gn3.db_utils import parse_db_url diff --git a/tests/unit/test_file_utils.py b/tests/unit/test_file_utils.py index 77fea88..7048d43 100644 --- a/tests/unit/test_file_utils.py +++ b/tests/unit/test_file_utils.py @@ -92,7 +92,8 @@ extracting the file""" test_dir = "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test" if not os.path.exists(test_dir): os.mkdir(test_dir) - open(f"{test_dir}/genotype.txt", "a").close() + with 
open(f"{test_dir}/genotype.txt", "a", encoding="utf8"): + pass file_loc = cache_ipfs_file( ipfs_file=("/ipfs/" "QmQPeNsJPyVWPFDVHb" |