diff options
author | Frederick Muriuki Muriithi | 2022-02-21 16:23:06 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2022-02-21 16:23:06 +0300 |
commit | a35fce27875d9db80dce1976b6f8ee8c00ecfe0a (patch) | |
tree | 8e8f815a6e3d37348bdb8f253f5ec53f72dc2dbc /gn3/db/partial_correlations.py | |
parent | c84b07b8c5ac0a42c0fab929c75823b30b548191 (diff) | |
download | genenetwork3-a35fce27875d9db80dce1976b6f8ee8c00ecfe0a.tar.gz |
Fix a myriad of linter issues
* Use `with` in place of plain `open`
* Use f-strings in place of `str.format()`
* Remove string interpolation from queries - provide data as query parameters
* other minor fixes
Diffstat (limited to 'gn3/db/partial_correlations.py')
-rw-r--r-- | gn3/db/partial_correlations.py | 145 |
1 files changed, 58 insertions, 87 deletions
diff --git a/gn3/db/partial_correlations.py b/gn3/db/partial_correlations.py index 0075cad..a28b111 100644 --- a/gn3/db/partial_correlations.py +++ b/gn3/db/partial_correlations.py @@ -48,9 +48,8 @@ def temp_traits_data(conn, traits): "FROM TempData, Temp, Strain " "WHERE TempData.StrainId = Strain.Id " "AND TempData.Id = Temp.DataId " - "AND Temp.name IN ({}) " - "ORDER BY Strain.Name").format( - ", ".join(["%s"] * len(traits))) + "AND Temp.name IN ({', '.join(['%s'] * len(traits))}) " + "ORDER BY Strain.Name") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -79,12 +78,11 @@ def publish_traits_data(conn, traits): "AND NStrain.StrainId = PublishData.StrainId) " "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId " "AND PublishData.Id = PublishXRef.DataId " - "AND PublishXRef.Id IN ({trait_names}) " - "AND PublishFreeze.Id IN ({dataset_ids}) " + f"AND PublishXRef.Id IN ({', '.join(['%s'] * len(traits))}) " + "AND PublishFreeze.Id IN " + f"({', '.join(['%s'] * len(dataset_ids))}) " "AND PublishData.StrainId = Strain.Id " - "ORDER BY Strain.Name").format( - trait_names=", ".join(["%s"] * len(traits)), - dataset_ids=", ".join(["%s"] * len(dataset_ids))) + "ORDER BY Strain.Name") if len(dataset_ids) > 0: with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( @@ -109,19 +107,16 @@ def cellid_traits_data(conn, traits): "LEFT JOIN ProbeSE " "ON (ProbeSE.DataId = ProbeData.Id " "AND ProbeSE.StrainId = ProbeData.StrainId) " - "WHERE Probe.Name IN ({cellids}) " - "AND ProbeSet.Name IN ({trait_names}) " + f"WHERE Probe.Name IN ({', '.join(['%s'] * len(cellids))}) " + f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))}) " "AND Probe.ProbeSetId = ProbeSet.Id " "AND ProbeXRef.ProbeId = Probe.Id " "AND ProbeXRef.ProbeFreezeId = ProbeFreeze.Id " "AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " - "AND ProbeSetFreeze.Name IN ({dataset_names}) " + f"AND ProbeSetFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))}) " "AND ProbeXRef.DataId = ProbeData.Id " "AND ProbeData.StrainId = Strain.Id " - "ORDER BY Strain.Name").format( - cellids=", ".join(["%s"] * len(cellids)), - trait_names=", ".join(["%s"] * len(traits)), - dataset_names=", ".join(["%s"] * len(dataset_names))) + "ORDER BY Strain.Name") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -143,15 +138,13 @@ def probeset_traits_data(conn, traits): "LEFT JOIN ProbeSetSE ON " "(ProbeSetSE.DataId = ProbeSetData.Id " "AND ProbeSetSE.StrainId = ProbeSetData.StrainId) " - "WHERE ProbeSet.Name IN ({trait_names}) " + f"WHERE ProbeSet.Name IN ({', '.join(['%s'] * len(traits))})" "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id " "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " - "AND ProbeSetFreeze.Name IN ({dataset_names}) " + f"AND ProbeSetFreeze.Name IN ({', '.join(['%s']*len(dataset_names))}) " "AND ProbeSetXRef.DataId = ProbeSetData.Id " "AND ProbeSetData.StrainId = Strain.Id " - "ORDER BY Strain.Name").format( - trait_names=", ".join(["%s"] * len(traits)), - dataset_names=", ".join(["%s"] * len(dataset_names))) + "ORDER BY Strain.Name") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -170,8 +163,7 @@ def species_ids(conn, traits): query = ( "SELECT Name AS `group`, SpeciesId AS species_id " "FROM InbredSet " - "WHERE Name IN ({groups})").format( - groups=", ".join(["%s"] * len(groups))) + f"WHERE Name IN ({', '.join(['%s'] * len(groups))})") if len(groups) > 0: with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, groups) @@ -190,16 +182,14 @@ def geno_traits_data(conn, traits): "FROM (GenoData, GenoFreeze, Strain, Geno, GenoXRef) " "LEFT JOIN GenoSE ON " "(GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) " - "WHERE Geno.SpeciesId IN ({species_ids}) " - "AND Geno.Name IN ({trait_names}) AND GenoXRef.GenoId = Geno.Id " + f"WHERE Geno.SpeciesId IN ({', '.join(['%s'] * len(sp_ids))}) " + f"AND Geno.Name IN ({', '.join(['%s'] * len(traits))}) " + "AND GenoXRef.GenoId = Geno.Id " "AND GenoXRef.GenoFreezeId = GenoFreeze.Id " - "AND GenoFreeze.Name IN ({dataset_names}) " + f"AND GenoFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))}) " "AND GenoXRef.DataId = GenoData.Id " "AND GenoData.StrainId = Strain.Id " - "ORDER BY Strain.Name").format( - species_ids=", ".join(["%s"] * len(sp_ids)), - trait_names=", ".join(["%s"] * len(traits)), - dataset_names=", ".join(["%s"] * len(dataset_names))) + "ORDER BY Strain.Name") if len(sp_ids) > 0 and len(dataset_names) > 0: with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( @@ -309,18 +299,16 @@ def publish_traits_info( "PublishXRef.Sequence, Phenotype.Units, PublishXRef.comments") query = ( "SELECT " - "PublishXRef.Id AS trait_name, {columns} " + f"PublishXRef.Id AS trait_name, {columns} " "FROM " "PublishXRef, Publication, Phenotype, PublishFreeze " "WHERE " - "PublishXRef.Id IN ({trait_names}) " + f"PublishXRef.Id IN ({', '.join(['%s'] * len(traits))}) " "AND Phenotype.Id = PublishXRef.PhenotypeId " "AND Publication.Id = PublishXRef.PublicationId " "AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId " - "AND PublishFreeze.Id IN ({trait_dataset_ids})").format( - columns=columns, - trait_names=", ".join(["%s"] * len(traits)), - trait_dataset_ids=", ".join(["%s"] * len(trait_dataset_ids))) + "AND PublishFreeze.Id IN " + f"({', '.join(['%s'] * len(trait_dataset_ids))})") if trait_dataset_ids: with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( @@ -337,25 +325,24 @@ def probeset_traits_info( Retrieve information for the probeset traits """ dataset_names = set(trait["db"]["dataset_name"] for trait in traits) - keys = ( - "name", "symbol", "description", "probe_target_description", "chr", - "mb", "alias", "geneid", "genbankid", "unigeneid", "omim", - "refseq_transcriptid", "blatseq", "targetseq", "chipid", "comments", - "strand_probe", "strand_gene", "probe_set_target_region", "proteinid", - "probe_set_specificity", "probe_set_blat_score", - "probe_set_blat_mb_start", "probe_set_blat_mb_end", "probe_set_strand", - "probe_set_note_by_rw", "flag") + columns = ", ".join( + [f"ProbeSet.{x}" for x in + ("name", "symbol", "description", "probe_target_description", "chr", + "mb", "alias", "geneid", "genbankid", "unigeneid", "omim", + "refseq_transcriptid", "blatseq", "targetseq", "chipid", "comments", + "strand_probe", "strand_gene", "probe_set_target_region", "proteinid", + "probe_set_specificity", "probe_set_blat_score", + "probe_set_blat_mb_start", "probe_set_blat_mb_end", + "probe_set_strand", "probe_set_note_by_rw", "flag")]) query = ( - "SELECT ProbeSet.Name AS trait_name, {columns} " + f"SELECT ProbeSet.Name AS trait_name, {columns} " "FROM ProbeSet INNER JOIN ProbeSetXRef " "ON ProbeSetXRef.ProbeSetId = ProbeSet.Id " "INNER JOIN ProbeSetFreeze " "ON ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId " - "WHERE ProbeSetFreeze.Name IN ({dataset_names}) " - "AND ProbeSet.Name IN ({trait_names})").format( - columns=", ".join(["ProbeSet.{}".format(x) for x in keys]), - dataset_names=", ".join(["%s"] * len(dataset_names)), - trait_names=", ".join(["%s"] * len(traits))) + "WHERE ProbeSetFreeze.Name IN " + f"({', '.join(['%s'] * len(dataset_names))}) " + f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -372,18 +359,16 @@ def geno_traits_info( This is a rework of the `gn3.db.traits.retrieve_geno_trait_info` function. """ dataset_names = set(trait["db"]["dataset_name"] for trait in traits) - keys = ("name", "chr", "mb", "source2", "sequence") + columns = ", ".join([ + f"Geno.{x}" for x in ("name", "chr", "mb", "source2", "sequence")]) query = ( "SELECT " - "Geno.Name AS trait_name, {columns} " + f"Geno.Name AS trait_name, {columns} " "FROM " "Geno INNER JOIN GenoXRef ON GenoXRef.GenoId = Geno.Id " "INNER JOIN GenoFreeze ON GenoFreeze.Id = GenoXRef.GenoFreezeId " - "WHERE GenoFreeze.Name IN ({dataset_names}) " - "AND Geno.Name IN ({trait_names})").format( - columns=", ".join(["Geno.{}".format(x) for x in keys]), - dataset_names=", ".join(["%s"] * len(dataset_names)), - trait_names=", ".join(["%s"] * len(traits))) + f"WHERE GenoFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))}) " + f"AND Geno.Name IN ({', '.join(['%s'] * len(traits))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -399,12 +384,9 @@ def temp_traits_info( A rework of the `gn3.db.traits.retrieve_temp_trait_info` function. """ - keys = ("name", "description") query = ( - "SELECT Name as trait_name, {columns} FROM Temp " - "WHERE Name = ({trait_names})").format( - columns=", ".join(keys), - trait_names=", ".join(["%s"] * len(traits))) + "SELECT Name as trait_name, name, description FROM Temp " + "WHERE Name IN ({', '.join(['%s'] * len(traits))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -468,8 +450,7 @@ def publish_datasets_groups(conn: Any, dataset_names: Tuple[str]): "InbredSet.Id " "FROM InbredSet, PublishFreeze " "WHERE PublishFreeze.InbredSetId = InbredSet.Id " - "AND PublishFreeze.Name IN ({dataset_names})").format( - dataset_names=", ".join(["%s"] * len(dataset_names))) + "AND PublishFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, tuple(dataset_names)) return organise_groups_by_dataset(cursor.fetchall()) @@ -519,8 +500,7 @@ def probeset_datasets_groups(conn, dataset_names): "FROM InbredSet, ProbeSetFreeze, ProbeFreeze " "WHERE ProbeFreeze.InbredSetId = InbredSet.Id " "AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId " - "AND ProbeSetFreeze.Name IN ({names})").format( - names=", ".join(["%s"] * len(dataset_names))) + "AND ProbeSetFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, tuple(dataset_names)) return organise_groups_by_dataset(cursor.fetchall()) @@ -567,8 +547,7 @@ def geno_datasets_groups(conn, dataset_names): "SELECT GenoFreeze.Name AS dataset_name, InbredSet.Name, InbredSet.Id " "FROM InbredSet, GenoFreeze " "WHERE GenoFreeze.InbredSetId = InbredSet.Id " - "AND GenoFreeze.Name IN ({names})").format( - names=", ".join(["%s"] * len(dataset_names))) + "AND GenoFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, tuple(dataset_names)) return organise_groups_by_dataset(cursor.fetchall()) @@ -596,14 +575,13 @@ def temp_datasets_groups(conn, dataset_names): "SELECT Temp.Name AS dataset_name, InbredSet.Name, InbredSet.Id " "FROM InbredSet, Temp " "WHERE Temp.InbredSetId = InbredSet.Id " - "AND Temp.Name IN ({names})").format( - names=", ".join(["%s"] * len(dataset_names))) + "AND Temp.Name IN ({', '.join(['%s'] * len(dataset_names))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, tuple(dataset_names)) return organise_groups_by_dataset(cursor.fetchall()) return {} -def temp_traits_datasets(conn: Any, threshold: int, traits: Tuple[Dict]): +def temp_traits_datasets(conn: Any, threshold: int, traits: Tuple[Dict]): #pylint: disable=[W0613] """ Retrieve datasets for 'Temp' traits. """ @@ -657,11 +635,9 @@ def set_publish_qtl_info(conn, qtl, traits): "SELECT PublishXRef.Id AS trait_name, PublishXRef.Locus, " "PublishXRef.LRS, PublishXRef.additive " "FROM PublishXRef, PublishFreeze " - "WHERE PublishXRef.Id IN ({trait_names}) " + f"WHERE PublishXRef.Id IN ({', '.join(['%s'] * len(traits))}) " "AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId " - "AND PublishFreeze.Id IN ({dataset_ids})").format( - trait_names=", ".join(["%s"] * len(traits)), - dataset_ids=", ".join(["%s"] * len(dataset_ids))) + f"AND PublishFreeze.Id IN ({', '.join(['%s'] * len(dataset_ids))})") return query_qtl_info(conn, query, traits, tuple(dataset_ids)) return traits @@ -677,10 +653,9 @@ def set_probeset_qtl_info(conn, qtl, traits): "ProbeSetXRef.mean, ProbeSetXRef.additive " "FROM ProbeSetXRef, ProbeSet " "WHERE ProbeSetXRef.ProbeSetId = ProbeSet.Id " - " AND ProbeSet.Name IN ({trait_names}) " - "AND ProbeSetXRef.ProbeSetFreezeId IN ({dataset_ids})").format( - trait_names=", ".join(["%s"] * len(traits)), - dataset_ids=", ".join(["%s"] * len(dataset_ids))) + f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))}) " + "AND ProbeSetXRef.ProbeSetFreezeId IN " + f"({', '.join(['%s'] * len(dataset_ids))})") return query_qtl_info(conn, query, traits, tuple(dataset_ids)) return traits @@ -694,10 +669,8 @@ def set_sequence(conn, traits): "FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef " "WHERE ProbeSet.Id=ProbeSetXRef.ProbeSetId " "AND ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId " - "AND ProbeSet.Name IN ({trait_names}) " - "AND ProbeSetFreeze.Name IN ({dataset_names})").format( - trait_names=", ".join(["%s"] * len(traits)), - dataset_names=", ".join(["%s"] * len(dataset_names))) + f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))}) " + f"AND ProbeSetFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( query, @@ -727,12 +700,10 @@ def set_homologene_id(conn, traits): "SELECT InbredSet.Name AS `group`, Homologene.GeneId AS geneid, " "HomologeneId " "FROM Homologene, Species, InbredSet " - "WHERE Homologene.GeneId IN ({geneids}) " - "AND InbredSet.Name IN ({groups}) " + f"WHERE Homologene.GeneId IN ({', '.join(['%s'] * len(geneids))}) " + f"AND InbredSet.Name IN ({', '.join(['%s'] * len(groups))}) " "AND InbredSet.SpeciesId = Species.Id " - "AND Species.TaxonomyId = Homologene.TaxonomyId").format( - geneids=", ".join(["%s"] * len(geneids)), - groups=", ".join(["%s"] * len(groups))) + "AND Species.TaxonomyId = Homologene.TaxonomyId") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, (tuple(geneids) + tuple(groups))) results = { |