about summary refs log tree commit diff
path: root/gn3/db/partial_correlations.py
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2022-02-21 16:23:06 +0300
committerFrederick Muriuki Muriithi2022-02-21 16:23:06 +0300
commita35fce27875d9db80dce1976b6f8ee8c00ecfe0a (patch)
tree8e8f815a6e3d37348bdb8f253f5ec53f72dc2dbc /gn3/db/partial_correlations.py
parentc84b07b8c5ac0a42c0fab929c75823b30b548191 (diff)
downloadgenenetwork3-a35fce27875d9db80dce1976b6f8ee8c00ecfe0a.tar.gz
Fix a myriad of linter issues
* Use `with` in place of plain `open`
* Use f-strings in place of `str.format()`
* Remove string interpolation from queries - provide data as query parameters
* other minor fixes
Diffstat (limited to 'gn3/db/partial_correlations.py')
-rw-r--r--gn3/db/partial_correlations.py145
1 files changed, 58 insertions, 87 deletions
diff --git a/gn3/db/partial_correlations.py b/gn3/db/partial_correlations.py
index 0075cad..a28b111 100644
--- a/gn3/db/partial_correlations.py
+++ b/gn3/db/partial_correlations.py
@@ -48,9 +48,8 @@ def temp_traits_data(conn, traits):
         "FROM TempData, Temp, Strain "
         "WHERE TempData.StrainId = Strain.Id "
         "AND TempData.Id = Temp.DataId "
-        "AND Temp.name IN ({}) "
-        "ORDER BY Strain.Name").format(
-            ", ".join(["%s"] * len(traits)))
+        "AND Temp.name IN ({', '.join(['%s'] * len(traits))}) "
+        "ORDER BY Strain.Name")
     with conn.cursor(cursorclass=DictCursor) as cursor:
         cursor.execute(
             query,
@@ -79,12 +78,11 @@ def publish_traits_data(conn, traits):
         "AND NStrain.StrainId = PublishData.StrainId) "
         "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId "
         "AND PublishData.Id = PublishXRef.DataId "
-        "AND PublishXRef.Id  IN ({trait_names}) "
-        "AND PublishFreeze.Id IN ({dataset_ids}) "
+        f"AND PublishXRef.Id  IN ({', '.join(['%s'] * len(traits))}) "
+        "AND PublishFreeze.Id IN "
+        f"({', '.join(['%s'] * len(dataset_ids))}) "
         "AND PublishData.StrainId = Strain.Id "
-        "ORDER BY Strain.Name").format(
-            trait_names=", ".join(["%s"] * len(traits)),
-            dataset_ids=", ".join(["%s"] * len(dataset_ids)))
+        "ORDER BY Strain.Name")
     if len(dataset_ids) > 0:
         with conn.cursor(cursorclass=DictCursor) as cursor:
             cursor.execute(
@@ -109,19 +107,16 @@ def cellid_traits_data(conn, traits):
         "LEFT JOIN ProbeSE "
         "ON (ProbeSE.DataId = ProbeData.Id "
         "AND ProbeSE.StrainId = ProbeData.StrainId) "
-        "WHERE Probe.Name IN ({cellids}) "
-        "AND ProbeSet.Name IN ({trait_names}) "
+        f"WHERE Probe.Name IN ({', '.join(['%s'] * len(cellids))}) "
+        f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))}) "
         "AND Probe.ProbeSetId = ProbeSet.Id "
         "AND ProbeXRef.ProbeId = Probe.Id "
         "AND ProbeXRef.ProbeFreezeId = ProbeFreeze.Id "
         "AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id "
-        "AND ProbeSetFreeze.Name IN ({dataset_names}) "
+        f"AND ProbeSetFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))}) "
         "AND ProbeXRef.DataId = ProbeData.Id "
         "AND ProbeData.StrainId = Strain.Id "
-        "ORDER BY Strain.Name").format(
-            cellids=", ".join(["%s"] * len(cellids)),
-            trait_names=", ".join(["%s"] * len(traits)),
-            dataset_names=", ".join(["%s"] * len(dataset_names)))
+        "ORDER BY Strain.Name")
     with conn.cursor(cursorclass=DictCursor) as cursor:
         cursor.execute(
             query,
@@ -143,15 +138,13 @@ def probeset_traits_data(conn, traits):
         "LEFT JOIN ProbeSetSE ON "
         "(ProbeSetSE.DataId = ProbeSetData.Id "
         "AND ProbeSetSE.StrainId = ProbeSetData.StrainId) "
-        "WHERE ProbeSet.Name IN ({trait_names}) "
+        f"WHERE ProbeSet.Name IN ({', '.join(['%s'] * len(traits))})"
         "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id "
         "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
-        "AND ProbeSetFreeze.Name IN ({dataset_names}) "
+        f"AND ProbeSetFreeze.Name IN ({', '.join(['%s']*len(dataset_names))}) "
         "AND ProbeSetXRef.DataId = ProbeSetData.Id "
         "AND ProbeSetData.StrainId = Strain.Id "
-        "ORDER BY Strain.Name").format(
-            trait_names=", ".join(["%s"] * len(traits)),
-            dataset_names=", ".join(["%s"] * len(dataset_names)))
+        "ORDER BY Strain.Name")
     with conn.cursor(cursorclass=DictCursor) as cursor:
         cursor.execute(
             query,
@@ -170,8 +163,7 @@ def species_ids(conn, traits):
     query = (
         "SELECT Name AS `group`, SpeciesId AS species_id "
         "FROM InbredSet "
-        "WHERE Name IN ({groups})").format(
-            groups=", ".join(["%s"] * len(groups)))
+        f"WHERE Name IN ({', '.join(['%s'] * len(groups))})")
     if len(groups) > 0:
         with conn.cursor(cursorclass=DictCursor) as cursor:
             cursor.execute(query, groups)
@@ -190,16 +182,14 @@ def geno_traits_data(conn, traits):
         "FROM (GenoData, GenoFreeze, Strain, Geno, GenoXRef) "
         "LEFT JOIN GenoSE ON "
         "(GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) "
-        "WHERE Geno.SpeciesId IN ({species_ids}) "
-        "AND Geno.Name IN ({trait_names}) AND GenoXRef.GenoId = Geno.Id "
+        f"WHERE Geno.SpeciesId IN ({', '.join(['%s'] * len(sp_ids))}) "
+        f"AND Geno.Name IN ({', '.join(['%s'] * len(traits))}) "
+        "AND GenoXRef.GenoId = Geno.Id "
         "AND GenoXRef.GenoFreezeId = GenoFreeze.Id "
-        "AND GenoFreeze.Name IN ({dataset_names}) "
+        f"AND GenoFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))}) "
         "AND GenoXRef.DataId = GenoData.Id "
         "AND GenoData.StrainId = Strain.Id "
-        "ORDER BY Strain.Name").format(
-            species_ids=", ".join(["%s"] * len(sp_ids)),
-            trait_names=", ".join(["%s"] * len(traits)),
-            dataset_names=", ".join(["%s"] * len(dataset_names)))
+        "ORDER BY Strain.Name")
     if len(sp_ids) > 0 and len(dataset_names) > 0:
         with conn.cursor(cursorclass=DictCursor) as cursor:
             cursor.execute(
@@ -309,18 +299,16 @@ def publish_traits_info(
         "PublishXRef.Sequence, Phenotype.Units, PublishXRef.comments")
     query = (
         "SELECT "
-        "PublishXRef.Id AS trait_name, {columns} "
+        f"PublishXRef.Id AS trait_name, {columns} "
         "FROM "
         "PublishXRef, Publication, Phenotype, PublishFreeze "
         "WHERE "
-        "PublishXRef.Id IN ({trait_names}) "
+        f"PublishXRef.Id IN ({', '.join(['%s'] * len(traits))}) "
         "AND Phenotype.Id = PublishXRef.PhenotypeId "
         "AND Publication.Id = PublishXRef.PublicationId "
         "AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId "
-        "AND PublishFreeze.Id IN ({trait_dataset_ids})").format(
-            columns=columns,
-            trait_names=", ".join(["%s"] * len(traits)),
-            trait_dataset_ids=", ".join(["%s"] * len(trait_dataset_ids)))
+        "AND PublishFreeze.Id IN "
+        f"({', '.join(['%s'] * len(trait_dataset_ids))})")
     if trait_dataset_ids:
         with conn.cursor(cursorclass=DictCursor) as cursor:
             cursor.execute(
@@ -337,25 +325,24 @@ def probeset_traits_info(
     Retrieve information for the probeset traits
     """
     dataset_names = set(trait["db"]["dataset_name"] for trait in traits)
-    keys = (
-        "name", "symbol", "description", "probe_target_description", "chr",
-        "mb", "alias", "geneid", "genbankid", "unigeneid", "omim",
-        "refseq_transcriptid", "blatseq", "targetseq", "chipid", "comments",
-        "strand_probe", "strand_gene", "probe_set_target_region", "proteinid",
-        "probe_set_specificity", "probe_set_blat_score",
-        "probe_set_blat_mb_start", "probe_set_blat_mb_end", "probe_set_strand",
-        "probe_set_note_by_rw", "flag")
+    columns = ", ".join(
+        [f"ProbeSet.{x}" for x in
+         ("name", "symbol", "description", "probe_target_description", "chr",
+          "mb", "alias", "geneid", "genbankid", "unigeneid", "omim",
+          "refseq_transcriptid", "blatseq", "targetseq", "chipid", "comments",
+          "strand_probe", "strand_gene", "probe_set_target_region", "proteinid",
+          "probe_set_specificity", "probe_set_blat_score",
+          "probe_set_blat_mb_start", "probe_set_blat_mb_end",
+          "probe_set_strand", "probe_set_note_by_rw", "flag")])
     query = (
-        "SELECT ProbeSet.Name AS trait_name, {columns} "
+        f"SELECT ProbeSet.Name AS trait_name, {columns} "
         "FROM ProbeSet INNER JOIN ProbeSetXRef "
         "ON ProbeSetXRef.ProbeSetId = ProbeSet.Id "
         "INNER JOIN ProbeSetFreeze "
         "ON ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId "
-        "WHERE ProbeSetFreeze.Name IN ({dataset_names}) "
-        "AND ProbeSet.Name IN ({trait_names})").format(
-            columns=", ".join(["ProbeSet.{}".format(x) for x in keys]),
-            dataset_names=", ".join(["%s"] * len(dataset_names)),
-            trait_names=", ".join(["%s"] * len(traits)))
+        "WHERE ProbeSetFreeze.Name IN "
+        f"({', '.join(['%s'] * len(dataset_names))}) "
+        f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))})")
     with conn.cursor(cursorclass=DictCursor) as cursor:
         cursor.execute(
             query,
@@ -372,18 +359,16 @@ def geno_traits_info(
     This is a rework of the `gn3.db.traits.retrieve_geno_trait_info` function.
     """
     dataset_names = set(trait["db"]["dataset_name"] for trait in traits)
-    keys = ("name", "chr", "mb", "source2", "sequence")
+    columns = ", ".join([
+        f"Geno.{x}" for x in ("name", "chr", "mb", "source2", "sequence")])
     query = (
         "SELECT "
-        "Geno.Name AS trait_name, {columns} "
+        f"Geno.Name AS trait_name, {columns} "
         "FROM "
         "Geno INNER JOIN GenoXRef ON GenoXRef.GenoId = Geno.Id "
         "INNER JOIN GenoFreeze ON GenoFreeze.Id = GenoXRef.GenoFreezeId "
-        "WHERE GenoFreeze.Name IN ({dataset_names}) "
-        "AND Geno.Name IN ({trait_names})").format(
-            columns=", ".join(["Geno.{}".format(x) for x in keys]),
-            dataset_names=", ".join(["%s"] * len(dataset_names)),
-            trait_names=", ".join(["%s"] * len(traits)))
+        f"WHERE GenoFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))}) "
+        f"AND Geno.Name IN ({', '.join(['%s'] * len(traits))})")
     with conn.cursor(cursorclass=DictCursor) as cursor:
         cursor.execute(
             query,
@@ -399,12 +384,9 @@ def temp_traits_info(
 
     A rework of the `gn3.db.traits.retrieve_temp_trait_info` function.
     """
-    keys = ("name", "description")
     query = (
-        "SELECT Name as trait_name, {columns} FROM Temp "
-        "WHERE Name = ({trait_names})").format(
-            columns=", ".join(keys),
-            trait_names=", ".join(["%s"] * len(traits)))
+        "SELECT Name as trait_name, name, description FROM Temp "
+        "WHERE Name IN ({', '.join(['%s'] * len(traits))})")
     with conn.cursor(cursorclass=DictCursor) as cursor:
         cursor.execute(
             query,
@@ -468,8 +450,7 @@ def publish_datasets_groups(conn: Any, dataset_names: Tuple[str]):
         "InbredSet.Id "
         "FROM InbredSet, PublishFreeze "
         "WHERE PublishFreeze.InbredSetId = InbredSet.Id "
-        "AND PublishFreeze.Name IN ({dataset_names})").format(
-            dataset_names=", ".join(["%s"] * len(dataset_names)))
+        "AND PublishFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})")
     with conn.cursor(cursorclass=DictCursor) as cursor:
         cursor.execute(query, tuple(dataset_names))
         return organise_groups_by_dataset(cursor.fetchall())
@@ -519,8 +500,7 @@ def probeset_datasets_groups(conn, dataset_names):
         "FROM InbredSet, ProbeSetFreeze, ProbeFreeze "
         "WHERE ProbeFreeze.InbredSetId = InbredSet.Id "
         "AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId "
-        "AND ProbeSetFreeze.Name IN ({names})").format(
-            names=", ".join(["%s"] * len(dataset_names)))
+        "AND ProbeSetFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})")
     with conn.cursor(cursorclass=DictCursor) as cursor:
         cursor.execute(query, tuple(dataset_names))
         return organise_groups_by_dataset(cursor.fetchall())
@@ -567,8 +547,7 @@ def geno_datasets_groups(conn, dataset_names):
         "SELECT GenoFreeze.Name AS dataset_name, InbredSet.Name, InbredSet.Id "
         "FROM InbredSet, GenoFreeze "
         "WHERE GenoFreeze.InbredSetId = InbredSet.Id "
-        "AND GenoFreeze.Name IN ({names})").format(
-            names=", ".join(["%s"] * len(dataset_names)))
+        "AND GenoFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})")
     with conn.cursor(cursorclass=DictCursor) as cursor:
         cursor.execute(query, tuple(dataset_names))
         return organise_groups_by_dataset(cursor.fetchall())
@@ -596,14 +575,13 @@ def temp_datasets_groups(conn, dataset_names):
         "SELECT Temp.Name AS dataset_name, InbredSet.Name, InbredSet.Id "
         "FROM InbredSet, Temp "
         "WHERE Temp.InbredSetId = InbredSet.Id "
-        "AND Temp.Name IN ({names})").format(
-            names=", ".join(["%s"] * len(dataset_names)))
+        "AND Temp.Name IN ({', '.join(['%s'] * len(dataset_names))})")
     with conn.cursor(cursorclass=DictCursor) as cursor:
         cursor.execute(query, tuple(dataset_names))
         return organise_groups_by_dataset(cursor.fetchall())
     return {}
 
-def temp_traits_datasets(conn: Any, threshold: int, traits: Tuple[Dict]):
+def temp_traits_datasets(conn: Any, threshold: int, traits: Tuple[Dict]): #pylint: disable=[W0613]
     """
     Retrieve datasets for 'Temp' traits.
     """
@@ -657,11 +635,9 @@ def set_publish_qtl_info(conn, qtl, traits):
             "SELECT PublishXRef.Id AS trait_name, PublishXRef.Locus, "
             "PublishXRef.LRS, PublishXRef.additive "
             "FROM PublishXRef, PublishFreeze "
-            "WHERE PublishXRef.Id IN ({trait_names}) "
+            f"WHERE PublishXRef.Id IN ({', '.join(['%s'] * len(traits))}) "
             "AND PublishXRef.InbredSetId = PublishFreeze.InbredSetId "
-            "AND PublishFreeze.Id IN ({dataset_ids})").format(
-                trait_names=", ".join(["%s"] * len(traits)),
-                dataset_ids=", ".join(["%s"] * len(dataset_ids)))
+            f"AND PublishFreeze.Id IN ({', '.join(['%s'] * len(dataset_ids))})")
         return query_qtl_info(conn, query, traits, tuple(dataset_ids))
     return traits
 
@@ -677,10 +653,9 @@ def set_probeset_qtl_info(conn, qtl, traits):
             "ProbeSetXRef.mean, ProbeSetXRef.additive "
             "FROM ProbeSetXRef, ProbeSet "
             "WHERE ProbeSetXRef.ProbeSetId = ProbeSet.Id "
-            " AND ProbeSet.Name IN ({trait_names}) "
-            "AND ProbeSetXRef.ProbeSetFreezeId IN ({dataset_ids})").format(
-                trait_names=", ".join(["%s"] * len(traits)),
-                dataset_ids=", ".join(["%s"] * len(dataset_ids)))
+            f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))}) "
+            "AND ProbeSetXRef.ProbeSetFreezeId IN "
+            f"({', '.join(['%s'] * len(dataset_ids))})")
         return query_qtl_info(conn, query, traits, tuple(dataset_ids))
     return traits
 
@@ -694,10 +669,8 @@ def set_sequence(conn, traits):
         "FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef "
         "WHERE ProbeSet.Id=ProbeSetXRef.ProbeSetId "
         "AND ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId "
-        "AND ProbeSet.Name IN ({trait_names}) "
-        "AND ProbeSetFreeze.Name IN ({dataset_names})").format(
-            trait_names=", ".join(["%s"] * len(traits)),
-            dataset_names=", ".join(["%s"] * len(dataset_names)))
+        f"AND ProbeSet.Name IN ({', '.join(['%s'] * len(traits))}) "
+        f"AND ProbeSetFreeze.Name IN ({', '.join(['%s'] * len(dataset_names))})")
     with conn.cursor(cursorclass=DictCursor) as cursor:
         cursor.execute(
             query,
@@ -727,12 +700,10 @@ def set_homologene_id(conn, traits):
             "SELECT InbredSet.Name AS `group`, Homologene.GeneId AS geneid, "
             "HomologeneId "
             "FROM Homologene, Species, InbredSet "
-            "WHERE Homologene.GeneId IN ({geneids}) "
-            "AND InbredSet.Name IN ({groups}) "
+            f"WHERE Homologene.GeneId IN ({', '.join(['%s'] * len(geneids))}) "
+            f"AND InbredSet.Name IN ({', '.join(['%s'] * len(groups))}) "
             "AND InbredSet.SpeciesId = Species.Id "
-            "AND Species.TaxonomyId = Homologene.TaxonomyId").format(
-                geneids=", ".join(["%s"] * len(geneids)),
-                groups=", ".join(["%s"] * len(groups)))
+            "AND Species.TaxonomyId = Homologene.TaxonomyId")
         with conn.cursor(cursorclass=DictCursor) as cursor:
             cursor.execute(query, (tuple(geneids) + tuple(groups)))
             results = {