diff options
Diffstat (limited to 'gn3/db')
-rw-r--r-- | gn3/db/correlations.py | 22 | ||||
-rw-r--r-- | gn3/db/datasets.py | 12 | ||||
-rw-r--r-- | gn3/db/traits.py | 48 |
3 files changed, 47 insertions, 35 deletions
diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py index 254af10..5361a1e 100644 --- a/gn3/db/correlations.py +++ b/gn3/db/correlations.py @@ -157,11 +157,12 @@ def fetch_symbol_value_pair_dict( symbol: data_id_dict.get(symbol) for symbol in symbol_list if data_id_dict.get(symbol) is not None } - query = "SELECT Id, value FROM TissueProbeSetData WHERE Id IN %(data_ids)s" + query = "SELECT Id, value FROM TissueProbeSetData WHERE Id IN ({})".format( + ",".join(f"%(id{i})s" for i in range(len(data_ids.values())))) with conn.cursor() as cursor: cursor.execute( query, - data_ids=tuple(data_ids.values())) + **{f"id{i}": did for i, did in enumerate(data_ids.values())}) value_results = cursor.fetchall() return { key: tuple(row[1] for row in value_results if row[0] == key) @@ -406,21 +407,22 @@ def fetch_sample_ids( """ query = ( "SELECT Strain.Id FROM Strain, Species " - "WHERE Strain.Name IN %(samples_names)s " + "WHERE Strain.Name IN ({}) " "AND Strain.SpeciesId=Species.Id " - "AND Species.name=%(species_name)s") + "AND Species.name=%(species_name)s").format( + ",".join(f"%(s{i})s" for i in range(len(sample_names)))) with conn.cursor() as cursor: cursor.execute( query, { - "samples_names": tuple(sample_names), + **{f"s{i}": sname for i, sname in enumerate(sample_names)}, "species_name": species_name }) return tuple(row[0] for row in cursor.fetchall()) def build_query_sgo_lit_corr( db_type: str, temp_table: str, sample_id_columns: str, - joins: Tuple[str, ...]) -> str: + joins: Tuple[str, ...]) -> Tuple[str, int]: """ Build query for `SGO Literature Correlation` data, when querying the given `temp_table` temporary table. @@ -483,14 +485,14 @@ def fetch_all_database_data(# pylint: disable=[R0913, R0914] sample_id_columns = ", ".join(f"T{smpl}.value" for smpl in sample_ids) if db_type == "Publish": joins = tuple( - ("LEFT JOIN PublishData AS T{item} " - "ON T{item}.Id = PublishXRef.DataId " - "AND T{item}.StrainId = %(T{item}_sample_id)s") + (f"LEFT JOIN PublishData AS T{item} " + f"ON T{item}.Id = PublishXRef.DataId " + f"AND T{item}.StrainId = %(T{item}_sample_id)s") for item in sample_ids) return ( ("SELECT PublishXRef.Id, " + sample_id_columns + - "FROM (PublishXRef, PublishFreeze) " + + " FROM (PublishXRef, PublishFreeze) " + " ".join(joins) + " WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId " "AND PublishFreeze.Name = %(db_name)s"), diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py index c50e148..a41e228 100644 --- a/gn3/db/datasets.py +++ b/gn3/db/datasets.py @@ -3,7 +3,7 @@ This module contains functions relating to specific trait dataset manipulation """ import re from string import Template -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from SPARQLWrapper import JSON, SPARQLWrapper from gn3.settings import SPARQL_ENDPOINT @@ -297,7 +297,7 @@ def retrieve_trait_dataset(trait_type, trait, threshold, conn): **group } -def sparql_query(query: str) -> Dict[str, Any]: +def sparql_query(query: str) -> List[Dict[str, Any]]: """Run a SPARQL query and return the bound variables.""" sparql = SPARQLWrapper(SPARQL_ENDPOINT) sparql.setQuery(query) @@ -328,7 +328,7 @@ WHERE { OPTIONAL { ?dataset gn:geoSeries ?geo_series } . } """, - """ + """ PREFIX gn: <http://genenetwork.org/> SELECT ?platform_name ?normalization_name ?species_name ?inbred_set_name ?tissue_name WHERE { @@ -341,7 +341,7 @@ WHERE { OPTIONAL { ?dataset gn:datasetOfPlatform / gn:name ?platform_name } . } """, - """ + """ PREFIX gn: <http://genenetwork.org/> SELECT ?specifics ?summary ?about_cases ?about_tissue ?about_platform ?about_data_processing ?notes ?experiment_design ?contributors @@ -362,8 +362,8 @@ WHERE { OPTIONAL { ?dataset gn:acknowledgment ?acknowledgment . } } """] - result = {'accession_id': accession_id, - 'investigator': {}} + result: Dict[str, Any] = {'accession_id': accession_id, + 'investigator': {}} query_result = {} for query in queries: if sparql_result := sparql_query(Template(query).substitute(accession_id=accession_id)): diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 7994aef..338b320 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -1,6 +1,5 @@ """This class contains functions relating to trait data manipulation""" import os -import MySQLdb from functools import reduce from typing import Any, Dict, Union, Sequence @@ -111,7 +110,6 @@ def get_trait_csv_sample_data(conn: Any, def update_sample_data(conn: Any, #pylint: disable=[R0913] - trait_name: str, strain_name: str, phenotype_id: int, @@ -204,25 +202,30 @@ def delete_sample_data(conn: Any, "AND Strain.Name = \"%s\"") % (trait_name, phenotype_id, str(strain_name))) - strain_id, data_id = cursor.fetchone() - cursor.execute(("DELETE FROM PublishData " + # Check if it exists if the data was already deleted: + if _result := cursor.fetchone(): + strain_id, data_id = _result + + # Only run if the strain_id and data_id exist + if strain_id and data_id: + cursor.execute(("DELETE FROM PublishData " "WHERE StrainId = %s AND Id = %s") - % (strain_id, data_id)) - deleted_published_data = cursor.rowcount - - # Delete the PublishSE table - cursor.execute(("DELETE FROM PublishSE " - "WHERE StrainId = %s AND DataId = %s") % - (strain_id, data_id)) - deleted_se_data = cursor.rowcount - - # Delete the NStrain table - cursor.execute(("DELETE FROM NStrain " - "WHERE StrainId = %s AND DataId = %s" % - (strain_id, data_id))) - deleted_n_strains = cursor.rowcount - except Exception as e: #pylint: disable=[C0103, W0612] + % (strain_id, data_id)) + deleted_published_data = cursor.rowcount + + # Delete the PublishSE table + cursor.execute(("DELETE FROM PublishSE " + "WHERE StrainId = %s AND DataId = %s") % + (strain_id, data_id)) + deleted_se_data = cursor.rowcount + + # Delete the NStrain table + cursor.execute(("DELETE FROM NStrain " + "WHERE StrainId = %s AND DataId = %s" % + (strain_id, data_id))) + deleted_n_strains = cursor.rowcount + except Exception as e: #pylint: disable=[C0103, W0612] conn.rollback() raise MySQLdb.Error conn.commit() @@ -255,6 +258,13 @@ def insert_sample_data(conn: Any, #pylint: disable=[R0913] (strain_name,)) strain_id = cursor.fetchone() + # Return early if an insert already exists! + cursor.execute("SELECT Id FROM PublishData where Id = %s " + "AND StrainId = %s", + (data_id, strain_id)) + if cursor.fetchone(): # This strain already exists + return (0, 0, 0) + # Insert the PublishData table cursor.execute(("INSERT INTO PublishData (Id, StrainId, value)" "VALUES (%s, %s, %s)"), |