aboutsummaryrefslogtreecommitdiff
path: root/gn3/db
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/db')
-rw-r--r--gn3/db/correlations.py22
-rw-r--r--gn3/db/datasets.py12
-rw-r--r--gn3/db/traits.py48
3 files changed, 47 insertions, 35 deletions
diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py
index 254af10..5361a1e 100644
--- a/gn3/db/correlations.py
+++ b/gn3/db/correlations.py
@@ -157,11 +157,12 @@ def fetch_symbol_value_pair_dict(
symbol: data_id_dict.get(symbol) for symbol in symbol_list
if data_id_dict.get(symbol) is not None
}
- query = "SELECT Id, value FROM TissueProbeSetData WHERE Id IN %(data_ids)s"
+ query = "SELECT Id, value FROM TissueProbeSetData WHERE Id IN ({})".format(
+ ",".join(f"%(id{i})s" for i in range(len(data_ids.values()))))
with conn.cursor() as cursor:
cursor.execute(
query,
- data_ids=tuple(data_ids.values()))
+ **{f"id{i}": did for i, did in enumerate(data_ids.values())})
value_results = cursor.fetchall()
return {
key: tuple(row[1] for row in value_results if row[0] == key)
@@ -406,21 +407,22 @@ def fetch_sample_ids(
"""
query = (
"SELECT Strain.Id FROM Strain, Species "
- "WHERE Strain.Name IN %(samples_names)s "
+ "WHERE Strain.Name IN ({}) "
"AND Strain.SpeciesId=Species.Id "
- "AND Species.name=%(species_name)s")
+ "AND Species.name=%(species_name)s").format(
+ ",".join(f"%(s{i})s" for i in range(len(sample_names))))
with conn.cursor() as cursor:
cursor.execute(
query,
{
- "samples_names": tuple(sample_names),
+ **{f"s{i}": sname for i, sname in enumerate(sample_names)},
"species_name": species_name
})
return tuple(row[0] for row in cursor.fetchall())
def build_query_sgo_lit_corr(
db_type: str, temp_table: str, sample_id_columns: str,
- joins: Tuple[str, ...]) -> str:
+ joins: Tuple[str, ...]) -> Tuple[str, int]:
"""
Build query for `SGO Literature Correlation` data, when querying the given
`temp_table` temporary table.
@@ -483,14 +485,14 @@ def fetch_all_database_data(# pylint: disable=[R0913, R0914]
sample_id_columns = ", ".join(f"T{smpl}.value" for smpl in sample_ids)
if db_type == "Publish":
joins = tuple(
- ("LEFT JOIN PublishData AS T{item} "
- "ON T{item}.Id = PublishXRef.DataId "
- "AND T{item}.StrainId = %(T{item}_sample_id)s")
+ (f"LEFT JOIN PublishData AS T{item} "
+ f"ON T{item}.Id = PublishXRef.DataId "
+ f"AND T{item}.StrainId = %(T{item}_sample_id)s")
for item in sample_ids)
return (
("SELECT PublishXRef.Id, " +
sample_id_columns +
- "FROM (PublishXRef, PublishFreeze) " +
+ " FROM (PublishXRef, PublishFreeze) " +
" ".join(joins) +
" WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId "
"AND PublishFreeze.Name = %(db_name)s"),
diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py
index c50e148..a41e228 100644
--- a/gn3/db/datasets.py
+++ b/gn3/db/datasets.py
@@ -3,7 +3,7 @@ This module contains functions relating to specific trait dataset manipulation
"""
import re
from string import Template
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
from SPARQLWrapper import JSON, SPARQLWrapper
from gn3.settings import SPARQL_ENDPOINT
@@ -297,7 +297,7 @@ def retrieve_trait_dataset(trait_type, trait, threshold, conn):
**group
}
-def sparql_query(query: str) -> Dict[str, Any]:
+def sparql_query(query: str) -> List[Dict[str, Any]]:
"""Run a SPARQL query and return the bound variables."""
sparql = SPARQLWrapper(SPARQL_ENDPOINT)
sparql.setQuery(query)
@@ -328,7 +328,7 @@ WHERE {
OPTIONAL { ?dataset gn:geoSeries ?geo_series } .
}
""",
- """
+ """
PREFIX gn: <http://genenetwork.org/>
SELECT ?platform_name ?normalization_name ?species_name ?inbred_set_name ?tissue_name
WHERE {
@@ -341,7 +341,7 @@ WHERE {
OPTIONAL { ?dataset gn:datasetOfPlatform / gn:name ?platform_name } .
}
""",
- """
+ """
PREFIX gn: <http://genenetwork.org/>
SELECT ?specifics ?summary ?about_cases ?about_tissue ?about_platform
?about_data_processing ?notes ?experiment_design ?contributors
@@ -362,8 +362,8 @@ WHERE {
OPTIONAL { ?dataset gn:acknowledgment ?acknowledgment . }
}
"""]
- result = {'accession_id': accession_id,
- 'investigator': {}}
+ result: Dict[str, Any] = {'accession_id': accession_id,
+ 'investigator': {}}
query_result = {}
for query in queries:
if sparql_result := sparql_query(Template(query).substitute(accession_id=accession_id)):
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 7994aef..338b320 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,6 +1,5 @@
"""This class contains functions relating to trait data manipulation"""
import os
-import MySQLdb
from functools import reduce
from typing import Any, Dict, Union, Sequence
@@ -111,7 +110,6 @@ def get_trait_csv_sample_data(conn: Any,
def update_sample_data(conn: Any, #pylint: disable=[R0913]
-
trait_name: str,
strain_name: str,
phenotype_id: int,
@@ -204,25 +202,30 @@ def delete_sample_data(conn: Any,
"AND Strain.Name = \"%s\"") % (trait_name,
phenotype_id,
str(strain_name)))
- strain_id, data_id = cursor.fetchone()
- cursor.execute(("DELETE FROM PublishData "
+ # Check if it exists if the data was already deleted:
+ if _result := cursor.fetchone():
+ strain_id, data_id = _result
+
+ # Only run if the strain_id and data_id exist
+ if strain_id and data_id:
+ cursor.execute(("DELETE FROM PublishData "
"WHERE StrainId = %s AND Id = %s")
- % (strain_id, data_id))
- deleted_published_data = cursor.rowcount
-
- # Delete the PublishSE table
- cursor.execute(("DELETE FROM PublishSE "
- "WHERE StrainId = %s AND DataId = %s") %
- (strain_id, data_id))
- deleted_se_data = cursor.rowcount
-
- # Delete the NStrain table
- cursor.execute(("DELETE FROM NStrain "
- "WHERE StrainId = %s AND DataId = %s" %
- (strain_id, data_id)))
- deleted_n_strains = cursor.rowcount
- except Exception as e: #pylint: disable=[C0103, W0612]
+ % (strain_id, data_id))
+ deleted_published_data = cursor.rowcount
+
+ # Delete the PublishSE table
+ cursor.execute(("DELETE FROM PublishSE "
+ "WHERE StrainId = %s AND DataId = %s") %
+ (strain_id, data_id))
+ deleted_se_data = cursor.rowcount
+
+ # Delete the NStrain table
+ cursor.execute(("DELETE FROM NStrain "
+ "WHERE StrainId = %s AND DataId = %s" %
+ (strain_id, data_id)))
+ deleted_n_strains = cursor.rowcount
+ except Exception as e: #pylint: disable=[C0103, W0612]
conn.rollback()
raise MySQLdb.Error
conn.commit()
@@ -255,6 +258,13 @@ def insert_sample_data(conn: Any, #pylint: disable=[R0913]
(strain_name,))
strain_id = cursor.fetchone()
+ # Return early if an insert already exists!
+ cursor.execute("SELECT Id FROM PublishData where Id = %s "
+ "AND StrainId = %s",
+ (data_id, strain_id))
+ if cursor.fetchone(): # This strain already exists
+ return (0, 0, 0)
+
# Insert the PublishData table
cursor.execute(("INSERT INTO PublishData (Id, StrainId, value)"
"VALUES (%s, %s, %s)"),