3 files changed, 47 insertions, 35 deletions
diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py
index 254af10..5361a1e 100644
--- a/gn3/db/correlations.py
+++ b/gn3/db/correlations.py
@@ -157,11 +157,12 @@ def fetch_symbol_value_pair_dict(
         symbol: data_id_dict.get(symbol) for symbol in symbol_list
         if data_id_dict.get(symbol) is not None
     }
-    query = "SELECT Id, value FROM TissueProbeSetData WHERE Id IN %(data_ids)s"
+    query = "SELECT Id, value FROM TissueProbeSetData WHERE Id IN ({})".format(
+        ",".join(f"%(id{i})s" for i in range(len(data_ids.values()))))
     with conn.cursor() as cursor:
         cursor.execute(
             query,
-            data_ids=tuple(data_ids.values()))
+            **{f"id{i}": did for i, did in enumerate(data_ids.values())})
         value_results = cursor.fetchall()
         return {
             key: tuple(row[1] for row in value_results if row[0] == key)
@@ -406,21 +407,22 @@ def fetch_sample_ids(
     """
     query = (
         "SELECT Strain.Id FROM Strain, Species "
-        "WHERE Strain.Name IN %(samples_names)s "
+        "WHERE Strain.Name IN ({}) "
         "AND Strain.SpeciesId=Species.Id "
-        "AND Species.name=%(species_name)s")
+        "AND Species.name=%(species_name)s").format(
+            ",".join(f"%(s{i})s" for i in range(len(sample_names))))
     with conn.cursor() as cursor:
         cursor.execute(
             query,
             {
-                "samples_names": tuple(sample_names),
+                **{f"s{i}": sname for i, sname in enumerate(sample_names)},
                 "species_name": species_name
             })
         return tuple(row[0] for row in cursor.fetchall())
 
 def build_query_sgo_lit_corr(
         db_type: str, temp_table: str, sample_id_columns: str,
-        joins: Tuple[str, ...]) -> str:
+        joins: Tuple[str, ...]) -> Tuple[str, int]:
     """
     Build query for `SGO Literature Correlation` data, when querying the given
     `temp_table` temporary table.
@@ -483,14 +485,14 @@ def fetch_all_database_data(# pylint: disable=[R0913, R0914]
         sample_id_columns = ", ".join(f"T{smpl}.value" for smpl in sample_ids)
         if db_type == "Publish":
             joins = tuple(
-                ("LEFT JOIN PublishData AS T{item} "
-                 "ON T{item}.Id = PublishXRef.DataId "
-                 "AND T{item}.StrainId = %(T{item}_sample_id)s")
+                (f"LEFT JOIN PublishData AS T{item} "
+                 f"ON T{item}.Id = PublishXRef.DataId "
+                 f"AND T{item}.StrainId = %(T{item}_sample_id)s")
                 for item in sample_ids)
             return (
                 ("SELECT PublishXRef.Id, " +
                  sample_id_columns +
-                 "FROM (PublishXRef, PublishFreeze) " +
+                 " FROM (PublishXRef, PublishFreeze) " +
                  " ".join(joins) +
                  " WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId "
                  "AND PublishFreeze.Name = %(db_name)s"),
diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py
index c50e148..a41e228 100644
--- a/gn3/db/datasets.py
+++ b/gn3/db/datasets.py
@@ -3,7 +3,7 @@ This module contains functions relating to specific trait dataset manipulation
 """
 import re
 from string import Template
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 from SPARQLWrapper import JSON, SPARQLWrapper
 from gn3.settings import SPARQL_ENDPOINT
 
@@ -297,7 +297,7 @@ def retrieve_trait_dataset(trait_type, trait, threshold, conn):
         **group
     }
 
-def sparql_query(query: str) -> Dict[str, Any]:
+def sparql_query(query: str) -> List[Dict[str, Any]]:
     """Run a SPARQL query and return the bound variables."""
     sparql = SPARQLWrapper(SPARQL_ENDPOINT)
     sparql.setQuery(query)
@@ -328,7 +328,7 @@ WHERE {
   OPTIONAL { ?dataset gn:geoSeries ?geo_series } .
 }
 """,
-             """
+               """
 PREFIX gn: <http://genenetwork.org/>
 SELECT ?platform_name ?normalization_name ?species_name ?inbred_set_name ?tissue_name
 WHERE {
@@ -341,7 +341,7 @@ WHERE {
   OPTIONAL { ?dataset gn:datasetOfPlatform / gn:name ?platform_name } .
 }
 """,
-             """
+               """
 PREFIX gn: <http://genenetwork.org/>
 SELECT ?specifics ?summary ?about_cases ?about_tissue ?about_platform
        ?about_data_processing ?notes ?experiment_design ?contributors
@@ -362,8 +362,8 @@ WHERE {
   OPTIONAL { ?dataset gn:acknowledgment ?acknowledgment . }
 }
 """]
-    result = {'accession_id': accession_id,
-              'investigator': {}}
+    result: Dict[str, Any] = {'accession_id': accession_id,
+                              'investigator': {}}
     query_result = {}
     for query in queries:
         if sparql_result := sparql_query(Template(query).substitute(accession_id=accession_id)):
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 7994aef..338b320 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,6 +1,5 @@
 """This class contains functions relating to trait data manipulation"""
 import os
-import MySQLdb
 from functools import reduce
 from typing import Any, Dict, Union, Sequence
 
@@ -111,7 +110,6 @@ def get_trait_csv_sample_data(conn: Any,
 
 
 def update_sample_data(conn: Any, #pylint: disable=[R0913]
-
                        trait_name: str,
                        strain_name: str,
                        phenotype_id: int,
@@ -204,25 +202,30 @@ def delete_sample_data(conn: Any,
                  "AND Strain.Name = \"%s\"") % (trait_name,
                                                 phenotype_id,
                                                 str(strain_name)))
-            strain_id, data_id = cursor.fetchone()
 
-            cursor.execute(("DELETE FROM PublishData "
+            # Check if it exists if the data was already deleted:
+            if _result := cursor.fetchone():
+                strain_id, data_id = _result
+
+            # Only run if the strain_id and data_id exist
+            if strain_id and data_id:
+                cursor.execute(("DELETE FROM PublishData "
                             "WHERE StrainId = %s AND Id = %s")
-                           % (strain_id, data_id))
-            deleted_published_data = cursor.rowcount
-
-            # Delete the PublishSE table
-            cursor.execute(("DELETE FROM PublishSE "
-                            "WHERE StrainId = %s AND DataId = %s") %
-                           (strain_id, data_id))
-            deleted_se_data = cursor.rowcount
-
-            # Delete the NStrain table
-            cursor.execute(("DELETE FROM NStrain "
-                            "WHERE StrainId = %s AND DataId = %s" %
-                            (strain_id, data_id)))
-            deleted_n_strains = cursor.rowcount
-        except Exception as e: #pylint: disable=[C0103, W0612]
+                               % (strain_id, data_id))
+                deleted_published_data = cursor.rowcount
+
+                # Delete the PublishSE table
+                cursor.execute(("DELETE FROM PublishSE "
+                                "WHERE StrainId = %s AND DataId = %s") %
+                               (strain_id, data_id))
+                deleted_se_data = cursor.rowcount
+
+                # Delete the NStrain table
+                cursor.execute(("DELETE FROM NStrain "
+                                "WHERE StrainId = %s AND DataId = %s" %
+                                (strain_id, data_id)))
+                deleted_n_strains = cursor.rowcount
+        except Exception as e:  #pylint: disable=[C0103, W0612]
             conn.rollback()
             raise MySQLdb.Error
         conn.commit()
@@ -255,6 +258,13 @@ def insert_sample_data(conn: Any, #pylint: disable=[R0913]
                            (strain_name,))
             strain_id = cursor.fetchone()
 
+            # Return early if an insert already exists!
+            cursor.execute("SELECT Id FROM PublishData where Id = %s "
+                           "AND StrainId = %s",
+                           (data_id, strain_id))
+            if cursor.fetchone():  # This strain already exists
+                return (0, 0, 0)
+
             # Insert the PublishData table
             cursor.execute(("INSERT INTO PublishData (Id, StrainId, value)"
                             "VALUES (%s, %s, %s)"),