about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/phenotypes_bulk_edit.py 15
1 file changed, 10 insertions, 5 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 269f15e..b647199 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -82,7 +82,6 @@ def descriptions_differences(file_data, db_data) -> dict[str, str]:
     return diff
 
 
-def compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
 def __fetch_publications__(conn, ids):
     """Fetch publication from database by ID."""
     paramstr = ",".join(["(%s, %s)"] * len(ids))
@@ -146,6 +145,7 @@ def publications_differences(file_data, db_data, pubmed_ids):
     pass
 
 
+def compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids, pubmed_ids) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
     """Compute differences between data in DB and edited data."""
     logger.info("Computing differences.")
     # 1. Basic Phenotype data differences
@@ -229,16 +229,21 @@ def run(conn, job):
     """Process the data and update it."""
     file_contents = tuple(sorted(read_file(Path(job["metadata"]["edit-file"])),
                                  key=lambda item: item["phenotype_id"]))
-    pheno_ids, pheno_xref_ids = reduce(
+    pheno_ids, pheno_xref_ids, pubmed_ids = reduce(
         lambda coll, curr: (
             coll[0] + (curr["phenotype_id"],),
-            coll[1] + ((curr["phenotype_id"], curr["xref_id"]),)),
+            coll[1] + ((curr["phenotype_id"], curr["xref_id"]),),
+            coll[2].union(set([curr["PubMed_ID"]]))),
         file_contents,
-        (tuple(), tuple()))
+        (tuple(), tuple(), set([None])))
     check_ids(conn, pheno_xref_ids)
     check_for_mandatory_fields()
     # stop running here if any errors are found.
-    compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids)
+    compute_differences(conn,
+                        file_contents,
+                        pheno_ids,
+                        pheno_xref_ids,
+                        pubmed_ids)
     update_descriptions()
     link_publications()
     update_values()