aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--scripts/phenotypes_bulk_edit.py15
1 files changed, 10 insertions, 5 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 269f15e..b647199 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -82,7 +82,6 @@ def descriptions_differences(file_data, db_data) -> dict[str, str]:
return diff
-def compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
def __fetch_publications__(conn, ids):
"""Fetch publication from database by ID."""
paramstr = ",".join(["(%s, %s)"] * len(ids))
@@ -146,6 +145,7 @@ def publications_differences(file_data, db_data, pubmed_ids):
pass
+def compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids, pubmed_ids) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
"""Compute differences between data in DB and edited data."""
logger.info("Computing differences.")
# 1. Basic Phenotype data differences
@@ -229,16 +229,21 @@ def run(conn, job):
"""Process the data and update it."""
file_contents = tuple(sorted(read_file(Path(job["metadata"]["edit-file"])),
key=lambda item: item["phenotype_id"]))
- pheno_ids, pheno_xref_ids = reduce(
+ pheno_ids, pheno_xref_ids, pubmed_ids = reduce(
lambda coll, curr: (
coll[0] + (curr["phenotype_id"],),
- coll[1] + ((curr["phenotype_id"], curr["xref_id"]),)),
+ coll[1] + ((curr["phenotype_id"], curr["xref_id"]),),
+ coll[2].union(set([curr["PubMed_ID"]]))),
file_contents,
- (tuple(), tuple()))
+ (tuple(), tuple(), set([None])))
check_ids(conn, pheno_xref_ids)
check_for_mandatory_fields()
# stop running here if any errors are found.
- compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids)
+ compute_differences(conn,
+ file_contents,
+ pheno_ids,
+ pheno_xref_ids,
+ pubmed_ids)
update_descriptions()
link_publications()
update_values()