diff options
-rw-r--r-- | scripts/phenotypes_bulk_edit.py | 15 |
1 files changed, 10 insertions, 5 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py index 269f15e..b647199 100644 --- a/scripts/phenotypes_bulk_edit.py +++ b/scripts/phenotypes_bulk_edit.py @@ -82,7 +82,6 @@ def descriptions_differences(file_data, db_data) -> dict[str, str]: return diff -def compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]: def __fetch_publications__(conn, ids): """Fetch publication from database by ID.""" paramstr = ",".join(["(%s, %s)"] * len(ids)) @@ -146,6 +145,7 @@ def publications_differences(file_data, db_data, pubmed_ids): pass +def compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids, pubmed_ids) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]: """Compute differences between data in DB and edited data.""" logger.info("Computing differences.") # 1. Basic Phenotype data differences @@ -229,16 +229,21 @@ def run(conn, job): """Process the data and update it.""" file_contents = tuple(sorted(read_file(Path(job["metadata"]["edit-file"])), key=lambda item: item["phenotype_id"])) - pheno_ids, pheno_xref_ids = reduce( + pheno_ids, pheno_xref_ids, pubmed_ids = reduce( lambda coll, curr: ( coll[0] + (curr["phenotype_id"],), - coll[1] + ((curr["phenotype_id"], curr["xref_id"]),)), + coll[1] + ((curr["phenotype_id"], curr["xref_id"]),), + coll[2].union(set([curr["PubMed_ID"]]))), file_contents, - (tuple(), tuple())) + (tuple(), tuple(), set([None]))) check_ids(conn, pheno_xref_ids) check_for_mandatory_fields() # stop running here if any errors are found. - compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids) + compute_differences(conn, + file_contents, + pheno_ids, + pheno_xref_ids, + pubmed_ids) update_descriptions() link_publications() update_values() |