diff options
author | Frederick Muriuki Muriithi | 2025-03-26 16:07:31 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-03-26 16:07:31 -0500 |
commit | d974f684ef9f73ba2caf7921f7effd24db163d21 (patch) | |
tree | 75ffd600f1e6acd4d3d38cc86315c9d1cb5944ee /scripts | |
parent | 9337909f5d9e3eb2b094988d0083b6f37a47c8cd (diff) | |
download | gn-uploader-d974f684ef9f73ba2caf7921f7effd24db163d21.tar.gz |
Pass PubMed IDs from the file to difference computation function.
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/phenotypes_bulk_edit.py | 15 |
1 file changed, 10 insertions, 5 deletions
```diff
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 269f15e..b647199 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -82,7 +82,6 @@ def descriptions_differences(file_data, db_data) -> dict[str, str]:
     return diff
 
 
-def compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
 def __fetch_publications__(conn, ids):
     """Fetch publication from database by ID."""
     paramstr = ",".join(["(%s, %s)"] * len(ids))
@@ -146,6 +145,7 @@ def publications_differences(file_data, db_data, pubmed_ids):
     pass
 
 
+def compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids, pubmed_ids) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
     """Compute differences between data in DB and edited data."""
     logger.info("Computing differences.")
     # 1. Basic Phenotype data differences
@@ -229,16 +229,21 @@ def run(conn, job):
     """Process the data and update it."""
     file_contents = tuple(sorted(read_file(Path(job["metadata"]["edit-file"])),
                                  key=lambda item: item["phenotype_id"]))
-    pheno_ids, pheno_xref_ids = reduce(
+    pheno_ids, pheno_xref_ids, pubmed_ids = reduce(
         lambda coll, curr: (
             coll[0] + (curr["phenotype_id"],),
-            coll[1] + ((curr["phenotype_id"], curr["xref_id"]),)),
+            coll[1] + ((curr["phenotype_id"], curr["xref_id"]),),
+            coll[2].union(set([curr["PubMed_ID"]]))),
         file_contents,
-        (tuple(), tuple()))
+        (tuple(), tuple(), set([None])))
     check_ids(conn, pheno_xref_ids)
     check_for_mandatory_fields()
     # stop running here if any errors are found.
-    compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids)
+    compute_differences(conn,
+                        file_contents,
+                        pheno_ids,
+                        pheno_xref_ids,
+                        pubmed_ids)
     update_descriptions()
     link_publications()
     update_values()
```