diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/phenotypes_bulk_edit.py | 42 |
1 files changed, 8 insertions, 34 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py index 20fa66b..6d277e5 100644 --- a/scripts/phenotypes_bulk_edit.py +++ b/scripts/phenotypes_bulk_edit.py @@ -13,6 +13,7 @@ from MySQLdb.cursors import DictCursor from gn_libs import jobs, mysqldb, sqlite3 import uploader.publications.pubmed as pmed +from uploader.publications.misc import publications_differences from uploader.publications.models import fetch_phenotype_publications logging.basicConfig( format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s") @@ -85,36 +86,6 @@ def descriptions_differences(file_data, db_data) -> dict[str, str]: return diff -def publications_differences(conn, file_data, db_data, pubmed_ids) -> dict: - """Compute differences in the publications.""" - logger.info("Computing differences in publications.") - assert len(file_data) == len(db_data), "Publication counts differ!" - db_pubmed_ids = reduce(lambda coll, curr: coll.union(set([curr["PubMed_ID"]])), - db_data, - set([None])) - - pubmedid_to_id_map = { - f"{row['PhenotypeId']}::{row['xref_id']}": row["PublicationId"] for row in db_data - } - new_pubmed_ids = tuple(pubmed_ids.difference(db_pubmed_ids)) - new_publications = __save_new_publications__( - conn, pmed.fetch_publications(new_pubmed_ids), new_pubmed_ids) - new_pubmedid_to_id_map = { - row["PubMed_ID"]: new_publications.get( - row["PubMed_ID"], pubmedid_to_id_map[f"{row['phenotype_id']}::{row['xref_id']}"]) - for row in file_data - } - - return tuple( - item for item in ({ - "PhenotypeId": row["phenotype_id"], - "xref_id": row["xref_id"], - "PublicationId": new_pubmedid_to_id_map.get( - row["PubMed_ID"], pubmedid_to_id_map[f"{row['phenotype_id']}::{row['xref_id']}"]) - } for row in file_data) - if item["PublicationId"] != pubmedid_to_id_map[f"{item['PhenotypeId']}::{item['xref_id']}"]) - - def compute_differences( conn, file_contents, @@ -129,12 +100,15 @@ def compute_differences( desc_diff = descriptions_differences( file_contents, __fetch_phenotypes__(conn, pheno_ids)) logger.debug("DESCRIPTIONS DIFFERENCES: %s", desc_diff) + # b. Publications differences + db_publications = fetch_phenotype_publications(conn, pheno_xref_ids) + new_publications = create_new_publications( + conn, pmed.fetch_publications(new_pubmed_ids)) pub_diff = publications_differences( - conn, - file_contents, - fetch_phenotype_publications(conn, pheno_xref_ids), - pubmed_ids) + file_contents, db_publications, { + row["PubMed_ID"]: row["PublicationId"] + for row in db_publications + new_publications}) logger.debug("Publications diff: %s", pub_diff) # 2. Data differences # data_diff = data_differences(...) |