diff options
Diffstat (limited to 'uploader')
-rw-r--r-- | uploader/__init__.py | 2 | ||||
-rw-r--r-- | uploader/phenotypes/misc.py | 26 | ||||
-rw-r--r-- | uploader/phenotypes/views.py | 52 | ||||
-rw-r--r-- | uploader/publications/misc.py | 19 | ||||
-rw-r--r-- | uploader/publications/models.py | 60 | ||||
-rw-r--r-- | uploader/publications/pubmed.py | 8 |
6 files changed, 141 insertions, 26 deletions
diff --git a/uploader/__init__.py b/uploader/__init__.py index e25fc5b..23e66c1 100644 --- a/uploader/__init__.py +++ b/uploader/__init__.py @@ -54,7 +54,7 @@ def setup_logging(app: Flask) -> Flask: return __log_gunicorn__(app) if bool(software) else __log_dev__(app) -def create_app(config: dir): +def create_app(config: dict = {}): """The application factory. config: dict diff --git a/uploader/phenotypes/misc.py b/uploader/phenotypes/misc.py new file mode 100644 index 0000000..cbe3b7f --- /dev/null +++ b/uploader/phenotypes/misc.py @@ -0,0 +1,26 @@ +"""Miscellaneous functions handling phenotypes and phenotypes data.""" +import logging + +logger = logging.getLogger(__name__) + + +def phenotypes_data_differences( + filedata: tuple[dict, ...], dbdata: tuple[dict, ...] +) -> tuple[dict, ...]: + """Compute differences between file data and db data""" + diff = tuple() + for filerow, dbrow in zip( + sorted(filedata, key=lambda item: (item["phenotype_id"], item["xref_id"])), + sorted(dbdata, key=lambda item: (item["PhenotypeId"], item["xref_id"]))): + for samplename, value in filerow["data"].items(): + if value != dbrow["data"].get(samplename, {}).get("value"): + diff = diff + ({ + "PhenotypeId": filerow["phenotype_id"], + "xref_id": filerow["xref_id"], + "DataId": dbrow["DataId"], + "StrainId": dbrow["data"].get(samplename, {}).get("StrainId"), + "StrainName": samplename, + "value": value + },) + + return diff diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py index 9c737fc..a18c44d 100644 --- a/uploader/phenotypes/views.py +++ b/uploader/phenotypes/views.py @@ -868,6 +868,17 @@ def process_phenotype_data_for_download(pheno: dict) -> dict: } +BULK_EDIT_COMMON_FIELDNAMES = [ + "UniqueIdentifier", + "Post_publication_description", + "Pre_publication_abbreviation", + "Pre_publication_description", + "Original_description", + "Post_publication_abbreviation", + "PubMed_ID" +] + + @phenotypesbp.route( "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" "/<int:dataset_id>/edit-download", @@ -915,15 +926,9 @@ def edit_download_phenotype_data(# pylint: disable=[unused-argument] "comment line. This line, and all the lines above it, are " "all comment lines. Comment lines will be ignored.\n") writer = csv.DictWriter(outfile, - fieldnames=[ - "UniqueIdentifier", - "Post_publication_description", - "Pre_publication_abbreviation", - "Pre_publication_description", - "Original_description", - "Post_publication_abbreviation", - "PubMed_ID" - ] + samples_list, + fieldnames= ( + BULK_EDIT_COMMON_FIELDNAMES + + samples_list), dialect="excel-tab") writer.writeheader() writer.writerows(data) @@ -967,23 +972,28 @@ def edit_upload_phenotype_data(# pylint: disable=[unused-argument] jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] with sqlite3.connection(jobs_db) as conn: job_id = uuid.uuid4() + job_cmd = [ + sys.executable, "-u", + "-m", "scripts.phenotypes_bulk_edit", + app.config["SQL_URI"], + jobs_db, + str(job_id), + "--log-level", + logging.getLevelName( + app.logger.getEffectiveLevel() + ).lower() + ] + app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd) _job = gnlibs_jobs.launch_job( gnlibs_jobs.initialise_job(conn, job_id, - [ - sys.executable, "-u", - "-m", "scripts.phenotypes_bulk_edit", - app.config["SQL_URI"], - jobs_db, - str(job_id), - "--log-level", - logging.getLevelName( - app.logger.getEffectiveLevel() - ).lower() - ], + job_cmd, "phenotype-bulk-edit", extra_meta = { - "edit-file": str(edit_file) + "edit-file": str(edit_file), + "species-id": species["SpeciesId"], + "population-id": population["Id"], + "dataset-id": dataset["Id"] }), jobs_db, f"{app.config['UPLOAD_FOLDER']}/job_errors", diff --git a/uploader/publications/misc.py b/uploader/publications/misc.py index d93ecdd..fca6f71 100644 --- a/uploader/publications/misc.py +++ b/uploader/publications/misc.py @@ -4,7 +4,22 @@ def publications_differences( filedata: tuple[dict, ...], dbdata: tuple[dict, ...], - pubmedid2pubidmap: dict[str, int] + pubmedid2pubidmap: tuple[dict, ...] ) -> tuple[dict, ...]: """Compute the differences between file data and db data""" - return tuple() + diff = tuple() + for filerow, dbrow in zip( + sorted(filedata, key=lambda item: ( + item["phenotype_id"], item["xref_id"])), + sorted(dbdata, key=lambda item: ( + item["PhenotypeId"], item["xref_id"]))): + if filerow["PubMed_ID"] == dbrow["PubMed_ID"]: + continue + + newpubmed = filerow["PubMed_ID"] + diff = diff + ({ + **dbrow, + "PubMed_ID": newpubmed, + "PublicationId": pubmedid2pubidmap.get(newpubmed)},) + + return diff diff --git a/uploader/publications/models.py b/uploader/publications/models.py index 89da06c..3fc9542 100644 --- a/uploader/publications/models.py +++ b/uploader/publications/models.py @@ -1,7 +1,17 @@ """Module to handle persistence and retrieval of publication to/from MariaDB""" +import logging + +from MySQLdb.cursors import DictCursor + +from gn_libs.mysqldb import Connection, debug_query + +logger = logging.getLogger(__name__) + def fetch_phenotype_publications( - conn, ids: tuple[tuple[int, int], ...]) -> tuple[dict, ...]: + conn: Connection, + ids: tuple[tuple[int, int], ...] +) -> tuple[dict, ...]: """Fetch publication from database by ID.""" paramstr = ",".join(["(%s, %s)"] * len(ids)) query = ( @@ -13,3 +23,51 @@ def fetch_phenotype_publications( with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(query, tuple(item for row in ids for item in row)) return tuple(dict(row) for row in cursor.fetchall()) + + +def create_new_publications( + conn: Connection, + publications: tuple[dict, ...] +) -> tuple[dict, ...]: + if len(publications) > 0: + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.executemany( + ("INSERT INTO " + "Publication( " + "PubMed_ID, Abstract, Authors, Title, Journal, Volume, Pages, " + "Month, Year" + ") " + "VALUES(" + "%(pubmed_id)s, %(abstract)s, %(authors)s, %(title)s, " + "%(journal)s, %(volume)s, %(pages)s, %(month)s, %(year)s" + ") " + "ON DUPLICATE KEY UPDATE " + "Abstract=VALUES(Abstract), Authors=VALUES(Authors), " + "Title=VALUES(Title), Journal=VALUES(Journal), " + "Volume=VALUES(Volume), Pages=VALUES(pages), " + "Month=VALUES(Month), Year=VALUES(Year) " + "RETURNING *"), + publications) + return tuple({ + **row, "PublicationId": row["Id"] + } for row in cursor.fetchall()) + return tuple() + + +def update_publications(conn: Connection , publications: tuple[dict, ...]) -> tuple[dict, ...]: + """Update details for multiple publications""" + if len(publications) > 0: + with conn.cursor(cursorclass=DictCursor) as cursor: + logger.debug("UPDATING PUBLICATIONS: %s", publications) + cursor.executemany( + ("UPDATE Publication SET " + "PubMed_ID=%(pubmed_id)s, Abstract=%(abstract)s, " + "Authors=%(authors)s, Title=%(title)s, Journal=%(journal)s, " + "Volume=%(volume)s, Pages=%(pages)s, Month=%(month)s, " + "Year=%(year)s " + "WHERE Id=%(publication_id)s"), + publications) + debug_query(cursor, logger) + return publications + return tuple() + return tuple() diff --git a/uploader/publications/pubmed.py b/uploader/publications/pubmed.py index d984d99..ed9b652 100644 --- a/uploader/publications/pubmed.py +++ b/uploader/publications/pubmed.py @@ -1,4 +1,10 @@ """Module to interact with NCBI's PubMed""" +import logging + +import requests +from lxml import etree + +logger = logging.getLogger(__name__) def __pub_date__(pubdate: etree.Element): @@ -44,7 +50,7 @@ def __abstract__(article: etree.Element) -> str: def __article__(pubmed_article: etree.Element) -> dict: article = pubmed_article.find("MedlineCitation/Article") return { - "pubmed_id": pubmed_article.find("MedlineCitation/PMID").text, + "pubmed_id": int(pubmed_article.find("MedlineCitation/PMID").text), "title": article.find("ArticleTitle").text, **__journal__(article.find("Journal")), "abstract": __abstract__(article), |