aboutsummaryrefslogtreecommitdiff
path: root/uploader
diff options
context:
space:
mode:
Diffstat (limited to 'uploader')
-rw-r--r--uploader/__init__.py2
-rw-r--r--uploader/phenotypes/misc.py26
-rw-r--r--uploader/phenotypes/views.py52
-rw-r--r--uploader/publications/misc.py19
-rw-r--r--uploader/publications/models.py60
-rw-r--r--uploader/publications/pubmed.py8
6 files changed, 141 insertions, 26 deletions
diff --git a/uploader/__init__.py b/uploader/__init__.py
index e25fc5b..23e66c1 100644
--- a/uploader/__init__.py
+++ b/uploader/__init__.py
@@ -54,7 +54,7 @@ def setup_logging(app: Flask) -> Flask:
return __log_gunicorn__(app) if bool(software) else __log_dev__(app)
-def create_app(config: dir):
+def create_app(config: dict = {}):
"""The application factory.
config: dict
diff --git a/uploader/phenotypes/misc.py b/uploader/phenotypes/misc.py
new file mode 100644
index 0000000..cbe3b7f
--- /dev/null
+++ b/uploader/phenotypes/misc.py
@@ -0,0 +1,26 @@
+"""Miscellaneous functions handling phenotypes and phenotypes data."""
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def phenotypes_data_differences(
+ filedata: tuple[dict, ...], dbdata: tuple[dict, ...]
+) -> tuple[dict, ...]:
+ """Compute differences between file data and db data"""
+ diff = tuple()
+ for filerow, dbrow in zip(
+ sorted(filedata, key=lambda item: (item["phenotype_id"], item["xref_id"])),
+ sorted(dbdata, key=lambda item: (item["PhenotypeId"], item["xref_id"]))):
+ for samplename, value in filerow["data"].items():
+ if value != dbrow["data"].get(samplename, {}).get("value"):
+ diff = diff + ({
+ "PhenotypeId": filerow["phenotype_id"],
+ "xref_id": filerow["xref_id"],
+ "DataId": dbrow["DataId"],
+ "StrainId": dbrow["data"].get(samplename, {}).get("StrainId"),
+ "StrainName": samplename,
+ "value": value
+ },)
+
+ return diff
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py
index 9c737fc..a18c44d 100644
--- a/uploader/phenotypes/views.py
+++ b/uploader/phenotypes/views.py
@@ -868,6 +868,17 @@ def process_phenotype_data_for_download(pheno: dict) -> dict:
}
+BULK_EDIT_COMMON_FIELDNAMES = [
+ "UniqueIdentifier",
+ "Post_publication_description",
+ "Pre_publication_abbreviation",
+ "Pre_publication_description",
+ "Original_description",
+ "Post_publication_abbreviation",
+ "PubMed_ID"
+]
+
+
@phenotypesbp.route(
"<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
"/<int:dataset_id>/edit-download",
@@ -915,15 +926,9 @@ def edit_download_phenotype_data(# pylint: disable=[unused-argument]
"comment line. This line, and all the lines above it, are "
"all comment lines. Comment lines will be ignored.\n")
writer = csv.DictWriter(outfile,
- fieldnames=[
- "UniqueIdentifier",
- "Post_publication_description",
- "Pre_publication_abbreviation",
- "Pre_publication_description",
- "Original_description",
- "Post_publication_abbreviation",
- "PubMed_ID"
- ] + samples_list,
+ fieldnames=(
+ BULK_EDIT_COMMON_FIELDNAMES +
+ samples_list),
dialect="excel-tab")
writer.writeheader()
writer.writerows(data)
@@ -967,23 +972,28 @@ def edit_upload_phenotype_data(# pylint: disable=[unused-argument]
jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
with sqlite3.connection(jobs_db) as conn:
job_id = uuid.uuid4()
+ job_cmd = [
+ sys.executable, "-u",
+ "-m", "scripts.phenotypes_bulk_edit",
+ app.config["SQL_URI"],
+ jobs_db,
+ str(job_id),
+ "--log-level",
+ logging.getLevelName(
+ app.logger.getEffectiveLevel()
+ ).lower()
+ ]
+ app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd)
_job = gnlibs_jobs.launch_job(
gnlibs_jobs.initialise_job(conn,
job_id,
- [
- sys.executable, "-u",
- "-m", "scripts.phenotypes_bulk_edit",
- app.config["SQL_URI"],
- jobs_db,
- str(job_id),
- "--log-level",
- logging.getLevelName(
- app.logger.getEffectiveLevel()
- ).lower()
- ],
+ job_cmd,
"phenotype-bulk-edit",
extra_meta = {
- "edit-file": str(edit_file)
+ "edit-file": str(edit_file),
+ "species-id": species["SpeciesId"],
+ "population-id": population["Id"],
+ "dataset-id": dataset["Id"]
}),
jobs_db,
f"{app.config['UPLOAD_FOLDER']}/job_errors",
diff --git a/uploader/publications/misc.py b/uploader/publications/misc.py
index d93ecdd..fca6f71 100644
--- a/uploader/publications/misc.py
+++ b/uploader/publications/misc.py
@@ -4,7 +4,22 @@
def publications_differences(
filedata: tuple[dict, ...],
dbdata: tuple[dict, ...],
- pubmedid2pubidmap: dict[str, int]
+ pubmedid2pubidmap: dict[int, int]
) -> tuple[dict, ...]:
"""Compute the differences between file data and db data"""
- return tuple()
+ diff = tuple()
+ for filerow, dbrow in zip(
+ sorted(filedata, key=lambda item: (
+ item["phenotype_id"], item["xref_id"])),
+ sorted(dbdata, key=lambda item: (
+ item["PhenotypeId"], item["xref_id"]))):
+ if filerow["PubMed_ID"] == dbrow["PubMed_ID"]:
+ continue
+
+ newpubmed = filerow["PubMed_ID"]
+ diff = diff + ({
+ **dbrow,
+ "PubMed_ID": newpubmed,
+ "PublicationId": pubmedid2pubidmap.get(newpubmed)},)
+
+ return diff
diff --git a/uploader/publications/models.py b/uploader/publications/models.py
index 89da06c..3fc9542 100644
--- a/uploader/publications/models.py
+++ b/uploader/publications/models.py
@@ -1,7 +1,17 @@
"""Module to handle persistence and retrieval of publication to/from MariaDB"""
+import logging
+
+from MySQLdb.cursors import DictCursor
+
+from gn_libs.mysqldb import Connection, debug_query
+
+logger = logging.getLogger(__name__)
+
def fetch_phenotype_publications(
- conn, ids: tuple[tuple[int, int], ...]) -> tuple[dict, ...]:
+ conn: Connection,
+ ids: tuple[tuple[int, int], ...]
+) -> tuple[dict, ...]:
"""Fetch publication from database by ID."""
paramstr = ",".join(["(%s, %s)"] * len(ids))
query = (
@@ -13,3 +23,51 @@ def fetch_phenotype_publications(
with conn.cursor(cursorclass=DictCursor) as cursor:
cursor.execute(query, tuple(item for row in ids for item in row))
return tuple(dict(row) for row in cursor.fetchall())
+
+
+def create_new_publications(
+ conn: Connection,
+ publications: tuple[dict, ...]
+) -> tuple[dict, ...]:
+ """Create new publications, returning the rows as stored in the database."""
+ if len(publications) > 0:
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.executemany(
+ ("INSERT INTO "
+ "Publication( "
+ "PubMed_ID, Abstract, Authors, Title, Journal, Volume, Pages, "
+ "Month, Year"
+ ") "
+ "VALUES("
+ "%(pubmed_id)s, %(abstract)s, %(authors)s, %(title)s, "
+ "%(journal)s, %(volume)s, %(pages)s, %(month)s, %(year)s"
+ ") "
+ "ON DUPLICATE KEY UPDATE "
+ "Abstract=VALUES(Abstract), Authors=VALUES(Authors), "
+ "Title=VALUES(Title), Journal=VALUES(Journal), "
+ "Volume=VALUES(Volume), Pages=VALUES(Pages), "
+ "Month=VALUES(Month), Year=VALUES(Year) "
+ "RETURNING *"),
+ publications)
+ return tuple({
+ **row, "PublicationId": row["Id"]
+ } for row in cursor.fetchall())
+ return tuple()
+
+
+def update_publications(conn: Connection, publications: tuple[dict, ...]) -> tuple[dict, ...]:
+ """Update details for multiple publications"""
+ if len(publications) > 0:
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ logger.debug("UPDATING PUBLICATIONS: %s", publications)
+ cursor.executemany(
+ ("UPDATE Publication SET "
+ "PubMed_ID=%(pubmed_id)s, Abstract=%(abstract)s, "
+ "Authors=%(authors)s, Title=%(title)s, Journal=%(journal)s, "
+ "Volume=%(volume)s, Pages=%(pages)s, Month=%(month)s, "
+ "Year=%(year)s "
+ "WHERE Id=%(publication_id)s"),
+ publications)
+ debug_query(cursor, logger)
+ return publications
+ return tuple()
diff --git a/uploader/publications/pubmed.py b/uploader/publications/pubmed.py
index d984d99..ed9b652 100644
--- a/uploader/publications/pubmed.py
+++ b/uploader/publications/pubmed.py
@@ -1,4 +1,10 @@
"""Module to interact with NCBI's PubMed"""
+import logging
+
+import requests
+from lxml import etree
+
+logger = logging.getLogger(__name__)
def __pub_date__(pubdate: etree.Element):
@@ -44,7 +50,7 @@ def __abstract__(article: etree.Element) -> str:
def __article__(pubmed_article: etree.Element) -> dict:
article = pubmed_article.find("MedlineCitation/Article")
return {
- "pubmed_id": pubmed_article.find("MedlineCitation/PMID").text,
+ "pubmed_id": int(pubmed_article.find("MedlineCitation/PMID").text),
"title": article.find("ArticleTitle").text,
**__journal__(article.find("Journal")),
"abstract": __abstract__(article),