63 files changed, 3765 insertions, 564 deletions
diff --git a/quality_control/checks.py b/quality_control/checks.py index bdfd12b..bb05e31 100644 --- a/quality_control/checks.py +++ b/quality_control/checks.py @@ -52,12 +52,15 @@ def decimal_places_pattern(mini: int, maxi: Optional[int] = None) -> re.Pattern: + r")$" ) -def decimal_points_error(filename: str,# pylint: disable=[too-many-arguments] - lineno: int, - field: str, - value: str, - mini: int, - maxi: Optional[int] = None) -> Optional[InvalidValue]: +def decimal_points_error( + # pylint: disable=[too-many-arguments, too-many-positional-arguments] + filename: str, + lineno: int, + field: str, + value: str, + mini: int, + maxi: Optional[int] = None +) -> Optional[InvalidValue]: """ Check that 'value' in a decimal number with the appropriate decimal places. """ diff --git a/quality_control/parsing.py b/quality_control/parsing.py index f1d21fc..7a8185d 100644 --- a/quality_control/parsing.py +++ b/quality_control/parsing.py @@ -104,23 +104,22 @@ def collect_errors( if line_number == 1: consistent_columns_checker = make_column_consistency_checker( filename, line) - for error in __process_errors__( - filename, line_number, line, - partial(header_errors, strains=strains), - errors): - yield error + yield from __process_errors__( + filename, line_number, line, + partial(header_errors, strains=strains), + errors) if line_number != 1: - col_consistency_error = consistent_columns_checker(line_number, line) + col_consistency_error = consistent_columns_checker(# pylint: disable=[possibly-used-before-assignment] + line_number, line) if col_consistency_error: yield col_consistency_error - for error in __process_errors__( + yield from __process_errors__( filename, line_number, line, ( average_errors if filetype == FileType.AVERAGE else se_errors), - errors): - yield error + errors) if update_progress: update_progress(line_number, line) diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py index dfa84ba..06175ce 100644 --- a/r_qtl/r_qtl2.py +++ b/r_qtl/r_qtl2.py @@ -16,7 +16,7 @@ from r_qtl.exceptions import InvalidFormat, MissingFileException FILE_TYPES = ( "geno", "founder_geno", "pheno", "covar", "phenocovar", "gmap", "pmap", - "phenose") + "phenose", "phenonum") __CONTROL_FILE_ERROR_MESSAGE__ = ( "The zipped bundle that was provided does not contain a valid control file " @@ -575,8 +575,30 @@ def read_text_file(filepath: Union[str, Path]) -> Iterator[str]: def read_csv_file(filepath: Union[str, Path], separator: str = ",", comment_char: str = "#") -> Iterator[tuple[str, ...]]: - """Read a file as a csv file.""" + """Read a file as a csv file. 
This does not process the N/A values.""" for line in read_text_file(filepath): if line.startswith(comment_char): continue yield tuple(field.strip() for field in line.split(separator)) + + +def read_csv_file_headers( + filepath: Union[str, Path], + transposed: bool, + separator: str = ",", + comment_char: str = "#" +) -> tuple[str, ...]: + """Read the 'true' headers of a CSV file.""" + headers = tuple() + for line in read_text_file(filepath): + if line.startswith(comment_char): + continue + + line = tuple(field.strip() for field in line.split(separator)) + if not transposed: + return line + + headers = headers + (line[0],) + continue + + return headers diff --git a/scripts/cli_parser.py b/scripts/cli_parser.py index d42ae66..0c91c5e 100644 --- a/scripts/cli_parser.py +++ b/scripts/cli_parser.py @@ -23,7 +23,8 @@ def init_cli_parser(program: str, description: Optional[str] = None) -> Argument "--loglevel", type=str, default="INFO", - choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", + "debug", "info", "warning", "error", "critical"], help="The severity of events to track with the logger.") return parser diff --git a/scripts/insert_samples.py b/scripts/insert_samples.py index 1b0a052..742c4ae 100644 --- a/scripts/insert_samples.py +++ b/scripts/insert_samples.py @@ -3,6 +3,7 @@ import sys import logging import pathlib import argparse +import traceback import MySQLdb as mdb from redis import Redis @@ -73,6 +74,7 @@ def insert_samples(conn: mdb.Connection,# pylint: disable=[too-many-arguments] print("Samples upload successfully completed.") return 0 + if __name__ == "__main__": def cli_args(): @@ -127,7 +129,7 @@ if __name__ == "__main__": def main(): """Run script to insert samples into the database.""" - + status_code = 1 # Exit with an Exception args = cli_args() check_db(args.databaseuri) check_redis(args.redisuri) @@ -137,13 +139,19 @@ if __name__ == "__main__": with (Redis.from_url(args.redisuri, decode_responses=True) as rconn, database_connection(args.databaseuri) as dbconn): - return insert_samples(dbconn, - rconn, - args.speciesid, - args.populationid, - args.samplesfile, - args.separator, - args.firstlineheading, - args.quotechar) + + try: + status_code = insert_samples(dbconn, + rconn, + args.speciesid, + args.populationid, + args.samplesfile, + args.separator, + args.firstlineheading, + args.quotechar) + except Exception as _exc: + print(traceback.format_exc(), file=sys.stderr) + + return status_code sys.exit(main()) diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py new file mode 100644 index 0000000..5ce37f3 --- /dev/null +++ b/scripts/load_phenotypes_to_db.py @@ -0,0 +1,518 @@ +import sys +import uuid +import json +import logging +import argparse +import datetime +from pathlib import Path +from zipfile import ZipFile +from typing import Any, Union +from urllib.parse import urljoin +from functools import reduce, partial + +from MySQLdb.cursors import Cursor, DictCursor + +from gn_libs import jobs, mysqldb, sqlite3, monadic_requests as mrequests + +from r_qtl import r_qtl2 as rqtl2 +from uploader.species.models import species_by_id +from uploader.population.models import population_by_species_and_id +from uploader.samples.models import samples_by_species_and_population +from uploader.phenotypes.models import ( + dataset_by_id, + save_phenotypes_data, + create_new_phenotypes, + quick_save_phenotypes_data) +from uploader.publications.models import ( + create_new_publications, + 
fetch_publication_by_id) + +from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter + +logging.basicConfig( + format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) + + + +def __replace_na_strings__(line, na_strings): + return ((None if value in na_strings else value) for value in line) + + +def save_phenotypes( + cursor: mysqldb.Connection, + control_data: dict[str, Any], + filesdir: Path +) -> tuple[dict, ...]: + """Read `phenofiles` and save the phenotypes therein.""" + ## TODO: Replace with something like this: ## + # phenofiles = control_data["phenocovar"] + control_data.get( + # "gn-metadata", {}).get("pheno", []) + # + # This is meant to load (and merge) data from the "phenocovar" and + # "gn-metadata -> pheno" files into a single collection of phenotypes. + phenofiles = tuple(filesdir.joinpath(_file) for _file in control_data["phenocovar"]) + if len(phenofiles) <= 0: + return tuple() + + if control_data["phenocovar_transposed"]: + logger.info("Undoing transposition of the files rows and columns.") + phenofiles = ( + rqtl2.transpose_csv_with_rename( + _file, + build_line_splitter(control_data), + build_line_joiner(control_data)) + for _file in phenofiles) + + _headers = rqtl2.read_csv_file_headers(phenofiles[0], + control_data["phenocovar_transposed"], + control_data["sep"], + control_data["comment.char"]) + return create_new_phenotypes( + cursor, + (dict(zip(_headers, + __replace_na_strings__(line, control_data["na.strings"]))) + for filecontent + in (rqtl2.read_csv_file(path, + separator=control_data["sep"], + comment_char=control_data["comment.char"]) + for path in phenofiles) + for idx, line in enumerate(filecontent) + if idx != 0)) + + +def __fetch_next_dataid__(conn: mysqldb.Connection) -> int: + """Fetch the next available DataId value from the database.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + "SELECT MAX(DataId) AS CurrentMaxDataId FROM PublishXRef") + return int(cursor.fetchone()["CurrentMaxDataId"]) + 1 + + +def __row_to_dataitems__( + sample_row: dict, + dataidmap: dict, + pheno_name2id: dict[str, int], + samples: dict +) -> tuple[dict, ...]: + samplename = sample_row["id"] + + return ({ + "phenotype_id": dataidmap[pheno_name2id[phenoname]]["phenotype_id"], + "data_id": dataidmap[pheno_name2id[phenoname]]["data_id"], + "sample_name": samplename, + "sample_id": samples[samplename]["Id"], + "value": phenovalue + } for phenoname, phenovalue in sample_row.items() if phenoname != "id") + + +def __build_dataitems__( + filetype, + phenofiles, + control_data, + samples, + dataidmap, + pheno_name2id +): + _headers = rqtl2.read_csv_file_headers( + phenofiles[0], + False, # Any transposed files have been un-transposed by this point + control_data["sep"], + control_data["comment.char"]) + _filescontents = ( + rqtl2.read_csv_file(path, + separator=control_data["sep"], + comment_char=control_data["comment.char"]) + for path in phenofiles) + _linescontents = ( + __row_to_dataitems__( + dict(zip(("id",) + _headers[1:], + __replace_na_strings__(line, control_data["na.strings"]))), + dataidmap, + pheno_name2id, + samples) + for linenum, line in (enumline for filecontent in _filescontents + for enumline in enumerate(filecontent)) + if linenum > 0) + return (item for items in _linescontents + for item in items + if item["value"] is not None) + + +def save_numeric_data( + conn: mysqldb.Connection, + dataidmap: dict, + pheno_name2id: dict[str, int], + samples: tuple[dict, ...], 
+ control_data: dict, + filesdir: Path, + filetype: str, + table: str +): + """Read data from files and save to the database.""" + phenofiles = tuple( + filesdir.joinpath(_file) for _file in control_data[filetype]) + if len(phenofiles) <= 0: + return tuple() + + if control_data[f"{filetype}_transposed"]: + logger.info("Undoing transposition of the files rows and columns.") + phenofiles = tuple( + rqtl2.transpose_csv_with_rename( + _file, + build_line_splitter(control_data), + build_line_joiner(control_data)) + for _file in phenofiles) + + try: + logger.debug("Attempt quick save with `LOAD … INFILE`.") + return quick_save_phenotypes_data( + conn, + table, + __build_dataitems__( + filetype, + phenofiles, + control_data, + samples, + dataidmap, + pheno_name2id), + filesdir) + except Exception as _exc: + logger.debug("Could not use `LOAD … INFILE`, using raw query", + exc_info=True) + import time;time.sleep(60) + return save_phenotypes_data( + conn, + table, + __build_dataitems__( + filetype, + phenofiles, + control_data, + samples, + dataidmap, + pheno_name2id)) + + +save_pheno_data = partial(save_numeric_data, + filetype="pheno", + table="PublishData") + + +save_phenotypes_se = partial(save_numeric_data, + filetype="phenose", + table="PublishSE") + + +save_phenotypes_n = partial(save_numeric_data, + filetype="phenonum", + table="NStrain") + + +def cross_reference_phenotypes_publications_and_data( + conn: mysqldb.Connection, xref_data: tuple[dict, ...] +): + """Crossreference the phenotypes, publication and data.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT MAX(Id) CurrentMaxId FROM PublishXRef") + _nextid = int(cursor.fetchone()["CurrentMaxId"]) + 1 + _params = tuple({**row, "xref_id": _id} + for _id, row in enumerate(xref_data, start=_nextid)) + cursor.executemany( + ("INSERT INTO PublishXRef(" + "Id, InbredSetId, PhenotypeId, PublicationId, DataId, comments" + ") " + "VALUES (" + "%(xref_id)s, %(population_id)s, %(phenotype_id)s, " + "%(publication_id)s, %(data_id)s, 'Upload of new data.'" + ")"), + _params) + return _params + return tuple() + + +def update_auth(authserver, token, species, population, dataset, xrefdata): + """Grant the user access to their data.""" + # TODO Call into the auth server to: + # 1. Link the phenotypes with a user group + # - fetch group: http://localhost:8081/auth/user/group + # - link data to group: http://localhost:8081/auth/data/link/phenotype + # - *might need code update in gn-auth: remove restriction, perhaps* + # 2. Create resource (perhaps?) + # - Get resource categories: http://localhost:8081/auth/resource/categories + # - Create a new resource: http://localhost:80host:8081/auth/resource/create + # - single resource for all phenotypes + # - resource name from user, species, population, dataset, datetime? + # - User will have "ownership" of resource by default + # 3. 
Link data to the resource: http://localhost:8081/auth/resource/data/link + # - Update code to allow linking multiple items in a single request + _tries = 0 # TODO use this to limit how many tries before quiting and bailing + _delay = 1 + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json" + } + def authserveruri(endpoint): + return urljoin(authserver, endpoint) + + def __fetch_user_details__(): + logger.debug("… Fetching user details") + return mrequests.get( + authserveruri("/auth/user/"), + headers=headers + ) + + def __link_data__(user): + logger.debug("… linking uploaded data to user's group") + return mrequests.post( + authserveruri("/auth/data/link/phenotype"), + headers=headers, + json={ + "species_name": species["Name"], + "group_id": user["group"]["group_id"], + "selected": [ + { + "SpeciesId": species["SpeciesId"], + "InbredSetId": population["Id"], + "PublishFreezeId": dataset["Id"], + "dataset_name": dataset["Name"], + "dataset_fullname": dataset["FullName"], + "dataset_shortname": dataset["ShortName"], + "PublishXRefId": item["xref_id"] + } + for item in xrefdata + ], + "using-raw-ids": "on" + }).then(lambda ld_results: (user, ld_results)) + + def __fetch_phenotype_category_details__(user, linkeddata): + logger.debug("… fetching phenotype category details") + return mrequests.get( + authserveruri("/auth/resource/categories"), + headers=headers + ).then( + lambda categories: ( + user, + linkeddata, + next(category for category in categories + if category["resource_category_key"] == "phenotype")) + ) + + def __create_resource__(user, linkeddata, category): + logger.debug("… creating authorisation resource object") + now = datetime.datetime.now().isoformat() + return mrequests.post( + authserveruri("/auth/resource/create"), + headers=headers, + json={ + "resource_category": category["resource_category_id"], + "resource_name": (f"{user['email']}—{dataset['Name']}—{now}—" + f"{len(xrefdata)} phenotypes"), + "public": "off" + }).then(lambda cr_results: (user, linkeddata, cr_results)) + + def __attach_data_to_resource__(user, linkeddata, resource): + logger.debug("… attaching data to authorisation resource object") + return mrequests.post( + authserveruri("/auth/resource/data/link"), + headers=headers, + json={ + "dataset_type": "phenotype", + "resource_id": resource["resource_id"], + "data_link_ids": [ + item["data_link_id"] for item in linkeddata["traits"]] + }).then(lambda attc: (user, linkeddata, resource, attc)) + + def __handle_error__(resp): + logger.error("ERROR: Updating the authorisation for the data failed.") + logger.debug( + "ERROR: The response from the authorisation server was:\n\t%s", + resp.json()) + return 1 + + def __handle_success__(val): + logger.info( + "The authorisation for the data has been updated successfully.") + return 0 + + return __fetch_user_details__().then(__link_data__).then( + lambda result: __fetch_phenotype_category_details__(*result) + ).then( + lambda result: __create_resource__(*result) + ).then( + lambda result: __attach_data_to_resource__(*result) + ).either(__handle_error__, __handle_success__) + + +def load_data(conn: mysqldb.Connection, job: dict) -> int: + """Load the data attached in the given job.""" + _job_metadata = job["metadata"] + # Steps + # 0. 
Read data from the files: can be multiple files per type + # + _species = species_by_id(conn, int(_job_metadata["species_id"])) + _population = population_by_species_and_id( + conn, + _species["SpeciesId"], + int(_job_metadata["population_id"])) + _dataset = dataset_by_id( + conn, + _species["SpeciesId"], + _population["Id"], + int(_job_metadata["dataset_id"])) + # 1. Just retrive the publication: Don't create publications for now. + _publication = fetch_publication_by_id( + conn, int(_job_metadata.get("publication_id", "0"))) or {"Id": 0} + # 2. Save all new phenotypes: + # -> return phenotype IDs + bundle = Path(_job_metadata["bundle_file"]) + _control_data = rqtl2.control_data(bundle) + logger.info("Extracting the zipped bundle of files.") + _outdir = Path(bundle.parent, f"bundle_{bundle.stem}") + with ZipFile(str(bundle), "r") as zfile: + _files = rqtl2.extract(zfile, _outdir) + logger.info("Saving new phenotypes.") + _phenos = save_phenotypes(conn, _control_data, _outdir) + def __build_phenos_maps__(accumulator, current): + dataid, row = current + return ({ + **accumulator[0], + row["phenotype_id"]: { + "population_id": _population["Id"], + "phenotype_id": row["phenotype_id"], + "data_id": dataid, + "publication_id": _publication["Id"], + } + }, { + **accumulator[1], + row["id"]: row["phenotype_id"] + }) + dataidmap, pheno_name2id = reduce( + __build_phenos_maps__, + enumerate(_phenos, start=__fetch_next_dataid__(conn)), + ({},{})) + # 3. a. Fetch the strain names and IDS: create name->ID map + samples = { + row["Name"]: row + for row in samples_by_species_and_population( + conn, _species["SpeciesId"], _population["Id"])} + # b. Save all the data items (DataIds are vibes), return new IDs + logger.info("Saving new phenotypes data.") + _num_data_rows = save_pheno_data(conn=conn, + dataidmap=dataidmap, + pheno_name2id=pheno_name2id, + samples=samples, + control_data=_control_data, + filesdir=_outdir) + logger.info("Saved %s new phenotype data rows.", _num_data_rows) + # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef + logger.info("Cross-referencing new phenotypes to their data and publications.") + _xrefs = cross_reference_phenotypes_publications_and_data( + conn, tuple(dataidmap.values())) + # 5. If standard errors and N exist, save them too + # (use IDs returned in `3. b.` above). 
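Aside (not part of the patch): the saver calls that follow reuse `save_numeric_data` through the `functools.partial` bindings defined earlier in this file (`save_pheno_data`, `save_phenotypes_se`, `save_phenotypes_n`), so each call only supplies the data-specific arguments while `filetype` and `table` stay pre-bound. A minimal, runnable sketch of that pattern, with hypothetical names:

    from functools import partial

    def save_numeric(conn, dataidmap, filetype: str, table: str) -> str:
        # Pretend to write `filetype` rows into `table`; return a summary string.
        return f"saved {len(dataidmap)} {filetype} rows into {table}"

    save_se = partial(save_numeric, filetype="phenose", table="PublishSE")
    save_n = partial(save_numeric, filetype="phenonum", table="NStrain")

    print(save_se(conn=None, dataidmap={1: "a", 2: "b"}))  # saved 2 phenose rows into PublishSE
    print(save_n(conn=None, dataidmap={}))                 # saved 0 phenonum rows into NStrain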
+ logger.info("Saving new phenotypes standard errors.") + _num_se_rows = save_phenotypes_se(conn=conn, + dataidmap=dataidmap, + pheno_name2id=pheno_name2id, + samples=samples, + control_data=_control_data, + filesdir=_outdir) + logger.info("Saved %s new phenotype standard error rows.", _num_se_rows) + + logger.info("Saving new phenotypes sample counts.") + _num_n_rows = save_phenotypes_n(conn=conn, + dataidmap=dataidmap, + pheno_name2id=pheno_name2id, + samples=samples, + control_data=_control_data, + filesdir=_outdir) + logger.info("Saved %s new phenotype sample counts rows.", _num_n_rows) + return (_species, _population, _dataset, _xrefs) + + +if __name__ == "__main__": + def parse_args(): + """Setup command-line arguments.""" + parser = argparse.ArgumentParser( + prog="load_phenotypes_to_db", + description="Process the phenotypes' data and load it into the database.") + parser.add_argument("db_uri", type=str, help="MariaDB/MySQL connection URL") + parser.add_argument( + "jobs_db_path", type=Path, help="Path to jobs' SQLite database.") + parser.add_argument("job_id", type=uuid.UUID, help="ID of the running job") + parser.add_argument( + "--log-level", + type=str, + help="Determines what is logged out.", + choices=("debug", "info", "warning", "error", "critical"), + default="info") + return parser.parse_args() + + def setup_logging(log_level: str): + """Setup logging for the script.""" + logger.setLevel(log_level) + logging.getLogger("uploader.phenotypes.models").setLevel(log_level) + + + def main(): + """Entry-point for this script.""" + args = parse_args() + setup_logging(args.log_level.upper()) + + with (mysqldb.database_connection(args.db_uri) as conn, + conn.cursor(cursorclass=DictCursor) as cursor, + sqlite3.connection(args.jobs_db_path) as jobs_conn): + job = jobs.job(jobs_conn, args.job_id) + + # Lock the PublishXRef/PublishData/PublishSE/NStrain here: Why? + # The `DataId` values are sequential, but not auto-increment + # Can't convert `PublishXRef`.`DataId` to AUTO_INCREMENT. + # `SELECT MAX(DataId) FROM PublishXRef;` + # How do you check for a table lock? + # https://oracle-base.com/articles/mysql/mysql-identify-locked-tables + # `SHOW OPEN TABLES LIKE 'Publish%';` + _db_tables_ = ( + "Species", + "InbredSet", + "Strain", + "StrainXRef", + "Publication", + "Phenotype", + "PublishXRef", + "PublishFreeze", + "PublishData", + "PublishSE", + "NStrain") + + logger.debug( + ("Locking database tables for the connection:" + + "".join("\n\t- %s" for _ in _db_tables_) + "\n"), + *_db_tables_) + cursor.execute(# Lock the tables to avoid race conditions + "LOCK TABLES " + ", ".join( + f"{_table} WRITE" for _table in _db_tables_)) + + db_results = load_data(conn, job) + jobs.update_metadata( + jobs_conn, + args.job_id, + "xref_ids", + json.dumps([xref["xref_id"] for xref in db_results[3]])) + + logger.info("Unlocking all database tables.") + cursor.execute("UNLOCK TABLES") + + # Update authorisations (break this down) — maybe loop until it works? 
+ logger.info("Updating authorisation.") + _job_metadata = job["metadata"] + return update_auth(_job_metadata["authserver"], + _job_metadata["token"], + *db_results) + + + try: + sys.exit(main()) + except Exception as _exc: + logger.debug("Data loading failed… Halting!", + exc_info=True) + sys.exit(1) diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py new file mode 100644 index 0000000..cee5f4e --- /dev/null +++ b/scripts/phenotypes_bulk_edit.py @@ -0,0 +1,266 @@ +import sys +import uuid +import logging +import argparse +from pathlib import Path +from typing import Iterator +from functools import reduce + +from MySQLdb.cursors import DictCursor + +from gn_libs import jobs, mysqldb, sqlite3 + +from uploader.phenotypes.models import phenotypes_data_by_ids +from uploader.phenotypes.misc import phenotypes_data_differences +from uploader.phenotypes.views import BULK_EDIT_COMMON_FIELDNAMES + +import uploader.publications.pubmed as pmed +from uploader.publications.misc import publications_differences +from uploader.publications.models import ( + update_publications, fetch_phenotype_publications) + +logging.basicConfig( + format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) + + +def check_ids(conn, ids: tuple[tuple[int, int], ...]) -> bool: + """Verify that all the `UniqueIdentifier` values are valid.""" + logger.info("Checking the 'UniqueIdentifier' values.") + with conn.cursor(cursorclass=DictCursor) as cursor: + paramstr = ",".join(["(%s, %s)"] * len(ids)) + cursor.execute( + "SELECT PhenotypeId AS phenotype_id, Id AS xref_id " + "FROM PublishXRef " + f"WHERE (PhenotypeId, Id) IN ({paramstr})", + tuple(item for row in ids for item in row)) + mysqldb.debug_query(cursor, logger) + found = tuple((row["phenotype_id"], row["xref_id"]) + for row in cursor.fetchall()) + + not_found = tuple(item for item in ids if item not in found) + if len(not_found) == 0: + logger.info("All 'UniqueIdentifier' are valid.") + return True + + for item in not_found: + logger.error(f"Invalid 'UniqueIdentifier' value: phId:%s::xrId:%s", item[0], item[1]) + + return False + + +def check_for_mandatory_fields(): + """Verify that mandatory fields have values.""" + pass + + +def __fetch_phenotypes__(conn, ids: tuple[int, ...]) -> tuple[dict, ...]: + """Fetch basic (non-numeric) phenotypes data from the database.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + paramstr = ",".join(["%s"] * len(ids)) + cursor.execute(f"SELECT * FROM Phenotype WHERE Id IN ({paramstr}) " + "ORDER BY Id ASC", + ids) + return tuple(dict(row) for row in cursor.fetchall()) + + +def descriptions_differences(file_data, db_data) -> dict[str, str]: + """Compute differences in the descriptions.""" + logger.info("Computing differences in phenotype descriptions.") + assert len(file_data) == len(db_data), "The counts of phenotypes differ!" 
+ description_columns = ("Pre_publication_description", + "Post_publication_description", + "Original_description", + "Pre_publication_abbreviation", + "Post_publication_abbreviation") + diff = tuple() + for file_row, db_row in zip(file_data, db_data): + assert file_row["phenotype_id"] == db_row["Id"] + inner_diff = { + key: file_row[key] + for key in description_columns + if not file_row[key] == db_row[key] + } + if bool(inner_diff): + diff = diff + ({ + "phenotype_id": file_row["phenotype_id"], + **inner_diff + },) + + return diff + + +def update_descriptions(): + """Update descriptions in the database""" + logger.info("Updating descriptions") + # Compute differences between db data and uploaded file + # Only run query for changed descriptions + pass + + +def link_publications(): + """Link phenotypes to relevant publications.""" + logger.info("Linking phenotypes to publications.") + # Create publication if PubMed_ID doesn't exist in db + pass + + +def update_values(): + """Update the phenotype values.""" + logger.info("Updating phenotypes values.") + # Compute differences between db data and uploaded file + # Only run query for changed data + pass + + +def parse_args(): + parser = argparse.ArgumentParser( + prog="Phenotypes Bulk-Edit Processor", + description="Process the bulk-edits to phenotype data and descriptions.") + parser.add_argument("db_uri", type=str, help="MariaDB/MySQL connection URL") + parser.add_argument( + "jobs_db_path", type=Path, help="Path to jobs' SQLite database.") + parser.add_argument("job_id", type=uuid.UUID, help="ID of the running job") + parser.add_argument( + "--log-level", + type=str, + help="Determines what is logged out.", + choices=("debug", "info", "warning", "error", "critical"), + default="info") + return parser.parse_args() + + +def read_file(filepath: Path) -> Iterator[str]: + """Read the file, one line at a time.""" + with filepath.open(mode="r", encoding="utf-8") as infile: + count = 0 + headers = None + for line in infile: + if line.startswith("#"): # ignore comments + continue; + + fields = line.strip().split("\t") + if count == 0: + headers = fields + count = count + 1 + continue + + _dict = dict(zip( + headers, + ((None if item.strip() == "" else item.strip()) + for item in fields))) + _pheno, _xref = _dict.pop("UniqueIdentifier").split("::") + _dict = { + key: ((float(val) if bool(val) else val) + if key not in BULK_EDIT_COMMON_FIELDNAMES + else val) + for key, val in _dict.items() + } + _dict["phenotype_id"] = int(_pheno.split(":")[1]) + _dict["xref_id"] = int(_xref.split(":")[1]) + if _dict["PubMed_ID"] is not None: + _dict["PubMed_ID"] = int(_dict["PubMed_ID"]) + + yield _dict + count = count + 1 + + +def run(conn, job): + """Process the data and update it.""" + file_contents = tuple(sorted(read_file(Path(job["metadata"]["edit-file"])), + key=lambda item: item["phenotype_id"])) + pheno_ids, pheno_xref_ids, pubmed_ids = reduce( + lambda coll, curr: ( + coll[0] + (curr["phenotype_id"],), + coll[1] + ((curr["phenotype_id"], curr["xref_id"]),), + coll[2].union(set([curr["PubMed_ID"]]))), + file_contents, + (tuple(), tuple(), set([None]))) + check_ids(conn, pheno_xref_ids) + check_for_mandatory_fields() + # stop running here if any errors are found. + + ### Compute differences + logger.info("Computing differences.") + # 1. Basic Phenotype data differences + # a. 
Descriptions differences + _desc_diff = descriptions_differences( + file_contents, __fetch_phenotypes__(conn, pheno_ids)) + logger.debug("DESCRIPTIONS DIFFERENCES: %s", _desc_diff) + + # b. Publications differences + _db_publications = fetch_phenotype_publications(conn, pheno_xref_ids) + logger.debug("DB PUBLICATIONS: %s", _db_publications) + + _pubmed_map = { + (int(row["PubMed_ID"]) if bool(row["PubMed_ID"]) else None): f"{row['phenotype_id']}::{row['xref_id']}" + for row in file_contents + } + _pub_id_map = { + f"{pub['PhenotypeId']}::{pub['xref_id']}": pub["PublicationId"] + for pub in _db_publications + } + + _new_publications = update_publications( + conn, tuple({ + **pub, "publication_id": _pub_id_map[_pubmed_map[pub["pubmed_id"]]] + } for pub in pmed.fetch_publications(tuple( + pubmed_id for pubmed_id in pubmed_ids + if pubmed_id not in + tuple(row["PubMed_ID"] for row in _db_publications))))) + _pub_diff = publications_differences( + file_contents, _db_publications, { + row["PubMed_ID" if "PubMed_ID" in row else "pubmed_id"]: row[ + "PublicationId" if "PublicationId" in row else "publication_id"] + for row in _db_publications + _new_publications}) + logger.debug("Publications diff: %s", _pub_diff) + # 2. Data differences + _db_pheno_data = phenotypes_data_by_ids(conn, tuple({ + "population_id": job["metadata"]["population-id"], + "phenoid": row[0], + "xref_id": row[1] + } for row in pheno_xref_ids)) + + data_diff = phenotypes_data_differences( + ({ + "phenotype_id": row["phenotype_id"], + "xref_id": row["xref_id"], + "data": { + key:val for key,val in row.items() + if key not in BULK_EDIT_COMMON_FIELDNAMES + [ + "phenotype_id", "xref_id"] + } + } for row in file_contents), + ({ + **row, + "PhenotypeId": row["Id"], + "data": { + dataitem["StrainName"]: dataitem + for dataitem in row["data"].values() + } + } for row in _db_pheno_data)) + logger.debug("Data differences: %s", data_diff) + ### END: Compute differences + update_descriptions() + link_publications() + update_values() + return 0 + + +def main(): + """Entry-point for this script.""" + args = parse_args() + logger.setLevel(args.log_level.upper()) + logger.debug("Arguments: %s", args) + + logging.getLogger("uploader.phenotypes.misc").setLevel(args.log_level.upper()) + logging.getLogger("uploader.phenotypes.models").setLevel(args.log_level.upper()) + logging.getLogger("uploader.publications.models").setLevel(args.log_level.upper()) + + with (mysqldb.database_connection(args.db_uri) as conn, + sqlite3.connection(args.jobs_db_path) as jobs_conn): + return run(conn, jobs.job(jobs_conn, args.job_id)) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/rqtl2/entry.py b/scripts/rqtl2/entry.py index 327ed2c..e0e00e7 100644 --- a/scripts/rqtl2/entry.py +++ b/scripts/rqtl2/entry.py @@ -20,27 +20,23 @@ def build_main( [Redis, Connection, str, Namespace, logging.Logger], int ], - loggername: str + logger: logging.Logger ) -> Callable[[],int]: """Build a function to be used as an entry-point for scripts.""" def main(): - try: - logging.basicConfig( - format=( - "%(asctime)s - %(levelname)s %(name)s: " - "(%(pathname)s: %(lineno)d) %(message)s"), - level=args.loglevel) - logger = logging.getLogger(loggername) - with (Redis.from_url(args.redisuri, decode_responses=True) as rconn, - database_connection(args.databaseuri) as dbconn): - fqjobid = jobs.job_key(args.redisprefix, args.jobid) + with (Redis.from_url(args.redisuri, decode_responses=True) as rconn, + database_connection(args.databaseuri) as dbconn): + 
logger.setLevel(args.loglevel.upper()) + fqjobid = jobs.job_key(args.redisprefix, args.jobid) + + try: rconn.hset(fqjobid, "status", "started") logger.addHandler(setup_redis_logger( rconn, fqjobid, f"{fqjobid}:log-messages", args.redisexpiry)) - logger.addHandler(StreamHandler(stream=sys.stdout)) + logger.addHandler(StreamHandler(stream=sys.stderr)) check_db(args.databaseuri) check_redis(args.redisuri) @@ -48,15 +44,15 @@ def build_main( logger.error("File not found: '%s'.", args.rqtl2bundle) return 2 - returncode = run_fn(rconn, dbconn, fqjobid, args, logger) + returncode = run_fn(rconn, dbconn, fqjobid, args) if returncode == 0: rconn.hset(fqjobid, "status", "completed:success") return returncode rconn.hset(fqjobid, "status", "completed:error") return returncode - except Exception as _exc:# pylint: disable=[broad-except] - logger.error("The process failed!", exc_info=True) - rconn.hset(fqjobid, "status", "completed:error") - return 4 + except Exception as _exc:# pylint: disable=[broad-except] + logger.error("The process failed!", exc_info=True) + rconn.hset(fqjobid, "status", "completed:error") + return 4 return main diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py index 76ecb8d..5c89ca0 100644 --- a/scripts/rqtl2/phenotypes_qc.py +++ b/scripts/rqtl2/phenotypes_qc.py @@ -36,6 +36,10 @@ from scripts.cli_parser import init_cli_parser, add_global_data_arguments from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter __MODULE__ = "scripts.rqtl2.phenotypes_qc" +logging.basicConfig( + format=("%(asctime)s - %(levelname)s %(name)s: " + "(%(pathname)s: %(lineno)d) %(message)s")) +logger = logging.getLogger(__MODULE__) def validate(phenobundle: Path, logger: Logger) -> dict: """Check that the bundle is generally valid""" @@ -177,7 +181,7 @@ def qc_phenocovar_file( filepath.name, f"{fqkey}:logs") as logger, Redis.from_url(redisuri, decode_responses=True) as rconn): - logger.info("Running QC on file: %s", filepath.name) + print("Running QC on file: ", filepath.name) _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char) _headings = tuple(heading.lower() for heading in next(_csvfile)) _errors: tuple[InvalidValue, ...] 
= tuple() @@ -205,12 +209,12 @@ def qc_phenocovar_file( (f"Record {_lc} in file {filepath.name} has a different " "number of columns than the number of headings"))),) _line = dict(zip(_headings, line)) - if not bool(_line["description"]): + if not bool(_line.get("description")): _errs = _errs + ( save_error(InvalidValue(filepath.name, _line[_headings[0]], "description", - _line["description"], + _line.get("description"), "The description is not provided!")),) rconn.hset(file_fqkey(fqkey, "metadata", filepath), @@ -285,7 +289,7 @@ def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments] filepath.name, f"{fqkey}:logs") as logger, Redis.from_url(redisuri, decode_responses=True) as rconn): - logger.info("Running QC on file: %s", filepath.name) + print("Running QC on file: ", filepath.name) save_error = partial( push_error, rconn, file_fqkey(fqkey, "errors", filepath)) _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char) @@ -369,11 +373,10 @@ def run_qc(# pylint: disable=[too-many-locals] rconn: Redis, dbconn: mdb.Connection, fullyqualifiedjobid: str, - args: Namespace, - logger: Logger + args: Namespace ) -> int: """Run quality control checks on the bundle.""" - logger.debug("Beginning the quality assurance checks.") + print("Beginning the quality assurance checks.") results = check_for_averages_files( **check_for_mandatory_pheno_keys( **validate(args.rqtl2bundle, logger))) @@ -398,7 +401,7 @@ def run_qc(# pylint: disable=[too-many-locals] for ftype in ("pheno", "phenocovar", "phenose", "phenonum"))) # - Fetch samples/individuals from database. - logger.debug("Fetching samples/individuals from the database.") + print("Fetching samples/individuals from the database.") samples = tuple(#type: ignore[var-annotated] item for item in set(reduce( lambda acc, item: acc + ( @@ -415,7 +418,7 @@ def run_qc(# pylint: disable=[too-many-locals] json.dumps(tuple(f"{fullyqualifiedjobid}:phenocovar:{_file}" for _file in cdata.get("phenocovar", [])))) with mproc.Pool(mproc.cpu_count() - 1) as pool: - logger.debug("Check for errors in 'phenocovar' file(s).") + print("Check for errors in 'phenocovar' file(s).") _phenocovar_qc_res = merge_dicts(*pool.starmap(qc_phenocovar_file, tuple( (extractiondir.joinpath(_file), args.redisuri, @@ -437,7 +440,7 @@ def run_qc(# pylint: disable=[too-many-locals] "Expected a non-negative number with at least one decimal " "place.")) - logger.debug("Check for errors in 'pheno' file(s).") + print("Check for errors in 'pheno' file(s).") _pheno_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple(( extractiondir.joinpath(_file), args.redisuri, @@ -456,7 +459,7 @@ def run_qc(# pylint: disable=[too-many-locals] # - Check the 3 checks above for phenose and phenonum values too # qc_phenose_files(…) # qc_phenonum_files(…) - logger.debug("Check for errors in 'phenose' file(s).") + print("Check for errors in 'phenose' file(s).") _phenose_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple(( extractiondir.joinpath(_file), args.redisuri, @@ -472,7 +475,7 @@ def run_qc(# pylint: disable=[too-many-locals] dec_err_fn ) for _file in cdata.get("phenose", [])))) - logger.debug("Check for errors in 'phenonum' file(s).") + print("Check for errors in 'phenonum' file(s).") _phenonum_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple(( extractiondir.joinpath(_file), args.redisuri, @@ -509,5 +512,5 @@ if __name__ == "__main__": type=Path) return parser.parse_args() - main = build_main(cli_args(), run_qc, __MODULE__) + main = build_main(cli_args(), run_qc, logger) 
sys.exit(main()) diff --git a/tests/conftest.py b/tests/conftest.py index 9012221..a716c52 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,8 @@ import io import os import uuid +import shutil +from pathlib import Path from hashlib import sha256 import redis @@ -46,17 +48,20 @@ def cleanup_redis(redisuri: str, prefix: str): @pytest.fixture(scope="module") def client(): "Fixture for test client" - app = create_app() test_prefix = sha256(f"test:{uuid.uuid4()}".encode("utf8")).hexdigest() - app.config.update({ + tests_work_dir = Path("/tmp/{test_prefix}") + tests_work_dir.mkdir(exist_ok=True) + app = create_app({ "TESTING": True, "GNQC_REDIS_PREFIX": f"{test_prefix}:GNQC", - "JOBS_TTL_SECONDS": 2 * 60 * 60# 2 hours + "JOBS_TTL_SECONDS": 2 * 60 * 60,# 2 hours + "ASYNCHRONOUS_JOBS_SQLITE_DB": f"{tests_work_dir}/jobs.db" }) with app.app_context(): yield app.test_client() cleanup_redis(app.config["REDIS_URL"], test_prefix) + shutil.rmtree(tests_work_dir, ignore_errors=True) @pytest.fixture(scope="module") def db_url(client):#pylint: disable=[redefined-outer-name] diff --git a/tests/r_qtl/test_r_qtl2_control_file.py b/tests/r_qtl/test_r_qtl2_control_file.py index 316307d..5b9fef6 100644 --- a/tests/r_qtl/test_r_qtl2_control_file.py +++ b/tests/r_qtl/test_r_qtl2_control_file.py @@ -16,6 +16,7 @@ __DEFAULTS__ = { "pheno_transposed": False, "covar_transposed": False, "phenocovar_transposed": False, + "phenonum_transposed": False, "gmap_transposed": False, "pmap_transposed": False, "phenose_transposed": False diff --git a/tests/uploader/phenotypes/__init__.py b/tests/uploader/phenotypes/__init__.py new file mode 100644 index 0000000..1e0a932 --- /dev/null +++ b/tests/uploader/phenotypes/__init__.py @@ -0,0 +1 @@ +"""phenotypes tests""" diff --git a/tests/uploader/phenotypes/test_misc.py b/tests/uploader/phenotypes/test_misc.py new file mode 100644 index 0000000..cf475ad --- /dev/null +++ b/tests/uploader/phenotypes/test_misc.py @@ -0,0 +1,387 @@ +"""Test miscellaneous phenotypes functions.""" + +import pytest + +from uploader.phenotypes.misc import phenotypes_data_differences + +__sample_db_phenotypes_data__ = ( + { + "PhenotypeId": 4, + "xref_id": 10001, + "DataId": 8967043, + "data": { + "B6D2F1": {"StrainId": 1, "value": None}, + "C57BL/6J": {"StrainId": 2, "value": None}, + "DBA/2J": {"StrainId": 3, "value": None}, + "BXD1": {"StrainId": 4, "value": 61.4}, + "BXD2": {"StrainId": 5, "value": 49}, + "BXD5": {"StrainId": 6, "value": 62.5}, + "BXD6": {"StrainId": 7, "value": 53.1} + } + }, + { + "PhenotypeId": 10, + "xref_id": 10002, + "DataId": 8967044, + "data": { + "B6D2F1": {"StrainId": 1, "value": None}, + "C57BL/6J": {"StrainId": 2, "value": None}, + "DBA/2J": {"StrainId": 3, "value": None}, + "BXD1": {"StrainId": 4, "value": 54.1}, + "BXD2": {"StrainId": 5, "value": 50.1}, + "BXD5": {"StrainId": 6, "value": 53.3}, + "BXD6": {"StrainId": 7, "value": 55.1} + } + }, + { + "PhenotypeId": 15, + "xref_id": 10003, + "DataId": 8967045, + "data": { + "B6D2F1": {"StrainId": 1, "value": None}, + "C57BL/6J": {"StrainId": 2, "value": None}, + "DBA/2J": {"StrainId": 3, "value": None}, + "BXD1": {"StrainId": 4, "value": 483}, + "BXD2": {"StrainId": 5, "value": 403}, + "BXD5": {"StrainId": 6, "value": 501}, + "BXD6": {"StrainId": 7, "value": 403} + } + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "data": { + "B6D2F1": {"StrainId": 1, "value": None}, + "C57BL/6J": {"StrainId": 2, "value": None}, + "DBA/2J": {"StrainId": 3, "value": None}, + "BXD1": {"StrainId": 4, 
"value": 49.8}, + "BXD2": {"StrainId": 5, "value": 45.5}, + "BXD5": {"StrainId": 6, "value": 62.9}, + "BXD6": {"StrainId": 7, "value": None} + } + }, + { + "PhenotypeId": 25, + "xref_id": 10005, + "DataId": 8967047, + "data": { + "B6D2F1": {"StrainId": 1, "value": None}, + "C57BL/6J": {"StrainId": 2, "value": None}, + "DBA/2J": {"StrainId": 3, "value": None}, + "BXD1": {"StrainId": 4, "value": 46}, + "BXD2": {"StrainId": 5, "value": 44.9}, + "BXD5": {"StrainId": 6, "value": 52.5}, + "BXD6": {"StrainId": 7, "value": None} + } + }) + + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "filedata,dbdata,expected", + ((tuple(), tuple(), tuple()), # No data + + # No data difference + (({ + "phenotype_id": 4, + "xref_id": 10001, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 61.4, + "BXD2": 49, + "BXD5":62.5, + "BXD6": 53.1 + } + }, + { + "phenotype_id": 10, + "xref_id": 10002, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 54.1, + "BXD2": 50.1, + "BXD5": 53.3, + "BXD6": 55.1 + } + }, + { + "phenotype_id": 15, + "xref_id": 10003, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 483, + "BXD2": 403, + "BXD5": 501, + "BXD6": 403 + } + }, + { + "phenotype_id": 20, + "xref_id": 10004, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 49.8, + "BXD2": 45.5, + "BXD5": 62.9, + "BXD6": None + } + }, + { + "phenotype_id": 25, + "xref_id": 10005, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 46, + "BXD2": 44.9, + "BXD5": 52.5, + "BXD6": None + } + }), + __sample_db_phenotypes_data__, + tuple()), + + # Change values: No deletions + (({ + "phenotype_id": 4, + "xref_id": 10001, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 77.2, + "BXD2": 49, + "BXD5":62.5, + "BXD6": 53.1 + } + }, + { + "phenotype_id": 10, + "xref_id": 10002, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 54.1, + "BXD2": 50.1, + "BXD5": 53.3, + "BXD6": 55.1 + } + }, + { + "phenotype_id": 15, + "xref_id": 10003, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 483, + "BXD2": 403, + "BXD5": 503, + "BXD6": 903 + } + }, + { + "phenotype_id": 20, + "xref_id": 10004, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": 1, + "BXD1": 8, + "BXD2": 9, + "BXD5": 62.9, + "BXD6": None + } + }, + { + "phenotype_id": 25, + "xref_id": 10005, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 46, + "BXD2": 44.9, + "BXD5": 52.5, + "BXD6": None + } + }), + __sample_db_phenotypes_data__, + ({ + "PhenotypeId": 4, + "xref_id": 10001, + "DataId": 8967043, + "StrainId": 4, + "StrainName": "BXD1", + "value": 77.2 + }, + { + "PhenotypeId": 15, + "xref_id": 10003, + "DataId": 8967045, + "StrainId": 6, + "StrainName": "BXD5", + "value": 503 + }, + { + "PhenotypeId": 15, + "xref_id": 10003, + "DataId": 8967045, + "StrainId": 7, + "StrainName": "BXD6", + "value": 903 + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 3, + "StrainName": "DBA/2J", + "value": 1 + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 4, + "StrainName": "BXD1", + "value": 8 + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 5, + "StrainName": "BXD2", + "value": 9 + })), + + # Changes — with deletions + (({ + "phenotype_id": 4, + "xref_id": 10001, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 
None, + "BXD2": 49, + "BXD5":62.5, + "BXD6": 53.1 + } + }, + { + "phenotype_id": 10, + "xref_id": 10002, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 54.1, + "BXD2": 50.1, + "BXD5": 53.3, + "BXD6": 55.1 + } + }, + { + "phenotype_id": 15, + "xref_id": 10003, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 483, + "BXD2": 403, + "BXD5": None, + "BXD6": None + } + }, + { + "phenotype_id": 20, + "xref_id": 10004, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": 15, + "BXD1": None, + "BXD2": 24, + "BXD5": 62.9, + "BXD6": None + } + }, + { + "phenotype_id": 25, + "xref_id": 10005, + "data": { + "B6D2F1": None, + "C57BL/6J": None, + "DBA/2J": None, + "BXD1": 46, + "BXD2": 44.9, + "BXD5": 52.5, + "BXD6": None + } + }), + __sample_db_phenotypes_data__, + ({ + "PhenotypeId": 4, + "xref_id": 10001, + "DataId": 8967043, + "StrainId": 4, + "StrainName": "BXD1", + "value": None + }, + { + "PhenotypeId": 15, + "xref_id": 10003, + "DataId": 8967045, + "StrainId": 6, + "StrainName": "BXD5", + "value": None + }, + { + "PhenotypeId": 15, + "xref_id": 10003, + "DataId": 8967045, + "StrainId": 7, + "StrainName": "BXD6", + "value": None + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 3, + "StrainName": "DBA/2J", + "value": 15 + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 4, + "StrainName": "BXD1", + "value": None + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 5, + "StrainName": "BXD2", + "value": 24 + })))) +def test_phenotypes_data_differences(filedata, dbdata, expected): + """Test differences are computed correctly.""" + assert phenotypes_data_differences(filedata, dbdata) == expected diff --git a/tests/uploader/publications/__init__.py b/tests/uploader/publications/__init__.py new file mode 100644 index 0000000..de15e08 --- /dev/null +++ b/tests/uploader/publications/__init__.py @@ -0,0 +1 @@ +"""publications tests""" diff --git a/tests/uploader/publications/test_misc.py b/tests/uploader/publications/test_misc.py new file mode 100644 index 0000000..8c7e567 --- /dev/null +++ b/tests/uploader/publications/test_misc.py @@ -0,0 +1,68 @@ +"""Tests for functions used for bulk editing.""" +import pytest + +from uploader.publications.misc import publications_differences + + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "filedata,dbdata,pubmed2pubidmap,expected", + (((), (), {}, tuple()), # no data + + # Same Data + (({"phenotype_id": 1, "xref_id": 10001, "PubMed_ID": 9999999999999}, + {"phenotype_id": 1, "xref_id": 10002, "PubMed_ID": 9999999999999}, + {"phenotype_id": 1, "xref_id": 10003, "PubMed_ID": 9999999999999}, + {"phenotype_id": 1, "xref_id": 10005, "PubMed_ID": 9999999999999}), + ({"PhenotypeId": 1, "xref_id": 10001, "PublicationId": 15, + "PubMed_ID": 9999999999999}, + {"PhenotypeId": 1, "xref_id": 10002, "PublicationId": 15, + "PubMed_ID": 9999999999999}, + {"PhenotypeId": 1, "xref_id": 10003, "PublicationId": 15, + "PubMed_ID": 9999999999999}, + {"PhenotypeId": 1, "xref_id": 10004, "PublicationId": 15, + "PubMed_ID": 9999999999999}), + {9999999999999: 15}, + tuple()), + + # Differences: no new pubmeds (all pubmeds in db) + (({"phenotype_id": 1, "xref_id": 10001, "PubMed_ID": 9999999999999}, + {"phenotype_id": 1, "xref_id": 10002, "PubMed_ID": 9999999999998}, + {"phenotype_id": 1, "xref_id": 10003, "PubMed_ID": 9999999999999}, + {"phenotype_id": 1, "xref_id": 10004, "PubMed_ID": 9999999999997}), + 
({"PhenotypeId": 1, "xref_id": 10001, "PublicationId": 15, + "PubMed_ID": 9999999999999}, + {"PhenotypeId": 1, "xref_id": 10002, "PublicationId": 15, + "PubMed_ID": 9999999999999}, + {"PhenotypeId": 1, "xref_id": 10003, "PublicationId": 15, + "PubMed_ID": 9999999999999}, + {"PhenotypeId": 1, "xref_id": 10004, "PublicationId": 15, + "PubMed_ID": None}), + {9999999999999: 15, 9999999999998: 18, 9999999999997: 12}, + ({"PhenotypeId": 1, "xref_id": 10002, "PublicationId": 18, + "PubMed_ID": 9999999999998}, + {"PhenotypeId": 1, "xref_id": 10004, "PublicationId": 12, + "PubMed_ID": 9999999999997})), + + # Differences: Deletions of pubmeds + (({"phenotype_id": 1, "xref_id": 10001, "PubMed_ID": 9999999999999}, + {"phenotype_id": 1, "xref_id": 10002, "PubMed_ID": None}, + {"phenotype_id": 1, "xref_id": 10003, "PubMed_ID": 9999999999999}, + {"phenotype_id": 1, "xref_id": 10004, "PubMed_ID": None}), + ({"PhenotypeId": 1, "xref_id": 10001, "PublicationId": 15, + "PubMed_ID": 9999999999999}, + {"PhenotypeId": 1, "xref_id": 10002, "PublicationId": 15, + "PubMed_ID": 9999999999999}, + {"PhenotypeId": 1, "xref_id": 10003, "PublicationId": 15, + "PubMed_ID": 9999999999999}, + {"PhenotypeId": 1, "xref_id": 10004, "PublicationId": 15, + "PubMed_ID": 9999999999999}), + {9999999999999: 15, 9999999999998: 18, 9999999999997: 12}, + ({"PhenotypeId": 1, "xref_id": 10002, "PublicationId": None, + "PubMed_ID": None}, + {"PhenotypeId": 1, "xref_id": 10004, "PublicationId": None, + "PubMed_ID": None})))) +def test_publications_differences(filedata, dbdata, pubmed2pubidmap, expected): + """Test publication differences — flesh out description…""" + assert publications_differences( + filedata, dbdata, pubmed2pubidmap) == expected diff --git a/tests/uploader/test_parse.py b/tests/uploader/test_parse.py index 076c47c..20c75b7 100644 --- a/tests/uploader/test_parse.py +++ b/tests/uploader/test_parse.py @@ -8,7 +8,8 @@ from uploader.jobs import job, jobsnamespace from tests.conftest import uploadable_file_object -def test_parse_with_existing_uploaded_file(#pylint: disable=[too-many-arguments] +def test_parse_with_existing_uploaded_file( + #pylint: disable=[too-many-arguments,too-many-positional-arguments] client, db_url, redis_url, diff --git a/uploader/__init__.py b/uploader/__init__.py index cae531b..8b49ad5 100644 --- a/uploader/__init__.py +++ b/uploader/__init__.py @@ -3,19 +3,33 @@ import os import sys import logging from pathlib import Path +from typing import Optional from flask import Flask, request + +from cachelib import FileSystemCache + +from gn_libs import jobs as gnlibs_jobs + from flask_session import Session + from uploader.oauth2.client import user_logged_in, authserver_authorise_uri from . 
import session from .base_routes import base from .files.views import files from .species import speciesbp +from .publications import pubbp from .oauth2.views import oauth2 from .expression_data import exprdatabp from .errors import register_error_handlers +from .background_jobs import background_jobs_bp + +logging.basicConfig( + format=("%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s " + "(%(thread)d:%(threadName)s): %(message)s") +) def override_settings_with_envvars( app: Flask, ignore: tuple[str, ...]=tuple()) -> None: @@ -50,10 +64,30 @@ def setup_logging(app: Flask) -> Flask: "SERVER_SOFTWARE", "").split('/') return __log_gunicorn__(app) if bool(software) else __log_dev__(app) +def setup_modules_logging(app_logger): + """Setup module-level loggers to the same log-level as the application.""" + loglevel = logging.getLevelName(app_logger.getEffectiveLevel()) + + def __setup__(logger_name): + _logger = logging.getLogger(logger_name) + _logger.setLevel(loglevel) + + __setup__("uploader.publications.models") + __setup__("uploader.publications.datatables") + + +def create_app(config: Optional[dict] = None): + """The application factory. + + config: dict + Useful to override settings in the settings files and environment + especially in environments such as testing.""" + if config is None: + config = {} -def create_app(): - """The application factory""" app = Flask(__name__) + + ### BEGIN: Application configuration app.config.from_pyfile( Path(__file__).parent.joinpath("default_settings.py")) if "UPLOADER_CONF" in os.environ: @@ -68,8 +102,16 @@ def create_app(): if secretsfile.exists(): # Silently ignore secrets if the file does not exist. app.config.from_pyfile(secretsfile) + app.config.update(config) # Override everything with passed in config + ### END: Application configuration + + app.config["SESSION_CACHELIB"] = FileSystemCache( + cache_dir=Path(app.config["SESSION_FILESYSTEM_CACHE_PATH"]).absolute(), + threshold=int(app.config["SESSION_FILESYSTEM_CACHE_THRESHOLD"]), + default_timeout=int(app.config["SESSION_FILESYSTEM_CACHE_TIMEOUT"])) setup_logging(app) + setup_modules_logging(app.logger) # setup jinja2 symbols app.add_template_global(lambda : request.url, name="request_url") @@ -86,6 +128,9 @@ def create_app(): app.register_blueprint(files, url_prefix="/files") app.register_blueprint(oauth2, url_prefix="/oauth2") app.register_blueprint(speciesbp, url_prefix="/species") + app.register_blueprint(pubbp, url_prefix="/publications") + app.register_blueprint(background_jobs_bp, url_prefix="/background-jobs/") register_error_handlers(app) + gnlibs_jobs.init_app(app) return app diff --git a/uploader/authorisation.py b/uploader/authorisation.py index a283980..3cf3585 100644 --- a/uploader/authorisation.py +++ b/uploader/authorisation.py @@ -16,13 +16,12 @@ def require_login(function): @wraps(function) def __is_session_valid__(*args, **kwargs): """Check that the user is logged in and their token is valid.""" - def __clear_session__(_no_token): - session.clear_session_info() - flash("You need to be signed in.", "alert-danger big-alert") + def __alert_needs_sign_in__(_no_token): + flash("You need to be signed in.", "alert alert-danger big-alert") return redirect("/") return session.user_token().either( - __clear_session__, + __alert_needs_sign_in__, lambda token: function(*args, **kwargs)) return __is_session_valid__ @@ -49,7 +48,7 @@ def require_token(func: Callable) -> Callable: """ def __invalid_token__(_whatever): logging.debug("==========> Failure log: %s", _whatever) - raise 
Exception( + raise Exception(# pylint: disable=[broad-exception-raised] "You attempted to access a feature of the system that requires " "authorisation. Unfortunately, we could not verify you have the " "appropriate authorisation to perform the action you requested. " diff --git a/uploader/background_jobs.py b/uploader/background_jobs.py new file mode 100644 index 0000000..dc9f837 --- /dev/null +++ b/uploader/background_jobs.py @@ -0,0 +1,119 @@ +"""Generic views and utilities to handle background jobs.""" +import uuid +import importlib +from typing import Callable +from functools import partial + +from flask import ( + url_for, + redirect, + Response, + Blueprint, + render_template, + current_app as app) + +from gn_libs import jobs +from gn_libs import sqlite3 +from gn_libs.jobs.jobs import JobNotFound + +from uploader.authorisation import require_login + +background_jobs_bp = Blueprint("background-jobs", __name__) +HandlerType = Callable[[dict], Response] + + +def __default_error_handler__(job: dict) -> Response: + return redirect(url_for("background-jobs.job_error", job_id=job["job_id"])) + +def register_handlers( + job_type: str, + success_handler: HandlerType, + # pylint: disable=[redefined-outer-name] + error_handler: HandlerType = __default_error_handler__ + # pylint: disable=[redefined-outer-name] +) -> str: + """Register success and error handlers for each job type.""" + if not bool(app.config.get("background-jobs")): + app.config["background-jobs"] = {} + + if not bool(app.config["background-jobs"].get(job_type)): + app.config["background-jobs"][job_type] = { + "success": success_handler, + "error": error_handler + } + + return job_type + + +def register_job_handlers(job: str): + """Related to register handlers above.""" + def __load_handler__(absolute_function_path): + _parts = absolute_function_path.split(".") + app.logger.debug("THE PARTS ARE: %s", _parts) + assert len(_parts) > 1, f"Invalid path: {absolute_function_path}" + module = importlib.import_module(f".{_parts[-2]}", + package=".".join(_parts[0:-2])) + return getattr(module, _parts[-1]) + + metadata = job["metadata"] + if metadata["success_handler"]: + _success_handler = __load_handler__(metadata["success_handler"]) + try: + _error_handler = __load_handler__(metadata["error_handler"]) + except Exception as _exc:# pylint: disable=[broad-exception-caught] + _error_handler = __default_error_handler__ + register_handlers( + metadata["job-type"], _success_handler, _error_handler) + + +def handler(job: dict, handler_type: str) -> HandlerType: + """Fetch a handler for the job.""" + _job_type = job["metadata"]["job-type"] + _handler = app.config.get( + "background-jobs", {} + ).get( + _job_type, {} + ).get(handler_type) + if bool(_handler): + return _handler(job) + raise Exception(# pylint: disable=[broad-exception-raised] + f"No '{handler_type}' handler registered for job type: {_job_type}") + + +error_handler = partial(handler, handler_type="error") +success_handler = partial(handler, handler_type="success") + + +@background_jobs_bp.route("/status/<uuid:job_id>") +@require_login +def job_status(job_id: uuid.UUID): + """View the job status.""" + with sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]) as conn: + try: + job = jobs.job(conn, job_id, fulldetails=True) + status = job["metadata"]["status"] + + register_job_handlers(job) + if status == "error": + return error_handler(job) + + if status == "completed": + return success_handler(job) + + return render_template("jobs/job-status.html", job=job) + except 
JobNotFound as _jnf: + return render_template( + "jobs/job-not-found.html", + job_id=job_id) + + +@background_jobs_bp.route("/error/<uuid:job_id>") +@require_login +def job_error(job_id: uuid.UUID): + """Handle job errors in a generic manner.""" + with sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]) as conn: + try: + job = jobs.job(conn, job_id, fulldetails=True) + return render_template("jobs/job-error.html", job=job) + except JobNotFound as _jnf: + return render_template("jobs/job-not-found.html", job_id=job_id) diff --git a/uploader/default_settings.py b/uploader/default_settings.py index f07f89e..1136ff8 100644 --- a/uploader/default_settings.py +++ b/uploader/default_settings.py @@ -7,6 +7,7 @@ import hashlib LOG_LEVEL = "WARNING" SECRET_KEY = b"<Please! Please! Please! Change This!>" UPLOAD_FOLDER = "/tmp/qc_app_files" +TEMPORARY_DIRECTORY = "/tmp/gn-uploader-tmpdir" REDIS_URL = "redis://" JOBS_TTL_SECONDS = 1209600 # 14 days GNQC_REDIS_PREFIX="gn-uploader" diff --git a/uploader/files/views.py b/uploader/files/views.py index ddf5350..29059c7 100644 --- a/uploader/files/views.py +++ b/uploader/files/views.py @@ -1,4 +1,6 @@ """Module for generic files endpoints.""" +import time +import random import traceback from pathlib import Path @@ -56,10 +58,13 @@ def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path: """Merge the chunks into a single file.""" with open(targetfile, "ab") as _target: for chunkfile in chunkpaths: + app.logger.error("Merging chunk: %s", chunkfile) with open(chunkfile, "rb") as _chunkdata: _target.write(_chunkdata.read()) - chunkfile.unlink(missing_ok=True) + chunkfile.unlink() # Don't use `missing_ok=True` — chunk MUST exist + # If the chunk doesn't exist, it might indicate a race condition. Handle + # that instead. return targetfile @@ -92,15 +97,51 @@ def resumable_upload_post(): Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk)) for _achunk in range(1, _totalchunks+1)) if all(_file.exists() for _file in chunkpaths): - # merge_files and clean up chunks - __merge_chunks__(_targetfile, chunkpaths) - chunks_directory(_fileid).rmdir() + ### HACK: Break possible race condition ### + # Looks like sometimes, there are multiple threads/requests trying + # to merge one file, leading to race conditions and in some rare + # instances, actual data corruption. This hack is meant to break + # that race condition.
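+ # Note on the mechanism below: the delays are prime-valued millisecond
+ # counts (divided by 1000 before sleeping), presumably so that racing
+ # requests picking random delays are unlikely to keep waking in lockstep;
+ # the `merge.lock` file then lets only one request perform the actual
+ # merge while the others loop until the merged target file appears.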
+ _delays = ( + 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, + 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, + 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293) + _lockfile = Path(chunks_directory(_fileid), "merge.lock") + while True: + time.sleep(random.choice(_delays) / 1000) + if (chunks_directory(_fileid).exists() + and not (_lockfile.exists() and _targetfile.exists())): + # merge_files and clean up chunks + _lockfile.touch() + __merge_chunks__(_targetfile, chunkpaths) + _lockfile.unlink() + chunks_directory(_fileid).rmdir() + continue + + if (_targetfile.exists() + and not ( + chunks_directory(_fileid).exists() + and _lockfile.exists())): + # merge complete + break + + # There is still a thread that's merging this file + continue + ### END: HACK: Break possible race condition ### + + if _targetfile.exists(): + return jsonify({ + "uploaded-file": _targetfile.name, + "original-name": _uploadfilename, + "message": "File was uploaded successfully!", + "statuscode": 200 + }), 200 return jsonify({ "uploaded-file": _targetfile.name, "original-name": _uploadfilename, - "message": "File was uploaded successfully!", - "statuscode": 200 - }), 200 + "message": "Uploaded file is missing!", + "statuscode": 404 + }), 404 return jsonify({ "message": f"Chunk {int(_chunk)} uploaded successfully.", "statuscode": 201 diff --git a/uploader/jobs.py b/uploader/jobs.py index e86ee05..5968c03 100644 --- a/uploader/jobs.py +++ b/uploader/jobs.py @@ -41,7 +41,8 @@ def error_filename(jobid, error_dir): "Compute the path of the file where errors will be dumped." return f"{error_dir}/job_{jobid}.error" -def initialise_job(# pylint: disable=[too-many-arguments] +def initialise_job( + # pylint: disable=[too-many-arguments, too-many-positional-arguments] rconn: Redis, rprefix: str, jobid: str, command: list, job_type: str, ttl_seconds: int = 86400, extra_meta: Optional[dict] = None) -> dict: "Initialise a job 'object' and put in on redis" @@ -54,7 +55,8 @@ def initialise_job(# pylint: disable=[too-many-arguments] name=job_key(rprefix, jobid), time=timedelta(seconds=ttl_seconds)) return the_job -def build_file_verification_job(#pylint: disable=[too-many-arguments] +def build_file_verification_job( + #pylint: disable=[too-many-arguments, too-many-positional-arguments] redis_conn: Redis, dburi: str, redisuri: str, @@ -77,7 +79,8 @@ def build_file_verification_job(#pylint: disable=[too-many-arguments] "filename": os.path.basename(filepath), "percent": 0 }) -def data_insertion_job(# pylint: disable=[too-many-arguments] +def data_insertion_job( + # pylint: disable=[too-many-arguments, too-many-positional-arguments] redis_conn: Redis, filepath: str, filetype: str, totallines: int, speciesid: int, platformid: int, datasetid: int, databaseuri: str, redisuri: str, ttl_seconds: int) -> dict: diff --git a/uploader/monadic_requests.py b/uploader/monadic_requests.py index f1f5c77..eda42d0 100644 --- a/uploader/monadic_requests.py +++ b/uploader/monadic_requests.py @@ -59,6 +59,11 @@ def get(url, params=None, **kwargs) -> Either: :rtype: pymonad.either.Either """ + timeout = kwargs.get("timeout") + kwargs = {key: val for key,val in kwargs.items() if key != "timeout"} + if timeout is None: + timeout = (9.13, 20) + try: resp = requests.get(url, params=params, **kwargs) if resp.status_code in SUCCESS_CODES: @@ -76,6 +81,11 @@ def post(url, data=None, json=None, **kwargs) -> Either: :rtype: pymonad.either.Either """ + timeout = kwargs.get("timeout") + kwargs = {key: val for key,val in 
kwargs.items() if key != "timeout"} + if timeout is None: + timeout = (9.13, 20) + try: resp = requests.post(url, data=data, json=json, **kwargs) if resp.status_code in SUCCESS_CODES: @@ -95,10 +105,10 @@ def make_either_error_handler(msg): try: _data = error.json() except Exception as _exc: - raise Exception(error.content) from _exc - raise Exception(_data) + raise Exception(error.content) from _exc# pylint: disable=[broad-exception-raised] + raise Exception(_data)# pylint: disable=[broad-exception-raised] app.logger.debug("\n\n%s\n\n", msg) - raise Exception(error) + raise Exception(error)# pylint: disable=[broad-exception-raised] return __fail__ diff --git a/uploader/oauth2/client.py b/uploader/oauth2/client.py index 1efa299..12fbf80 100644 --- a/uploader/oauth2/client.py +++ b/uploader/oauth2/client.py @@ -1,6 +1,7 @@ """OAuth2 client utilities.""" import json import time +import uuid import random from datetime import datetime, timedelta from urllib.parse import urljoin, urlparse @@ -146,9 +147,24 @@ def oauth2_client(): __client__) +def fetch_user_details() -> Either: + """Retrieve user details from the auth server""" + suser = session.session_info()["user"] + if suser["email"] == "anon@ymous.user": + udets = oauth2_get("auth/user/").then( + lambda usrdets: session.set_user_details({ + "user_id": uuid.UUID(usrdets["user_id"]), + "name": usrdets["name"], + "email": usrdets["email"], + "token": session.user_token()})) + return udets + return Right(suser) + + def user_logged_in(): """Check whether the user has logged in.""" suser = session.session_info()["user"] + fetch_user_details() return suser["logged_in"] and suser["token"].is_right() diff --git a/uploader/oauth2/views.py b/uploader/oauth2/views.py index a7211cb..db4ef61 100644 --- a/uploader/oauth2/views.py +++ b/uploader/oauth2/views.py @@ -24,22 +24,24 @@ from .client import ( user_logged_in, authserver_uri, oauth2_clientid, + fetch_user_details, oauth2_clientsecret) oauth2 = Blueprint("oauth2", __name__) + @oauth2.route("/code") def authorisation_code(): """Receive authorisation code from auth server and use it to get token.""" def __process_error__(resp_or_exception): app.logger.debug("ERROR: (%s)", resp_or_exception) flash("There was an error retrieving the authorisation token.", - "alert-danger") + "alert alert-danger") return redirect("/") def __fail_set_user_details__(_failure): app.logger.debug("Fetching user details fails: %s", _failure) - flash("Could not retrieve the user details", "alert-danger") + flash("Could not retrieve the user details", "alert alert-danger") return redirect("/") def __success_set_user_details__(_success): @@ -48,19 +50,13 @@ def authorisation_code(): def __success__(token): session.set_user_token(token) - return oauth2_get("auth/user/").then( - lambda usrdets: session.set_user_details({ - "user_id": uuid.UUID(usrdets["user_id"]), - "name": usrdets["name"], - "email": usrdets["email"], - "token": session.user_token(), - "logged_in": True})).either( + return fetch_user_details().either( __fail_set_user_details__, __success_set_user_details__) code = request.args.get("code", "").strip() if not bool(code): - flash("AuthorisationError: No code was provided.", "alert-danger") + flash("AuthorisationError: No code was provided.", "alert alert-danger") return redirect("/") baseurl = urlparse(request.base_url, scheme=request.scheme) @@ -116,7 +112,7 @@ def logout(): _user = session_info["user"] _user_str = f"{_user['name']} ({_user['email']})" session.clear_session_info() - flash("Successfully signed 
out.", "alert-success") + flash("Successfully signed out.", "alert alert-success") return redirect("/") if user_logged_in(): @@ -134,5 +130,5 @@ def logout(): cleanup_thunk=lambda: __unset_session__( session.session_info())), lambda res: __unset_session__(session.session_info())) - flash("There is no user that is currently logged in.", "alert-info") + flash("There is no user that is currently logged in.", "alert alert-info") return redirect("/") diff --git a/uploader/phenotypes/misc.py b/uploader/phenotypes/misc.py new file mode 100644 index 0000000..cbe3b7f --- /dev/null +++ b/uploader/phenotypes/misc.py @@ -0,0 +1,26 @@ +"""Miscellaneous functions handling phenotypes and phenotypes data.""" +import logging + +logger = logging.getLogger(__name__) + + +def phenotypes_data_differences( + filedata: tuple[dict, ...], dbdata: tuple[dict, ...] +) -> tuple[dict, ...]: + """Compute differences between file data and db data""" + diff = tuple() + for filerow, dbrow in zip( + sorted(filedata, key=lambda item: (item["phenotype_id"], item["xref_id"])), + sorted(dbdata, key=lambda item: (item["PhenotypeId"], item["xref_id"]))): + for samplename, value in filerow["data"].items(): + if value != dbrow["data"].get(samplename, {}).get("value"): + diff = diff + ({ + "PhenotypeId": filerow["phenotype_id"], + "xref_id": filerow["xref_id"], + "DataId": dbrow["DataId"], + "StrainId": dbrow["data"].get(samplename, {}).get("StrainId"), + "StrainName": samplename, + "value": value + },) + + return diff diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py index e1ec0c9..c2aeebf 100644 --- a/uploader/phenotypes/models.py +++ b/uploader/phenotypes/models.py @@ -1,14 +1,30 @@ """Database and utility functions for phenotypes.""" -from typing import Optional +import logging +import tempfile +from pathlib import Path from functools import reduce from datetime import datetime +from typing import Optional, Iterable import MySQLdb as mdb from MySQLdb.cursors import Cursor, DictCursor -from flask import current_app as app +from functional_tools import take from gn_libs.mysqldb import debug_query +logger = logging.getLogger(__name__) + + +__PHENO_DATA_TABLES__ = { + "PublishData": { + "table": "PublishData", "valueCol": "value", "DataIdCol": "Id"}, + "PublishSE": { + "table": "PublishSE", "valueCol": "error", "DataIdCol": "DataId"}, + "NStrain": { + "table": "NStrain", "valueCol": "count", "DataIdCol": "DataId"} +} + + def datasets_by_population( conn: mdb.Connection, species_id: int, @@ -32,10 +48,10 @@ def dataset_by_id(conn: mdb.Connection, """Fetch dataset details by identifier""" with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( - "SELECT s.SpeciesId, pf.* FROM Species AS s " - "INNER JOIN InbredSet AS iset ON s.Id=iset.SpeciesId " - "INNER JOIN PublishFreeze AS pf ON iset.Id=pf.InbredSetId " - "WHERE s.Id=%s AND iset.Id=%s AND pf.Id=%s", + "SELECT Species.SpeciesId, PublishFreeze.* FROM Species " + "INNER JOIN InbredSet ON Species.Id=InbredSet.SpeciesId " + "INNER JOIN PublishFreeze ON InbredSet.Id=PublishFreeze.InbredSetId " + "WHERE Species.Id=%s AND InbredSet.Id=%s AND PublishFreeze.Id=%s", (species_id, population_id, dataset_id)) return dict(cursor.fetchone()) @@ -75,7 +91,7 @@ def dataset_phenotypes(conn: mdb.Connection, limit: Optional[int] = None) -> tuple[dict, ...]: """Fetch the actual phenotypes.""" _query = ( - "SELECT pheno.*, pxr.Id AS xref_id, ist.InbredSetCode FROM Phenotype AS pheno " + "SELECT pheno.*, pxr.Id AS xref_id, pxr.InbredSetId, ist.InbredSetCode FROM 
Phenotype AS pheno " "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " "INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId " "INNER JOIN InbredSet AS ist ON pf.InbredSetId=ist.Id " @@ -83,7 +99,7 @@ def dataset_phenotypes(conn: mdb.Connection, f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(_query, (population_id, dataset_id)) - debug_query(cursor, app.logger) + debug_query(cursor, logger) return tuple(dict(row) for row in cursor.fetchall()) @@ -94,7 +110,7 @@ def __phenotype_se__(cursor: Cursor, xref_id, dataids_and_strainids): cursor.execute("SELECT * FROM PublishSE WHERE (DataId, StrainId) IN " f"({paramstr})", flat) - debug_query(cursor, app.logger) + debug_query(cursor, logger) _se = { (row["DataId"], row["StrainId"]): { "DataId": row["DataId"], @@ -107,7 +123,7 @@ def __phenotype_se__(cursor: Cursor, xref_id, dataids_and_strainids): cursor.execute("SELECT * FROM NStrain WHERE (DataId, StrainId) IN " f"({paramstr})", flat) - debug_query(cursor, app.logger) + debug_query(cursor, logger) _n = { (row["DataId"], row["StrainId"]): { "DataId": row["DataId"], @@ -137,6 +153,7 @@ def __organise_by_phenotype__(pheno, row): "Pre_publication_abbreviation": row["Pre_publication_abbreviation"], "Post_publication_abbreviation": row["Post_publication_abbreviation"], "xref_id": row["pxr.Id"], + "DataId": row["DataId"], "data": { **(_pheno["data"] if bool(_pheno) else {}), (row["DataId"], row["StrainId"]): { @@ -225,7 +242,7 @@ def phenotypes_data(conn: mdb.Connection, f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "") with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute(_query, (population_id, dataset_id)) - debug_query(cursor, app.logger) + debug_query(cursor, logger) return tuple(dict(row) for row in cursor.fetchall()) @@ -252,5 +269,128 @@ def save_new_dataset(cursor: Cursor, "%(created)s, %(public)s, %(population_id)s, %(confidentiality)s, " "%(users)s)", params) - debug_query(cursor, app.logger) + debug_query(cursor, logger) return {**params, "Id": cursor.lastrowid} + + +def phenotypes_data_by_ids( + conn: mdb.Connection, + inbred_pheno_xref: dict[str, int] +) -> tuple[dict, ...]: + """Fetch all phenotype data, filtered by the `inbred_pheno_xref` mapping.""" + _paramstr = ",".join(["(%s, %s, %s)"] * len(inbred_pheno_xref)) + _query = ("SELECT " + "pub.PubMed_ID, pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode " + "FROM Publication AS pub " + "RIGHT JOIN PublishXRef AS pxr0 ON pub.Id=pxr0.PublicationId " + "INNER JOIN Phenotype AS pheno ON pxr0.PhenotypeId=pheno.id " + "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " + "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id " + "INNER JOIN Strain AS str ON pd.StrainId=str.Id " + "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId " + "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId " + "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId " + f"WHERE (pxr.InbredSetId, pheno.Id, pxr.Id) IN ({_paramstr}) " + "ORDER BY pheno.Id") + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute(_query, tuple(item for row in inbred_pheno_xref + for item in (row["population_id"], + row["phenoid"], + row["xref_id"]))) + debug_query(cursor, logger) + return tuple( + reduce(__organise_by_phenotype__, cursor.fetchall(), {}).values()) + + +def create_new_phenotypes(conn: mdb.Connection, + phenotypes: Iterable[dict]) -> tuple[dict, ...]: + """Add entirely new phenotypes to the database.""" + 
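+ # Note: `phenotypes` is consumed in batches of 1000 via `take()`; each
+ # batch is inserted with executemany() and then re-selected by its
+ # Pre_publication_description values so the newly assigned Phenotype IDs
+ # can be returned to the caller.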
_phenos = tuple() + with conn.cursor(cursorclass=DictCursor) as cursor: + while True: + batch = take(phenotypes, 1000) + if len(batch) == 0: + break + + cursor.executemany( + ("INSERT INTO " + "Phenotype(Pre_publication_description, Original_description, Units, Authorized_Users) " + "VALUES (%s, %s, %s, 'robwilliams')"), + tuple((row["id"], row["description"], row["units"]) + for row in batch)) + paramstr = ", ".join(["%s"] * len(batch)) + cursor.execute( + "SELECT * FROM Phenotype WHERE Pre_publication_description IN " + f"({paramstr})", + tuple(item["id"] for item in batch)) + _phenos = _phenos + tuple({ + "phenotype_id": row["Id"], + "id": row["Pre_publication_description"], + "description": row["Original_description"], + "units": row["Units"] + } for row in cursor.fetchall()) + + return _phenos + + +def save_phenotypes_data( + conn: mdb.Connection, + table: str, + data: Iterable[dict] +) -> int: + """Save new phenotypes data into the database.""" + _table_details = __PHENO_DATA_TABLES__[table] + with conn.cursor(cursorclass=DictCursor) as cursor: + _count = 0 + while True: + batch = take(data, 100000) + if len(batch) == 0: + logger.warning("Got an empty batch. This needs investigation.") + break + + logger.debug("Saving batch of %s items.", len(batch)) + cursor.executemany( + (f"INSERT INTO {_table_details['table']}" + f"({_table_details['DataIdCol']}, StrainId, {_table_details['valueCol']}) " + "VALUES " + f"(%(data_id)s, %(sample_id)s, %(value)s) "), + tuple(batch)) + debug_query(cursor, logger) + _count = _count + len(batch) + + + logger.debug("Saved a total of %s data rows", _count) + return _count + + +def quick_save_phenotypes_data( + conn: mdb.Connection, + table: str, + dataitems: Iterable[dict], + tmpdir: Path +) -> int: + """Save data items to the database, but using 'LOAD DATA LOCAL INFILE'.""" + _table_details = __PHENO_DATA_TABLES__[table] + with (tempfile.NamedTemporaryFile( + prefix=f"{table}_data", mode="wt", dir=tmpdir) as tmpfile, + conn.cursor(cursorclass=DictCursor) as cursor): + _count = 0 + logger.debug("Write data rows to text file.") + for row in dataitems: + tmpfile.write( + f'{row["data_id"]}\t{row["sample_id"]}\t{row["value"]}\n') + _count = _count + 1 + tmpfile.flush() + + logger.debug("Load text file into database (table: %s)", + _table_details["table"]) + cursor.execute( + f"LOAD DATA LOCAL INFILE '{tmpfile.name}' " + f"INTO TABLE {_table_details['table']} " + "(" + f"{_table_details['DataIdCol']}, " + "StrainId, " + f"{_table_details['valueCol']}" + ")") + debug_query(cursor, logger) + return _count diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py index dc2df8f..bc15f2d 100644 --- a/uploader/phenotypes/views.py +++ b/uploader/phenotypes/views.py @@ -1,41 +1,61 @@ """Views handling ('classical') phenotypes.""" import sys +import csv import uuid import json -import datetime +import logging +import tempfile from typing import Any from pathlib import Path from zipfile import ZipFile from functools import wraps, reduce from logging import INFO, ERROR, DEBUG, FATAL, CRITICAL, WARNING +from urllib.parse import urljoin, urlparse, ParseResult, urlunparse, urlencode + +import datetime +from datetime import timedelta from redis import Redis from pymonad.either import Left from requests.models import Response from MySQLdb.cursors import DictCursor +from werkzeug.utils import secure_filename + +from gn_libs import sqlite3 +from gn_libs import jobs as gnlibs_jobs +from gn_libs.jobs.jobs import JobNotFound from gn_libs.mysqldb import database_connection +from gn_libs
import monadic_requests as mrequests + +from authlib.jose import jwt from flask import (flash, request, url_for, jsonify, redirect, Blueprint, + send_file, current_app as app) # from r_qtl import r_qtl2 as rqtl2 from r_qtl import r_qtl2_qc as rqc from r_qtl import exceptions as rqe + from uploader import jobs +from uploader import session from uploader.files import save_file#, fullpath from uploader.ui import make_template_renderer from uploader.oauth2.client import oauth2_post from uploader.authorisation import require_login +from uploader.oauth2 import jwks, client as oauth2client from uploader.route_utils import generic_select_population from uploader.datautils import safe_int, enumerate_sequence from uploader.species.models import all_species, species_by_id from uploader.monadic_requests import make_either_error_handler +from uploader.publications.models import fetch_publication_by_id from uploader.request_checks import with_species, with_population +from uploader.samples.models import samples_by_species_and_population from uploader.input_validation import (encode_errors, decode_errors, is_valid_representative_name) @@ -46,6 +66,7 @@ from .models import (dataset_by_id, save_new_dataset, dataset_phenotypes, datasets_by_population, + phenotypes_data_by_ids, phenotype_publication_data) phenotypesbp = Blueprint("phenotypes", __name__) @@ -357,6 +378,9 @@ def process_phenotypes_individual_files(error_uri): ("pheno", "phenotype-data"), ("phenose", "phenotype-se"), ("phenonum", "phenotype-n")): + cdata[f"{rqtlkey}_transposed"] = ( + (form.get(f"{formkey}-transposed") or "off") == "on") + if form.get("resumable-upload", False): # Chunked upload of large files was used filedata = json.loads(form[formkey]) @@ -379,6 +403,7 @@ def process_phenotypes_individual_files(error_uri): arcname=filepath.name) cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filepath.name] + zfile.writestr("control_data.json", data=json.dumps(cdata, indent=2)) return bundlepath @@ -444,21 +469,18 @@ def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# p # str(dataset["Id"]), str(phenobundle), "--loglevel", - { - INFO: "INFO", - ERROR: "ERROR", - DEBUG: "DEBUG", - FATAL: "FATAL", - CRITICAL: "CRITICAL", - WARNING: "WARNING" - }[app.logger.getEffectiveLevel()], + logging.getLevelName( + app.logger.getEffectiveLevel() + ).lower(), "--redisexpiry", str(_ttl_seconds)], "phenotype_qc", _ttl_seconds, {"job-metadata": json.dumps({ "speciesid": species["SpeciesId"], "populationid": population["Id"], "datasetid": dataset["Id"], - "bundle": str(phenobundle.absolute())})}), + "bundle": str(phenobundle.absolute()), + **({"publicationid": request.form["publication-id"]} + if request.form.get("publication-id") else {})})}), _redisuri, f"{app.config['UPLOAD_FOLDER']}/job_errors") @@ -531,7 +553,8 @@ def review_job_data( **kwargs ):# pylint: disable=[unused-argument] """Review data one more time before entering it into the database.""" - with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: + with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, + database_connection(app.config["SQL_URI"]) as conn): try: job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id)) except jobs.JobNotFound as _jnf: @@ -579,6 +602,7 @@ def review_job_data( filetype: __summarise__(filetype, meta) for filetype,meta in metadata.items() } + _job_metadata = json.loads(job["job-metadata"]) return render_template("phenotypes/review-job-data.html", species=species, population=population, @@ -586,9 
+610,126 @@ def review_job_data( job_id=job_id, job=job, summary=summary, + publication=( + fetch_publication_by_id( + conn, int(_job_metadata["publicationid"])) + if _job_metadata.get("publicationid") + else None), activelink="add-phenotypes") +def load_phenotypes_success_handler(job): + """Handle loading new phenotypes into the database successfully.""" + return redirect(url_for( + "species.populations.phenotypes.load_data_success", + species_id=job["metadata"]["species_id"], + population_id=job["metadata"]["population_id"], + dataset_id=job["metadata"]["dataset_id"], + job_id=job["job_id"])) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/load-data-to-database", + methods=["POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def load_data_to_database( + species: dict, + population: dict, + dataset: dict, + **kwargs +):# pylint: disable=[unused-argument] + """Load the data from the given QC job into the database.""" + jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, + sqlite3.connection(jobs_db) as conn): + qc_job = jobs.job(rconn, jobs.jobsnamespace(), request.form["data-qc-job-id"]) + _meta = json.loads(qc_job["job-metadata"]) + load_job_id = uuid.uuid4() + _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower() + command = [ + sys.executable, + "-u", + "-m", + "scripts.load_phenotypes_to_db", + app.config["SQL_URI"], + jobs_db, + str(load_job_id), + "--log-level", + _loglevel + ] + + def __handle_error__(resp): + return render_template("http-error.html", *resp.json()) + + def __handle_success__(load_job): + app.logger.debug("The phenotypes loading job: %s", load_job) + return redirect(url_for( + "background-jobs.job_status", job_id=load_job["job_id"])) + + issued = datetime.datetime.now() + jwtkey = jwks.newest_jwk_with_rotation( + jwks.jwks_directory(app, "UPLOADER_SECRETS"), + int(app.config["JWKS_ROTATION_AGE_DAYS"])) + + return mrequests.post( + urljoin(oauth2client.authserver_uri(), "auth/token"), + json={ + "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer", + "scope": oauth2client.SCOPE, + "assertion": jwt.encode( + header={ + "alg": "RS256", + "typ": "JWT", + "kid": jwtkey.as_dict()["kid"] + }, + payload={ + "iss": str(oauth2client.oauth2_clientid()), + "sub": str(session.user_details()["user_id"]), + "aud": urljoin(oauth2client.authserver_uri(), + "auth/token"), + # TODO: Update expiry time once fix is implemented in + # auth server. 
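+ # Note: this assertion implements the JWT-bearer grant (RFC 7523),
+ # signed with the newest local JWK; per the TODO above, it currently
+ # expires five minutes after `issued`, with "nbf"/"iat" pinned to the
+ # issue time.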
+ "exp": (issued + timedelta(minutes=5)).timestamp(), + "nbf": int(issued.timestamp()), + "iat": int(issued.timestamp()), + "jti": str(uuid.uuid4()) + }, + key=jwtkey).decode("utf8"), + "client_id": oauth2client.oauth2_clientid() + } + ).then( + lambda token: gnlibs_jobs.initialise_job( + conn, + load_job_id, + command, + "load-new-phenotypes-data", + extra_meta={ + "species_id": species["SpeciesId"], + "population_id": population["Id"], + "dataset_id": dataset["Id"], + "bundle_file": _meta["bundle"], + "publication_id": _meta["publicationid"], + "authserver": oauth2client.authserver_uri(), + "token": token["access_token"], + "success_handler": ( + "uploader.phenotypes.views" + ".load_phenotypes_success_handler") + }) + ).then( + lambda job: gnlibs_jobs.launch_job( + job, + jobs_db, + Path(f"{app.config['UPLOAD_FOLDER']}/job_errors"), + worker_manager="gn_libs.jobs.launcher", + loglevel=_loglevel) + ).either(__handle_error__, __handle_success__) + + def update_phenotype_metadata(conn, metadata: dict): """Update a phenotype's basic metadata values.""" with conn.cursor(cursorclass=DictCursor) as cursor: @@ -844,3 +985,216 @@ def edit_phenotype_data(# pylint: disable=[unused-argument] population_id=population["Id"], dataset_id=dataset["Id"], xref_id=xref_id)) + + +def process_phenotype_data_for_download(pheno: dict) -> dict: + """Sanitise data for download.""" + return { + "UniqueIdentifier": f"phId:{pheno['Id']}::xrId:{pheno['xref_id']}", + **{ + key: val for key, val in pheno.items() + if key not in ("Id", "xref_id", "data", "Units") + }, + **{ + data_item["StrainName"]: data_item["value"] + for data_item in pheno.get("data", {}).values() + } + } + + +BULK_EDIT_COMMON_FIELDNAMES = [ + "UniqueIdentifier", + "Post_publication_description", + "Pre_publication_abbreviation", + "Pre_publication_description", + "Original_description", + "Post_publication_abbreviation", + "PubMed_ID" +] + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/edit-download", + methods=["POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def edit_download_phenotype_data(# pylint: disable=[unused-argument] + species: dict, + population: dict, + dataset: dict, + **kwargs +): + formdata = request.json + with database_connection(app.config["SQL_URI"]) as conn: + samples_list = [ + sample["Name"] for sample in samples_by_species_and_population( + conn, species["SpeciesId"], population["Id"])] + data = ( + process_phenotype_data_for_download(pheno) + for pheno in phenotypes_data_by_ids(conn, tuple({ + "population_id": population["Id"], + "phenoid": row["phenotype_id"], + "xref_id": row["xref_id"] + } for row in formdata))) + + with (tempfile.TemporaryDirectory( + prefix=app.config["TEMPORARY_DIRECTORY"]) as tmpdir): + filename = Path(tmpdir).joinpath("tempfile.tsv") + with open(filename, mode="w") as outfile: + outfile.write( + "# **DO NOT** delete the 'UniqueIdentifier' row. It is used " + "by the system to identify and edit the correct rows and " + "columns in the database.\n") + outfile.write( + "# The '…_description' fields are useful for you to figure out " + "what row you are working on. 
Changing any of these fields will " + "also update the database, so do be careful.\n") + outfile.write( + "# Leave a field empty to delete the value in the database.\n") + outfile.write( + "# Any line beginning with a '#' character is considered a " + "comment line. This line, and all the lines above it, are " + "all comment lines. Comment lines will be ignored.\n") + writer = csv.DictWriter(outfile, + fieldnames= ( + BULK_EDIT_COMMON_FIELDNAMES + + samples_list), + dialect="excel-tab") + writer.writeheader() + writer.writerows(data) + outfile.flush() + + return send_file( + filename, + mimetype="text/csv", + as_attachment=True, + download_name=secure_filename(f"{dataset['Name']}_data")) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/edit-upload", + methods=["GET", "POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def edit_upload_phenotype_data(# pylint: disable=[unused-argument] + species: dict, + population: dict, + dataset: dict, + **kwargs +): + if request.method == "GET": + return render_template( + "phenotypes/bulk-edit-upload.html", + species=species, + population=population, + dataset=dataset, + activelink="edit-phenotype") + + edit_file = save_file(request.files["file-upload-bulk-edit-upload"], + Path(app.config["UPLOAD_FOLDER"])) + + jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + with sqlite3.connection(jobs_db) as conn: + job_id = uuid.uuid4() + job_cmd = [ + sys.executable, "-u", + "-m", "scripts.phenotypes_bulk_edit", + app.config["SQL_URI"], + jobs_db, + str(job_id), + "--log-level", + logging.getLevelName( + app.logger.getEffectiveLevel() + ).lower() + ] + app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd) + _job = gnlibs_jobs.launch_job( + gnlibs_jobs.initialise_job(conn, + job_id, + job_cmd, + "phenotype-bulk-edit", + extra_meta = { + "edit-file": str(edit_file), + "species-id": species["SpeciesId"], + "population-id": population["Id"], + "dataset-id": dataset["Id"] + }), + jobs_db, + f"{app.config['UPLOAD_FOLDER']}/job_errors", + worker_manager="gn_libs.jobs.launcher") + + + return redirect(url_for("background-jobs.job_status", + job_id=job_id, + job_type="phenotype-bulk-edit")) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/load-data-success/<uuid:job_id>", + methods=["GET"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def load_data_success( + species: dict, + population: dict, + dataset: dict, + job_id: uuid.UUID, + **kwargs +):# pylint: disable=[unused-argument] + with (database_connection(app.config["SQL_URI"]) as conn, + sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]) + as jobsconn): + try: + gn2_uri = urlparse(app.config["GN2_SERVER_URL"]) + job = gnlibs_jobs.job(jobsconn, job_id, fulldetails=True) + app.logger.debug("THE JOB: %s", job) + _xref_ids = tuple(str(item) for item + in json.loads(job["metadata"].get("xref_ids", "[]"))) + _publication = fetch_publication_by_id( + conn, int(job["metadata"].get("publication_id", "0"))) + _search_terms = (item for item in + (str(_publication["PubMed_ID"] or ""), + 
_publication["Authors"], + (_publication["Title"] or "")) + if item != "") + return render_template("phenotypes/load-phenotypes-success.html", + species=species, + population=population, + dataset=dataset, + job=job, + search_page_uri=urlunparse(ParseResult( + scheme=gn2_uri.scheme, + netloc=gn2_uri.netloc, + path="/search", + params="", + query=urlencode({ + "species": species["Name"], + "group": population["Name"], + "type": "Phenotypes", + "dataset": dataset["Name"], + "search_terms_or": ( + # Very long URLs will cause + # errors. + " ".join(_xref_ids) + if len(_xref_ids) <= 100 + else ""), + "search_terms_and": " ".join( + _search_terms).strip(), + "accession_id": "None", + "FormID": "searchResult" + }), + fragment=""))) + except JobNotFound as jnf: + return render_template("jobs/job-not-found.html", job_id=job_id) diff --git a/uploader/platforms/models.py b/uploader/platforms/models.py index a859371..0dd9368 100644 --- a/uploader/platforms/models.py +++ b/uploader/platforms/models.py @@ -56,7 +56,8 @@ def platform_by_species_and_id( return None -def save_new_platform(# pylint: disable=[too-many-arguments] +def save_new_platform( + # pylint: disable=[too-many-arguments, too-many-positional-arguments] cursor: Cursor, species_id: int, geo_platform: str, diff --git a/uploader/publications/__init__.py b/uploader/publications/__init__.py new file mode 100644 index 0000000..7efcabb --- /dev/null +++ b/uploader/publications/__init__.py @@ -0,0 +1,2 @@ +"""Package for handling publications.""" +from .views import pubbp diff --git a/uploader/publications/datatables.py b/uploader/publications/datatables.py new file mode 100644 index 0000000..e07fafd --- /dev/null +++ b/uploader/publications/datatables.py @@ -0,0 +1,52 @@ +"""Fetch data for datatables.""" +import logging +from typing import Optional + +from MySQLdb.cursors import DictCursor + +from gn_libs.mysqldb import Connection, debug_query + +logger = logging.getLogger(__name__) + +def fetch_publications( + conn: Connection, + search: Optional[str] = None, + offset: int = 0, + limit: int = -1 +) -> tuple[dict, int, int, int]: + """Fetch publications from the database.""" + _query = "SELECT * FROM Publication" + _count_query = "SELECT COUNT(*) FROM Publication" + _params = None + _where_clause = "" + _limit_clause = "" + if search is not None and bool(search): + _where_clause = ("WHERE PubMed_ID LIKE %s " + "OR Authors LIKE %s " + "OR Title LIKE %s") + _params = (f"%{search}%",) * 3 + + if limit > 0: + _limit_clause = f"LIMIT {limit} OFFSET {offset}" + + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT COUNT(*) FROM Publication") + _total_rows = int(cursor.fetchone()["COUNT(*)"]) + + cursor.execute(f"{_count_query} {_where_clause}", _params) + debug_query(cursor, logger) + _result = cursor.fetchone() + _total_filtered = int(_result["COUNT(*)"] if bool(_result) else 0) + + cursor.execute(f"{_query} {_where_clause} {_limit_clause}", _params) + debug_query(cursor, logger) + _current_filtered = tuple( + {**dict(row), "index": idx} + for idx, row + in enumerate(cursor.fetchall(), start=offset+1)) + + return ( + _current_filtered, + len(_current_filtered), + _total_filtered, + _total_rows) diff --git a/uploader/publications/misc.py b/uploader/publications/misc.py new file mode 100644 index 0000000..fca6f71 --- /dev/null +++ b/uploader/publications/misc.py @@ -0,0 +1,25 @@ +"""Miscellaneous functions dealing with publications.""" + + +def publications_differences( + filedata: tuple[dict, ...], + dbdata: tuple[dict, ...], 
+ pubmedid2pubidmap: tuple[dict, ...] +) -> tuple[dict, ...]: + """Compute the differences between file data and db data""" + diff = tuple() + for filerow, dbrow in zip( + sorted(filedata, key=lambda item: ( + item["phenotype_id"], item["xref_id"])), + sorted(dbdata, key=lambda item: ( + item["PhenotypeId"], item["xref_id"]))): + if filerow["PubMed_ID"] == dbrow["PubMed_ID"]: + continue + + newpubmed = filerow["PubMed_ID"] + diff = diff + ({ + **dbrow, + "PubMed_ID": newpubmed, + "PublicationId": pubmedid2pubidmap.get(newpubmed)},) + + return diff diff --git a/uploader/publications/models.py b/uploader/publications/models.py new file mode 100644 index 0000000..b199991 --- /dev/null +++ b/uploader/publications/models.py @@ -0,0 +1,96 @@ +"""Module to handle persistence and retrieval of publication to/from MariaDB""" +import logging +from typing import Iterable, Optional + +from MySQLdb.cursors import DictCursor + +from gn_libs.mysqldb import Connection, debug_query + +logger = logging.getLogger(__name__) + + +def fetch_phenotype_publications( + conn: Connection, + ids: tuple[tuple[int, int], ...] +) -> tuple[dict, ...]: + """Fetch publication from database by ID.""" + paramstr = ",".join(["(%s, %s)"] * len(ids)) + query = ( + "SELECT " + "pxr.PhenotypeId, pxr.Id AS xref_id, pxr.PublicationId, pub.PubMed_ID " + "FROM PublishXRef AS pxr INNER JOIN Publication AS pub " + "ON pxr.PublicationId=pub.Id " + f"WHERE (pxr.PhenotypeId, pxr.Id) IN ({paramstr})") + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute(query, tuple(item for row in ids for item in row)) + return tuple(dict(row) for row in cursor.fetchall()) + + +def create_new_publications( + conn: Connection, + publications: tuple[dict, ...] +) -> tuple[dict, ...]: + if len(publications) > 0: + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.executemany( + ("INSERT INTO " + "Publication( " + "PubMed_ID, Abstract, Authors, Title, Journal, Volume, Pages, " + "Month, Year" + ") " + "VALUES(" + "%(pubmed_id)s, %(abstract)s, %(authors)s, %(title)s, " + "%(journal)s, %(volume)s, %(pages)s, %(month)s, %(year)s" + ") " + "RETURNING *"), + publications) + return tuple({ + **row, "publication_id": row["Id"] + } for row in cursor.fetchall()) + return tuple() + + +def update_publications(conn: Connection , publications: tuple[dict, ...]) -> tuple[dict, ...]: + """Update details for multiple publications""" + if len(publications) > 0: + with conn.cursor(cursorclass=DictCursor) as cursor: + logger.debug("UPDATING PUBLICATIONS: %s", publications) + cursor.executemany( + ("UPDATE Publication SET " + "PubMed_ID=%(pubmed_id)s, Abstract=%(abstract)s, " + "Authors=%(authors)s, Title=%(title)s, Journal=%(journal)s, " + "Volume=%(volume)s, Pages=%(pages)s, Month=%(month)s, " + "Year=%(year)s " + "WHERE Id=%(publication_id)s"), + publications) + debug_query(cursor, logger) + return publications + return tuple() + return tuple() + + +def fetch_publication_by_id(conn: Connection, publication_id: int) -> dict: + """Fetch a specific publication from the database.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT * FROM Publication WHERE Id=%s", + (publication_id,)) + _res = cursor.fetchone() + return dict(_res) if _res else {} + + +def fetch_publication_phenotypes( + conn: Connection, publication_id: int) -> Iterable[dict]: + """Fetch all phenotypes linked to this publication.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + "SELECT pxr.Id AS xref_id, pxr.PublicationId, phe.* 
" + "FROM PublishXRef AS pxr INNER JOIN Phenotype AS phe " + "ON pxr.PhenotypeId=phe.Id " + "WHERE pxr.PublicationId=%s", + (publication_id,)) + while True: + row = cursor.fetchone() + if row: + yield row + else: + break diff --git a/uploader/publications/pubmed.py b/uploader/publications/pubmed.py new file mode 100644 index 0000000..ed9b652 --- /dev/null +++ b/uploader/publications/pubmed.py @@ -0,0 +1,103 @@ +"""Module to interact with NCBI's PubMed""" +import logging + +import requests +from lxml import etree + +logger = logging.getLogger(__name__) + + +def __pub_date__(pubdate: etree.Element): + pubyear = pubdate.find("Year") + pubmonth = pubdate.find("Month") + pubday = pubdate.find("Day") + return { + "year": pubyear.text if pubyear is not None else None, + "month": pubmonth.text if pubmonth is not None else None, + "day": pubday.text if pubday is not None else None + } + + +def __journal__(journal: etree.Element) -> dict: + volume = journal.find("JournalIssue/Volume") + issue = journal.find("JournalIssue/Issue") + return { + "volume": volume.text if volume is not None else None, + "issue": issue.text if issue is not None else None, + **__pub_date__(journal.find("JournalIssue/PubDate")), + "journal": journal.find("Title").text + } + +def __author__(author: etree.Element) -> str: + return "%s %s" % ( + author.find("LastName").text, + author.find("Initials").text) + + +def __pages__(pagination: etree.Element) -> str: + start = pagination.find("StartPage") + end = pagination.find("EndPage") + return (start.text + ( + f"-{end.text}" if end is not None else "" + )) if start is not None else "" + + +def __abstract__(article: etree.Element) -> str: + abstract = article.find("Abstract/AbstractText") + return abstract.text if abstract is not None else None + + +def __article__(pubmed_article: etree.Element) -> dict: + article = pubmed_article.find("MedlineCitation/Article") + return { + "pubmed_id": int(pubmed_article.find("MedlineCitation/PMID").text), + "title": article.find("ArticleTitle").text, + **__journal__(article.find("Journal")), + "abstract": __abstract__(article), + "pages": __pages__(article.find("Pagination")), + "authors": ", ".join(__author__(author) + for author in article.findall("AuthorList/Author")) + } + + +def __process_pubmed_publication_data__(text) -> tuple[dict, ...]: + """Process the data from PubMed into usable data.""" + doc = etree.XML(text) + articles = doc.xpath("//PubmedArticle") + logger.debug("Retrieved %s publications from NCBI", len(articles)) + return tuple(__article__(article) for article in articles) + +def fetch_publications(pubmed_ids: tuple[int, ...]) -> tuple[dict, ...]: + """Retrieve data on new publications from NCBI.""" + # See whether we can retrieve multiple publications in one go + # Parse data and save to DB + # Return PublicationId(s) for new publication(s). + if len(pubmed_ids) == 0: + logger.debug("There are no new PubMed IDs to fetch") + return tuple() + + logger.info("Fetching publications data for the following PubMed IDs: %s", + ", ".join((str(pid) for pid in pubmed_ids))) + + # Should we, perhaps, pass this in from a config variable? 
+ uri = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi" + try: + response = requests.get( + uri, + params={ + "db": "pubmed", + "retmode": "xml", + "id": ",".join(str(item) for item in pubmed_ids) + }) + + if response.status_code == 200: + return __process_pubmed_publication_data__(response.text) + + logger.error( + "Could not fetch the new publication from %s (status code: %s)", + uri, + response.status_code) + except requests.exceptions.ConnectionError: + logger.error("Could not find the domain %s", uri) + + return tuple() diff --git a/uploader/publications/views.py b/uploader/publications/views.py new file mode 100644 index 0000000..0608a35 --- /dev/null +++ b/uploader/publications/views.py @@ -0,0 +1,107 @@ +"""Endpoints for publications""" +import json + +from MySQLdb.cursors import DictCursor +from gn_libs.mysqldb import database_connection +from flask import ( + flash, + request, + url_for, + redirect, + Blueprint, + render_template, + current_app as app) + +from uploader.authorisation import require_login + +from .models import ( + fetch_publication_by_id, + create_new_publications, + fetch_publication_phenotypes) + +from .datatables import fetch_publications + +from gn_libs.debug import __pk__ + +pubbp = Blueprint("publications", __name__) + + +@pubbp.route("/", methods=["GET"]) +@require_login +def index(): + """Index page for publications.""" + with database_connection(app.config["SQL_URI"]) as conn: + return render_template("publications/index.html") + + +@pubbp.route("/list", methods=["GET"]) +@require_login +def list_publications(): + # request breakdown: + # https://datatables.net/manual/server-side + _page = int(request.args.get("draw")) + _length = int(request.args.get("length") or '-1') + _start = int(request.args.get("start") or '0') + _search = request.args["search[value]"] + with (database_connection(app.config["SQL_URI"]) as conn, + conn.cursor(cursorclass=DictCursor) as cursor): + _publications, _current_rows, _totalfiltered, _totalrows = fetch_publications( + conn, + _search, + offset=_start, + limit=_length) + + return json.dumps({ + "draw": _page, + "recordsTotal": _totalrows, + "recordsFiltered": _totalfiltered, + "publications": _publications, + "status": "success" + }) + + +@pubbp.route("/view/<int:publication_id>", methods=["GET"]) +@require_login +def view_publication(publication_id: int): + """View more details on a particular publication.""" + with database_connection(app.config["SQL_URI"]) as conn: + return render_template( + "publications/view-publication.html", + publication=fetch_publication_by_id(conn, publication_id), + linked_phenotypes=tuple(fetch_publication_phenotypes( + conn, publication_id))) + + +@pubbp.route("/create", methods=["GET", "POST"]) +@require_login +def create_publication(): + """Create a new publication.""" + if(request.method == "GET"): + return render_template("publications/create-publication.html") + form = request.form + authors = form.get("publication-authors").encode("utf8") + if authors is None or authors == "": + flash("The publication's author(s) MUST be provided!", "alert alert-danger") + return redirect(url_for("publications.create", **request.args)) + + with database_connection(app.config["SQL_URI"]) as conn: + publications = create_new_publications(conn, ({ + "pubmed_id": form.get("pubmed-id") or None, + "abstract": form.get("publication-abstract").encode("utf8") or None, + "authors": authors, + "title": form.get("publication-title").encode("utf8") or None, + "journal": 
form.get("publication-journal").encode("utf8") or None, + "volume": form.get("publication-volume").encode("utf8") or None, + "pages": form.get("publication-pages").encode("utf8") or None, + "month": (form.get("publication-month") or "").encode("utf8").capitalize() or None, + "year": form.get("publication-year").encode("utf8") or None + },)) + flash("New publication created!", "alert alert-success") + return redirect(url_for( + request.args.get("return_to") or "publications.view_publication", + publication_id=publications[0]["publication_id"], + **request.args)) + + flash("Publication creation failed!", "alert alert-danger") + app.logger.debug("Failed to create the new publication.", exc_info=True) + return redirect(url_for("publications.create_publication")) diff --git a/uploader/route_utils.py b/uploader/route_utils.py index 18eadda..ce718fb 100644 --- a/uploader/route_utils.py +++ b/uploader/route_utils.py @@ -6,7 +6,8 @@ from gn_libs.mysqldb import database_connection from uploader.population.models import (populations_by_species, population_by_species_and_id) -def generic_select_population(# pylint: disable=[too-many-arguments] +def generic_select_population( + # pylint: disable=[too-many-arguments, too-many-positional-arguments] species: dict, template: str, population_id: str, diff --git a/uploader/samples/models.py b/uploader/samples/models.py index d7d5384..b419d61 100644 --- a/uploader/samples/models.py +++ b/uploader/samples/models.py @@ -15,11 +15,11 @@ def samples_by_species_and_population( """Fetch the samples by their species and population.""" with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( - "SELECT iset.InbredSetId, s.* FROM InbredSet AS iset " - "INNER JOIN StrainXRef AS sxr ON iset.InbredSetId=sxr.InbredSetId " - "INNER JOIN Strain AS s ON sxr.StrainId=s.Id " - "WHERE s.SpeciesId=%(species_id)s " - "AND iset.InbredSetId=%(population_id)s", + "SELECT InbredSet.InbredSetId, Strain.* FROM InbredSet " + "INNER JOIN StrainXRef ON InbredSet.InbredSetId=StrainXRef.InbredSetId " + "INNER JOIN Strain ON StrainXRef.StrainId=Strain.Id " + "WHERE Strain.SpeciesId=%(species_id)s " + "AND InbredSet.InbredSetId=%(population_id)s", {"species_id": species_id, "population_id": population_id}) return tuple(cursor.fetchall()) diff --git a/uploader/samples/views.py b/uploader/samples/views.py index 27e5d3c..c0adb88 100644 --- a/uploader/samples/views.py +++ b/uploader/samples/views.py @@ -221,7 +221,10 @@ def upload_status(species: dict, population: dict, job_id: uuid.UUID, **kwargs): if status == "error": return redirect(url_for( - "species.populations.samples.upload_failure", job_id=job_id)) + "species.populations.samples.upload_failure", + species_id=species["SpeciesId"], + population_id=population["Id"], + job_id=job_id)) error_filename = Path(jobs.error_filename( job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")) @@ -241,9 +244,14 @@ def upload_status(species: dict, population: dict, job_id: uuid.UUID, **kwargs): species=species, population=population), 400 -@samplesbp.route("/upload/failure/<uuid:job_id>", methods=["GET"]) + +@samplesbp.route("<int:species_id>/populations/<int:population_id>/" + "upload-samples/failure/<uuid:job_id>", + methods=["GET"]) @require_login -def upload_failure(job_id: uuid.UUID): +@with_population(species_redirect_uri="species.populations.samples.index", + redirect_uri="species.populations.samples.select_population") +def upload_failure(species: dict, population: dict, job_id: uuid.UUID, **kwargs): """Display the errors of the samples 
upload failure.""" job = with_redis_connection(lambda rconn: jobs.job( rconn, jobs.jobsnamespace(), job_id)) @@ -257,4 +265,7 @@ def upload_failure(job_id: uuid.UUID): if stat.st_size > 0: return render_template("worker_failure.html", job_id=job_id) - return render_template("samples/upload-failure.html", job=job) + return render_template("samples/upload-failure.html", + species=species, + population=population, + job=job) diff --git a/uploader/session.py b/uploader/session.py index b538187..5af5827 100644 --- a/uploader/session.py +++ b/uploader/session.py @@ -77,12 +77,15 @@ def set_user_token(token: str) -> SessionInfo: """Set the user's token.""" info = session_info() return save_session_info({ - **info, "user": {**info["user"], "token": Right(token)}})#type: ignore[misc] + **info, + "user": {**info["user"], "token": Right(token), "logged_in": True} + })#type: ignore[misc] def set_user_details(userdets: UserDetails) -> SessionInfo: """Set the user details information""" - return save_session_info({**session_info(), "user": userdets})#type: ignore[misc] + info = session_info() + return save_session_info({**info, "user": {**info["user"], **userdets}})#type: ignore[misc] def user_details() -> UserDetails: """Retrieve user details.""" diff --git a/uploader/static/css/styles.css b/uploader/static/css/styles.css index 80c5a56..df50dec 100644 --- a/uploader/static/css/styles.css +++ b/uploader/static/css/styles.css @@ -5,7 +5,7 @@ body { margin: 0.7em; display: grid; - grid-template-columns: 1fr 9fr; + grid-template-columns: 2fr 8fr; grid-gap: 20px; font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; @@ -100,15 +100,32 @@ body { padding-left: 0.5em; } -#main #all-content { - /* Place it in the parent element */ - grid-column-start: 1; - grid-column-end: 3; +@media screen and (max-width: 20in) { + #main #all-content { + /* Place it in the parent element */ + grid-column-start: 1; + grid-column-end: 3; - /* Define layout for the children elements */ - display: grid; - grid-template-columns: 7fr 3fr; /* For a maximum screen width of 1366 pixels */ - grid-gap: 1.5em; + /* Define layout for the children elements */ + max-width: 80%; + } + + #sidebar-content { + display: none; + } +} + +@media screen and (min-width: 20.1in) { + #main #all-content { + /* Place it in the parent element */ + grid-column-start: 1; + grid-column-end: 3; + + /* Define layout for the children elements */ + display: grid; + grid-template-columns: 7fr 3fr; + grid-gap: 1.5em; + } } #main #all-content .row { @@ -162,3 +179,9 @@ table.dataTable thead th, table.dataTable tfoot th{ table.dataTable tbody tr.selected td { background-color: #ffee99 !important; } + +.form-group { + margin-bottom: 2em; + padding-bottom: 0.2em; + border-bottom: solid gray 1px; +} diff --git a/uploader/static/js/debug.js b/uploader/static/js/debug.js new file mode 100644 index 0000000..eb01209 --- /dev/null +++ b/uploader/static/js/debug.js @@ -0,0 +1,40 @@ +/** + * The entire purpose of this function is for use to debug values inline + * without changing the flow of the code too much. + * + * This **MUST** be a non-arrow function to allow access to the `arguments` + * object. + * + * This function expects at least one argument. + * + * If more than one argument is provided, then: + * a) the last argument is considered the value, and will be returned + * b) all other arguments will be converted to string and output + * + * If only one argument is provided, it is considered the value, and will be + * returned. 
+ * + * Zero arguments is an error condition. + **/ +function __pk__(val) { + /* Handle zero arguments */ + if (arguments.length < 1) { + throw new Error("Invalid arguments: Expected at least one argument."); + } + + msg = "/********** DEBUG **********/"; + if (arguments.length > 1) { + msg = Array.from( + arguments + ).slice( + 0, + arguments.length - 1 + ).map((val) => { + return String(val); + }).join("; ") + } + + value = arguments[arguments.length - 1]; + console.debug("/********** " + msg + " **********/", value); + return value; +} diff --git a/uploader/static/js/files.js b/uploader/static/js/files.js index 9d6bca1..0bde6f7 100644 --- a/uploader/static/js/files.js +++ b/uploader/static/js/files.js @@ -84,8 +84,8 @@ var errorHandler = makeResumableHandler("error"); var markResumableDragAndDropElement = (resumable, fileinput, droparea, browsebutton) => { if(resumable.support) { //Hide file input element and display drag&drop UI - add_class(fileinput, "hidden"); - remove_class(droparea, "hidden"); + add_class(fileinput, "visually-hidden"); + remove_class(droparea, "visually-hidden"); // Define UI elements for browse and drag&drop resumable.assignDrop(droparea); diff --git a/uploader/static/js/misc.js b/uploader/static/js/misc.js deleted file mode 100644 index cf7b39e..0000000 --- a/uploader/static/js/misc.js +++ /dev/null @@ -1,6 +0,0 @@ -"Miscellaneous functions and event-handlers" - -$(".not-implemented").click((event) => { - event.preventDefault(); - alert("This feature is not implemented yet. Please bear with us."); -}); diff --git a/uploader/static/js/pubmed.js b/uploader/static/js/pubmed.js new file mode 100644 index 0000000..9afd4c3 --- /dev/null +++ b/uploader/static/js/pubmed.js @@ -0,0 +1,113 @@ +var extract_details = (pubmed_id, details) => { + var months = { + "jan": "January", + "feb": "February", + "mar": "March", + "apr": "April", + "may": "May", + "jun": "June", + "jul": "July", + "aug": "August", + "sep": "September", + "oct": "October", + "nov": "November", + "dec": "December" + }; + var _date = details[pubmed_id].pubdate.split(" "); + return { + "authors": details[pubmed_id].authors.map((authobj) => { + return authobj.name; + }), + "title": details[pubmed_id].title, + "journal": details[pubmed_id].fulljournalname, + "volume": details[pubmed_id].volume, + "pages": details[pubmed_id].pages, + "month": _date.length > 1 ? 
months[_date[1].toLowerCase()] : "jan", + "year": _date[0], + }; +}; + +var update_publication_details = (details) => { + Object.entries(details).forEach((entry) => {; + switch(entry[0]) { + case "authors": + $("#txt-publication-authors").val(entry[1].join(", ")); + break; + case "month": + $("#select-publication-month") + .children("option") + .each((index, child) => { + console.debug(entry[1].toLowerCase()); + child.selected = child.value == entry[1].toLowerCase(); + }); + default: + $("#txt-publication-" + entry[0]).val(entry[1]); + break; + } + }); +}; + +var fetch_publication_abstract = (pubmed_id, pub_details) => { + $.ajax("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi", + { + "method": "GET", + "data": { + "db": "pubmed", + "id": pubmed_id, + "rettype": "abstract", + "retmode": "xml" + }, + "success": (data, textStatus, jqXHR) => { + update_publication_details({ + ...pub_details, + ...{ + "abstract": Array.from(data + .getElementsByTagName( + "Abstract")[0] + .children) + .map((elt) => {return elt.textContent.trim();}) + .join("\r\n") + }}); + }, + "error": (jqXHR, textStatus, errorThrown) => {}, + "complete": (jqXHR, textStatus) => {}, + "dataType": "xml" + }); +}; + +var fetch_publication_details = (pubmed_id, complete_thunks) => { + error_display = $("#search-pubmed-id-error"); + error_display.text(""); + add_class(error_display, "visually-hidden"); + $.ajax("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi", + { + "method": "GET", + "data": {"db": "pubmed", "id": pubmed_id, "format": "json"}, + "success": (data, textStatus, jqXHR) => { + // process and update publication details + hasError = ( + Object.hasOwn(data, "error") || + Object.hasOwn(data.result[pubmed_id], "error")); + if(hasError) { + error_display.text( + "There was an error fetching a publication with " + + "the given PubMed ID! The error received " + + "was: '" + ( + data.error || + data.result[pubmed_id].error) + + "'. Please check ID you provided and try " + + "again."); + remove_class(error_display, "visually-hidden"); + } else { + fetch_publication_abstract( + pubmed_id, + extract_details(pubmed_id, data.result)); + } + }, + "error": (jqXHR, textStatus, errorThrown) => {}, + "complete": () => { + complete_thunks.forEach((thunk) => {thunk()}); + }, + "dataType": "json" + }); +}; diff --git a/uploader/static/js/utils.js b/uploader/static/js/utils.js index 045dd47..1b31661 100644 --- a/uploader/static/js/utils.js +++ b/uploader/static/js/utils.js @@ -8,3 +8,30 @@ function trigger_change_event(element) { evt = new Event("change"); element.dispatchEvent(evt); } + + +var remove_class = (element, classvalue) => { + new_classes = (element.attr("class") || "").split(" ").map((val) => { + return val.trim(); + }).filter((val) => { + return ((val !== classvalue) && + (val !== "")) + }).join(" "); + + if(new_classes === "") { + element.removeAttr("class"); + } else { + element.attr("class", new_classes); + } +}; + + +var add_class = (element, classvalue) => { + remove_class(element, classvalue); + element.attr("class", (element.attr("class") || "") + " " + classvalue); +}; + +$(".not-implemented").click((event) => { + event.preventDefault(); + alert("This feature is not implemented yet. 
Please bear with us."); +}); diff --git a/uploader/templates/base.html b/uploader/templates/base.html index 09e6470..3c0d0d4 100644 --- a/uploader/templates/base.html +++ b/uploader/templates/base.html @@ -32,7 +32,7 @@ <a href="{{url_for('oauth2.logout')}}" title="Log out of the system"> <span class="glyphicon glyphicon-user"></span> - Sign Out</a> + {{user_email()}} Sign Out</a> {%else%} <a href="{{authserver_authorise_uri()}}" title="Log in to the system">Sign In</a> @@ -46,6 +46,9 @@ <ul class="nav flex-column"> <li {%if activemenu=="home"%}class="activemenu"{%endif%}> <a href="/" >Home</a></li> + <li {%if activemenu=="publications"%}class="activemenu"{%endif%}> + <a href="{{url_for('publications.index')}}" + title="View and manage publications.">Publications</a></li> <li {%if activemenu=="species"%}class="activemenu"{%endif%}> <a href="{{url_for('species.list_species')}}" title="View and manage species information.">Species</a></li> @@ -151,7 +154,7 @@ <!-- local dependencies --> - <script type="text/javascript" src="/static/js/misc.js"></script> + <script type="text/javascript" src="/static/js/utils.js"></script> <script type="text/javascript" src="/static/js/datatables.js"></script> {%block javascript%}{%endblock%} </body> diff --git a/uploader/templates/jobs/job-error.html b/uploader/templates/jobs/job-error.html new file mode 100644 index 0000000..b3015fc --- /dev/null +++ b/uploader/templates/jobs/job-error.html @@ -0,0 +1,17 @@ +{%extends "base.html"%} + +{%from "flash_messages.html" import flash_all_messages%} + +{%block title%}Background Jobs: Error{%endblock%} + +{%block pagetitle%}Background Jobs: Error{%endblock%} + +{%block contents%} + +<h1>Background Jobs: Error</h1> +<p>Job <strong>{{job["job_id"]}}</strong> failed!</p> +<p>The error details are in the "STDERR" section below.</p> + +<h2>STDERR</h2> +<pre>{{job["stderr"]}}</pre> +{%endblock%} diff --git a/uploader/templates/jobs/job-not-found.html b/uploader/templates/jobs/job-not-found.html new file mode 100644 index 0000000..a71e66f --- /dev/null +++ b/uploader/templates/jobs/job-not-found.html @@ -0,0 +1,11 @@ +{%extends "base.html"%} + +{%from "flash_messages.html" import flash_all_messages%} + +{%block title%}Background Jobs{%endblock%} + +{%block pagetitle%}Background Jobs{%endblock%} + +{%block contents%} +<p>Could not find job with ID: {{job_id}}</p> +{%endblock%} diff --git a/uploader/templates/jobs/job-status.html b/uploader/templates/jobs/job-status.html new file mode 100644 index 0000000..83c02fd --- /dev/null +++ b/uploader/templates/jobs/job-status.html @@ -0,0 +1,24 @@ +{%extends "base.html"%} + +{%from "flash_messages.html" import flash_all_messages%} + +{%block extrameta%} +<meta http-equiv="refresh" content="5" /> +{%endblock%} + +{%block title%}Background Jobs{%endblock%} + +{%block pagetitle%}Background Jobs{%endblock%} + +{%block contents%} + +<p>Status: {{job["metadata"]["status"]}}</p> +<p>Job Type: {{job["metadata"]["job-type"]}}</p> + +<h2>STDOUT</h2> +<pre>{{job["stdout"]}}</pre> + +<h2>STDERR</h2> +<pre>{{job["stderr"]}}</pre> + +{%endblock%} diff --git a/uploader/templates/phenotypes/add-phenotypes-base.html b/uploader/templates/phenotypes/add-phenotypes-base.html index 97b55f2..9909c20 100644 --- a/uploader/templates/phenotypes/add-phenotypes-base.html +++ b/uploader/templates/phenotypes/add-phenotypes-base.html @@ -42,110 +42,30 @@ {%block frm_add_phenotypes_elements%}{%endblock%} - <div class="checkbox"> - <label> - <input id="chk-published" type="checkbox" name="published?" 
/> - These phenotypes are published</label> - </div> - - <fieldset id="fldset-publication-info" class="hidden"> + <fieldset id="fldset-publication-info"> <legend>Publication Information</legend> - <div class="form-group"> - <label for="txt-pubmed-id" class="form-label">Pubmed ID</label> - <div class="input-group"> - <input id="txt-pubmed-id" name="pubmed-id" type="text" - class="form-control" /> - <span class="input-group-btn"> - <button id="btn-search-pubmed-id" class="btn btn-info">Search</button> - </span> - </div> - <span id="search-pubmed-id-error" - class="form-text text-muted text-danger hidden"> - </span><br /> - <span class="form-text text-muted"> - Enter your publication's PubMed ID above and click "Search" to search - for some (or all) of the publication details requested below. - </span> - </div> - - <div class="form-group"> - <label for="txt-publication-authors" class="form-label">Authors</label> - <input id="txt-publication-authors" name="publication-authors" - type="text" class="form-control" /> - <span class="form-text text-muted"> - Enter the authors in the following format …</span> - </div> - - <div class="form-group"> - <label for="txt-publication-title" class="form-label"> - Publication Title</label> - <input id="txt-publication-title" name="publication-title" type="text" - class="form-control" /> - <span class="form-text text-muted"> - Enter your publication's title.</span> - </div> - - <div class="form-group"> - <label for="txt-publication-abstract" class="form-label"> - Publication Abstract</label> - <textarea id="txt-publication-abstract" name="publication-abstract" - class="form-control" rows="10"></textarea> - <span class="form-text text-muted"> - Enter the abstract for your publication.</span> - </div> - - <div class="form-group"> - <label for="txt-publication-journal" class="form-label">Journal</label> - <input id="txt-publication-journal" name="journal" type="text" - class="form-control" /> - <span class="form-text text-muted"> - Enter the name of the journal where your work was published.</span> - </div> - - <div class="form-group"> - <label for="txt-publication-volume" class="form-label">Volume</label> - <input id="txt-publication-volume" name="publication-volume" type="text" - class="form-control" /> - <span class="form-text text-muted"> - Enter the volume in the following format …</span> - </div> - - <div class="form-group"> - <label for="txt-publication-pages" class="form-label">Pages</label> - <input id="txt-publication-pages" name="publication-pages" type="text" - class="form-control" /> - <span class="form-text text-muted"> - Enter the journal volume where your work was published.</span> - </div> - - <div class="form-group"> - <label for="select-publication-month" class="form-label"> - Publication Month</label> - <select id="select-publication-month" name="publication-month" - class="form-control"> - {%for month in monthnames%} - <option value="{{month | lower}}" - {%if current_month | lower == month | lower%} - selected="selected" - {%endif%}>{{month | capitalize}}</option> - {%endfor%} - </select> - <span class="form-text text-muted"> - Select the month when the work was published. 
- <span class="text-danger"> - This cannot be before, say 1600 and cannot be in the future!</span></span> - </div> - - <div class="form-group"> - <label for="txt-publication-year" class="form-label">Publication Year</label> - <input id="txt-publication-year" name="publication-year" type="text" - class="form-control" value="{{current_year}}" /> - <span class="form-text text-muted"> - Enter the year your work was published. - <span class="text-danger"> - This cannot be before, say 1600 and cannot be in the future!</span> - </span> - </div> + <input type="hidden" name="publication-id" id="txt-publication-id" /> + <span class="form-text text-muted"> + Select a publication for your data. <br /> + Can't find a publication you can use? Go ahead and + <a href="{{url_for( + 'publications.create_publication', + return_to='species.populations.phenotypes.add_phenotypes', + species_id=species.SpeciesId, + population_id=population.Id, + dataset_id=dataset.Id)}}">create a new publication</a>.</span> + <table id="tbl-select-publication" class="table compact stripe"> + <thead> + <tr> + <th>#</th> + <th>PubMed ID</th> + <th>Title</th> + <th>Authors</th> + </tr> + </thead> + + <tbody></tbody> + </table> </fieldset> <div class="form-group"> @@ -165,165 +85,80 @@ {%block javascript%} <script type="text/javascript"> - var remove_class = (element, classvalue) => { - new_classes = (element.attr("class") || "").split(" ").map((val) => { - return val.trim(); - }).filter((val) => { - return ((val !== classvalue) && - (val !== "")) - }).join(" "); - - if(new_classes === "") { - element.removeAttr("class"); - } else { - element.attr("class", new_classes); - } - }; - - var add_class = (element, classvalue) => { - remove_class(element, classvalue); - element.attr("class", (element.attr("class") || "") + " " + classvalue); - }; - - $("#chk-published").on("click", (event) => { - pub_details = $("#fldset-publication-info") - if(event.target.checked) { - // display the publication details - remove_class(pub_details, "hidden"); - } else { - // hide the publication details - add_class(pub_details, "hidden"); - } - }); - - var extract_details = (pubmed_id, details) => { - var months = { - "jan": "January", - "feb": "February", - "mar": "March", - "apr": "April", - "may": "May", - "jun": "June", - "jul": "July", - "aug": "August", - "sep": "September", - "oct": "October", - "nov": "November", - "dec": "December" - }; - var _date = details[pubmed_id].pubdate.split(" "); - return { - "authors": details[pubmed_id].authors.map((authobj) => { - return authobj.name; - }), - "title": details[pubmed_id].title, - "journal": details[pubmed_id].fulljournalname, - "volume": details[pubmed_id].volume, - "pages": details[pubmed_id].pages, - "month": _date.length > 1 ? 
months[_date[1].toLowerCase()] : "jan", - "year": _date[0], - }; - }; - - var update_publication_details = (details) => { - Object.entries(details).forEach((entry) => {; - switch(entry[0]) { - case "authors": - $("#txt-publication-authors").val(entry[1].join(", ")); - break; - case "month": - $("#select-publication-month") - .children("option") - .each((index, child) => { - child.selected = child.value == entry[1].toLowerCase(); - }); - default: - $("#txt-publication-" + entry[0]).val(entry[1]); - break; - } + $(function() { + var publicationsDataTable = buildDataTable( + "#tbl-select-publication", + [], + [ + {data: "index"}, + { + searchable: true, + data: (pub) => { + if(pub.PubMed_ID) { + return `<a href="https://pubmed.ncbi.nlm.nih.gov/` + + `${pub.PubMed_ID}/" target="_blank" ` + + `title="Link to publication on NCBI.">` + + `${pub.PubMed_ID}</a>`; + } + return ""; + } + }, + { + searchable: true, + data: (pub) => { + var title = "⸻"; + if(pub.Title) { + title = pub.Title + } + return `<a href="/publications/view/${pub.Id}" ` + + `target="_blank" ` + + `title="Link to view publication details">` + + `${title}</a>`; + } + }, + { + searchable: true, + data: (pub) => { + authors = pub.Authors.split(",").map( + (item) => {return item.trim();}); + if(authors.length > 1) { + return authors[0] + ", et. al."; + } + return authors[0]; + } + } + ], + { + serverSide: true, + ajax: { + url: "/publications/list", + dataSrc: "publications" + }, + select: "single", + paging: true, + scrollY: 700, + deferRender: true, + scroller: true, + scrollCollapse: true, + layout: { + topStart: "info", + topEnd: "search" + } + }); + publicationsDataTable.on("select", (event, datatable, type, indexes) => { + indexes.forEach((element, index, thearray) => { + let row = datatable.row(element).node(); + console.debug(datatable.row(element).data()); + $("#frm-add-phenotypes #txt-publication-id").val( + datatable.row(element).data().Id); + }); + }); + publicationsDataTable.on("deselect", (event, datatable, type, indexes) => { + indexes.forEach((element, index, thearray) => { + let row = datatable.row(element).node(); + $("#frm-add-phenotypes #txt-publication-id").val(null); + }); }); - }; - - var fetch_publication_abstract = (pubmed_id, pub_details) => { - $.ajax("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi", - { - "method": "GET", - "data": { - "db": "pubmed", - "id": pubmed_id, - "rettype": "abstract", - "retmode": "xml" - }, - "success": (data, textStatus, jqXHR) => { - update_publication_details({ - ...pub_details, - ...{ - "abstract": Array.from(data - .getElementsByTagName( - "Abstract")[0] - .children) - .map((elt) => {return elt.textContent.trim();}) - .join("\r\n") - }}); - }, - "error": (jqXHR, textStatus, errorThrown) => {}, - "complete": (jqXHR, textStatus) => {}, - "dataType": "xml" - }); - }; - - var fetch_publication_details = (pubmed_id, complete_thunks) => { - error_display = $("#search-pubmed-id-error"); - error_display.text(""); - add_class(error_display, "hidden"); - $.ajax("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi", - { - "method": "GET", - "data": {"db": "pubmed", "id": pubmed_id, "format": "json"}, - "success": (data, textStatus, jqXHR) => { - // process and update publication details - hasError = ( - Object.hasOwn(data, "error") || - Object.hasOwn(data.result[pubmed_id], "error")); - if(hasError) { - error_display.text( - "There was an error fetching a publication with " + - "the given PubMed ID! 
The error received " + - "was: '" + ( - data.error || - data.result[pubmed_id].error) + - "'. Please check ID you provided and try " + - "again."); - remove_class(error_display, "hidden"); - } else { - fetch_publication_abstract( - pubmed_id, - extract_details(pubmed_id, data.result)); - } - }, - "error": (jqXHR, textStatus, errorThrown) => {}, - "complete": () => { - complete_thunks.forEach((thunk) => {thunk()}); - }, - "dataType": "json" - }); - }; - - $("#btn-search-pubmed-id").on("click", (event) => { - event.preventDefault(); - var search_button = event.target; - var pubmed_id = $("#txt-pubmed-id").val().trim(); - remove_class($("#txt-pubmed-id").parent(), "has-error"); - if(pubmed_id == "") { - add_class($("#txt-pubmed-id").parent(), "has-error"); - return false; - } - - search_button.disabled = true; - // Fetch publication details - fetch_publication_details(pubmed_id, - [() => {search_button.disabled = false;}]); - return false; }); </script> diff --git a/uploader/templates/phenotypes/add-phenotypes-raw-files.html b/uploader/templates/phenotypes/add-phenotypes-raw-files.html index 7f8d8b0..67b56e3 100644 --- a/uploader/templates/phenotypes/add-phenotypes-raw-files.html +++ b/uploader/templates/phenotypes/add-phenotypes-raw-files.html @@ -105,111 +105,213 @@ </div> </fieldset> -<fieldset id="fldset-data-files"> +<fieldset id="fldset-files"> <legend>Data File(s)</legend> - <div class="form-group non-resumable-elements"> - <label for="finput-phenotype-descriptions" class="form-label"> - Phenotype Descriptions</label> - <input id="finput-phenotype-descriptions" - name="phenotype-descriptions" - class="form-control" - type="file" - data-preview-table="tbl-preview-pheno-desc" - required="required" /> - <span class="form-text text-muted"> - Provide a file that contains only the phenotype descriptions, - <a href="#docs-file-phenotype-description" - title="Documentation of the phenotype data file format."> - the documentation for the expected format of the file</a>.</span> - </div> - - {{display_resumable_elements( - "resumable-phenotype-descriptions", - "phenotype descriptions", - '<p>You can drop a CSV file that contains the phenotype descriptions here, - or you can click the "Browse" button (below and to the right) to select it - from your computer.</p> - <p>The CSV file must conform to some standards, as documented in the - <a href="#docs-file-phenotype-description" - title="Documentation of the phenotype data file format."> - "Phenotypes Descriptions" documentation</a> section below.</p>')}} - - - <div class="form-group non-resumable-elements"> - <label for="finput-phenotype-data" class="form-label">Phenotype Data</label> - <input id="finput-phenotype-data" - name="phenotype-data" - class="form-control" - type="file" - data-preview-table="tbl-preview-pheno-data" - required="required" /> - <span class="form-text text-muted"> - Provide a file that contains only the phenotype data. 
See - <a href="#docs-file-phenotype-data" - title="Documentation of the phenotype data file format."> - the documentation for the expected format of the file</a>.</span> - </div> - - {{display_resumable_elements( - "resumable-phenotype-data", - "phenotype data", - '<p>You can drop a CSV file that contains the phenotype data here, - or you can click the "Browse" button (below and to the right) to select it - from your computer.</p> - <p>The CSV file must conform to some standards, as documented in the - <a href="#docs-file-phenotype-data" - title="Documentation of the phenotype data file format."> - "Phenotypes Data" documentation</a> section below.</p>')}} - - {%if population.Family in families_with_se_and_n%} - <div class="form-group non-resumable-elements"> - <label for="finput-phenotype-se" class="form-label">Phenotype: Standard Errors</label> - <input id="finput-phenotype-se" - name="phenotype-se" - class="form-control" - type="file" - data-preview-table="tbl-preview-pheno-se" - required="required" /> - <span class="form-text text-muted"> - Provide a file that contains only the standard errors for the phenotypes, - computed from the data above.</span> - </div> - {{display_resumable_elements( - "resumable-phenotype-se", - "standard errors", - '<p>You can drop a CSV file that contains the computed standard-errors data - here, or you can click the "Browse" button (below and to the right) to - select it from your computer.</p> - <p>The CSV file must conform to some standards, as documented in the - <a href="#docs-file-phenotype-se" - title="Documentation of the phenotype data file format."> - "Phenotypes Data" documentation</a> section below.</p>')}} + <fieldset id="fldset-descriptions-file"> + <div class="form-group"> + <div class="form-check"> + <input id="chk-phenotype-descriptions-transposed" + name="phenotype-descriptions-transposed" + type="checkbox" + class="form-check-input" + style="border: solid #8EABF0" /> + <label for="chk-phenotype-descriptions-transposed" + class="form-check-label"> + Description file transposed?</label> + </div> + + <div class="non-resumable-elements"> + <label for="finput-phenotype-descriptions" class="form-label"> + Phenotype Descriptions</label> + <input id="finput-phenotype-descriptions" + name="phenotype-descriptions" + class="form-control" + type="file" + data-preview-table="tbl-preview-pheno-desc" + required="required" /> + <span class="form-text text-muted"> + Provide a file that contains only the phenotype descriptions, + <a href="#docs-file-phenotype-description" + title="Documentation of the phenotype data file format."> + the documentation for the expected format of the file</a>.</span> + </div> + {{display_resumable_elements( + "resumable-phenotype-descriptions", + "phenotype descriptions", + '<p>Drag and drop the CSV file that contains the descriptions of your + phenotypes here.</p> + + <p>The CSV file should be a matrix of + <strong>phenotypes × descriptions</strong> i.e. The first column + contains the phenotype names/identifiers whereas the first row is a list + of metadata fields like, "description", "units", etc.</p> + + <p>If the format is transposed (i.e. + <strong>descriptions × phenotypes</strong>) select the checkbox above. 
+ </p> + + <p>Please see the + <a href="#docs-file-phenotype-description" + title="Documentation of the phenotype data file format."> + "Phenotypes Descriptions" documentation</a> section below for more + information on the expected format of the file provided here.</p>')}} + {{display_preview_table( + "tbl-preview-pheno-desc", "phenotype descriptions")}} + </div> + </fieldset> + + + <fieldset id="fldset-data-file"> + <div class="form-group"> + <div class="form-check"> + <input id="chk-phenotype-data-transposed" + name="phenotype-data-transposed" + type="checkbox" + class="form-check-input" + style="border: solid #8EABF0" /> + <label for="chk-phenotype-data-transposed" class="form-check-label"> + Data file transposed?</label> + </div> + + <div class="non-resumable-elements"> + <label for="finput-phenotype-data" class="form-label">Phenotype Data</label> + <input id="finput-phenotype-data" + name="phenotype-data" + class="form-control" + type="file" + data-preview-table="tbl-preview-pheno-data" + required="required" /> + <span class="form-text text-muted"> + Provide a file that contains only the phenotype data. See + <a href="#docs-file-phenotype-data" + title="Documentation of the phenotype data file format."> + the documentation for the expected format of the file</a>.</span> + </div> + + {{display_resumable_elements( + "resumable-phenotype-data", + "phenotype data", + '<p>Drag and drop a CSV file that contains the phenotypes numerical data + here. You can click the "Browse" button (below and to the right) to + select the file from your computer.</p> + + <p>The CSV should be a matrix of <strong>samples × phenotypes</strong>, + i.e. The first column contains the samples identifiers while the first + row is the list of phenotypes identifiers occurring in the phenotypes + descriptions file.</p> + + <p>If the format is transposed (i.e <strong>phenotypes × samples</strong>) + select the checkbox above.</p> + <p>Please see the + <a href="#docs-file-phenotype-data" + title="Documentation of the phenotype data file format."> + "Phenotypes Data" documentation</a> section below for more information + on the expected format for the file provided here.</p>')}} + {{display_preview_table("tbl-preview-pheno-data", "phenotype data")}} + </div> + </fieldset> - <div class="form-group non-resumable-elements"> - <label for="finput-phenotype-n" class="form-label">Phenotype: Number of Samples/Individuals</label> - <input id="finput-phenotype-n" - name="phenotype-n" - class="form-control" - type="file" - data-preview-table="tbl-preview-pheno-n" - required="required" /> - <span class="form-text text-muted"> - Provide a file that contains only the number of samples/individuals used in - the computation of the standard errors above.</span> - </div> - {{display_resumable_elements( - "resumable-phenotype-n", - "number of samples/individuals", - '<p>You can drop a CSV file that contains the number of samples/individuals - used in computation of the standard-errors here, or you can click the - "Browse" button (below and to the right) to select it from your computer. 
- </p> - <p>The CSV file must conform to some standards, as documented in the - <a href="#docs-file-phenotype-n" - title="Documentation of the phenotype data file format."> - "Phenotypes Data" documentation</a> section below.</p>')}} + {%if population.Family in families_with_se_and_n%} + <fieldset id="fldset-se-file"> + <div class="form-group"> + <div class="form-check"> + <input id="chk-phenotype-se-transposed" + name="phenotype-se-transposed" + type="checkbox" + class="form-check-input" + style="border: solid #8EABF0" /> + <label for="chk-phenotype-se-transposed" class="form-check-label"> + Standard-Errors file transposed?</label> + </div> + <div class="group non-resumable-elements"> + <label for="finput-phenotype-se" class="form-label">Phenotype: Standard Errors</label> + <input id="finput-phenotype-se" + name="phenotype-se" + class="form-control" + type="file" + data-preview-table="tbl-preview-pheno-se" + required="required" /> + <span class="form-text text-muted"> + Provide a file that contains only the standard errors for the phenotypes, + computed from the data above.</span> + </div> + + {{display_resumable_elements( + "resumable-phenotype-se", + "standard errors", + '<p>Drag and drop a CSV file that contains the phenotypes standard-errors + data here. You can click the "Browse" button (below and to the right) to + select the file from your computer.</p> + + <p>The CSV should be a matrix of <strong>samples × phenotypes</strong>, + i.e. The first column contains the samples identifiers while the first + row is the list of phenotypes identifiers occurring in the phenotypes + descriptions file.</p> + + <p>If the format is transposed (i.e <strong>phenotypes × samples</strong>) + select the checkbox above.</p> + + <p>Please see the + <a href="#docs-file-phenotype-se" + title="Documentation of the phenotype data file format."> + "Phenotypes Data" documentation</a> section below for more information + on the expected format of the file provided here.</p>')}} + + {{display_preview_table("tbl-preview-pheno-se", "standard errors")}} + </div> + </fieldset> + + + <fieldset id="fldset-n-file"> + <div class="form-group"> + <div class="form-check"> + <input id="chk-phenotype-n-transposed" + name="phenotype-n-transposed" + type="checkbox" + class="form-check-input" + style="border: solid #8EABF0" /> + <label for="chk-phenotype-n-transposed" class="form-check-label"> + Counts file transposed?</label> + </div> + <div class="non-resumable-elements"> + <label for="finput-phenotype-n" class="form-label">Phenotype: Number of Samples/Individuals</label> + <input id="finput-phenotype-n" + name="phenotype-n" + class="form-control" + type="file" + data-preview-table="tbl-preview-pheno-n" + required="required" /> + <span class="form-text text-muted"> + Provide a file that contains only the number of samples/individuals used in + the computation of the standard errors above.</span> + </div> + + {{display_resumable_elements( + "resumable-phenotype-n", + "number of samples/individuals", + '<p>Drag and drop a CSV file that contains the samples\' phenotypes counts + data here. You can click the "Browse" button (below and to the right) to + select the file from your computer.</p> + + <p>The CSV should be a matrix of <strong>samples × phenotypes</strong>, + i.e. 
the first column contains the sample identifiers while the first
+      row is the list of phenotype identifiers occurring in the phenotypes
+      descriptions file.</p>
+
+      <p>If the format is transposed (i.e. <strong>phenotypes × samples</strong>)
+      select the checkbox above.</p>
+
+      <p>Please see the
+      <a href="#docs-file-phenotype-n"
+         title="Documentation of the phenotype data file format.">
+        "Phenotypes Data" documentation</a> section below for more information
+      on the expected format of the file provided here.</p>')}}
+
+      {{display_preview_table("tbl-preview-pheno-n", "number of samples/individuals")}}
+    </div>
+  </fieldset>
 </fieldset>
 {%endif%}
 {%endblock%}
@@ -322,15 +424,15 @@
     <span id="docs-file-phenotype-data"></span>
     <span id="docs-file-phenotype-se"></span>
     <span id="docs-file-phenotype-n"></span>
-    <p>The data is a matrix of <em>phenotypes × individuals</em>, e.g.</p>
+    <p>The data is a matrix of <em>samples (or individuals) × phenotypes</em>, e.g.</p>
     <code>
      # num-cases: 2549
      # num-phenos: 13
-      id,IND001,IND002,IND003,IND004,…<br />
-      pheno10001,61.400002,54.099998,483,49.799999,…<br />
-      pheno10002,49,50.099998,403,45.5,…<br />
-      pheno10003,62.5,53.299999,501,62.900002,…<br />
-      pheno10004,53.099998,55.099998,403,NA,…<br />
+      id,pheno10001,pheno10002,pheno10003,pheno10004,…<br />
+      IND001,61.400002,49,62.5,53.099998,…<br />
+      IND002,54.099998,50.099998,53.299999,55.099998,…<br />
+      IND003,483,403,501,403,…<br />
+      IND004,49.799999,45.5,62.900002,NA,…<br />
      ⋮<br /></code>

     <p>where <code>IND001,IND002,IND003,IND004,…</code> are the
@@ -346,12 +448,6 @@
 {%endblock%}

 {%block sidebarcontents%}
-{{display_preview_table("tbl-preview-pheno-desc", "descriptions")}}
-{{display_preview_table("tbl-preview-pheno-data", "data")}}
-{%if population.Family in families_with_se_and_n%}
-{{display_preview_table("tbl-preview-pheno-se", "standard errors")}}
-{{display_preview_table("tbl-preview-pheno-n", "number of samples")}}
-{%endif%}
 {{display_pheno_dataset_card(species, population, dataset)}}
 {%endblock%}

@@ -429,9 +525,9 @@
       });

      if(table.find("tbody tr.data-row").length > 0) {
-        add_class(table.find(".data-row-template"), "hidden");
+        add_class(table.find(".data-row-template"), "visually-hidden");
       } else {
-        remove_class(table.find(".data-row-template"), "hidden");
+        remove_class(table.find(".data-row-template"), "visually-hidden");
       }
     };

@@ -467,13 +563,24 @@
       Object.entries(preview_tables_to_elements_map).forEach((mapentry) => {
         var preview_table = $(mapentry[0]);
         var file_input = $(mapentry[1]);
-        if(file_input.length === 1) {
+        if(file_input[0].files.length > 0) {
           readFirstNLines(
               file_input[0].files[0],
               10,
               [makePreviewUpdater(preview_table)]);
         }
       });
+
+      if(typeof(resumables) !== "undefined") {
+          resumables.forEach((resumable) => {
+              if(resumable.files.length > 0) {
+                  readFirstNLines(
+                      resumable.files[0].file,
+                      10,
+                      [makePreviewUpdater(resumable.preview_table)]);
+              }
+          });
+      }
     };

     [
@@ -506,7 +613,7 @@
       var display_element = display_area
           .find(".file-display-template")
           .clone();
-      remove_class(display_element, "hidden");
+      remove_class(display_element, "visually-hidden");
       remove_class(display_element, "file-display-template");
       add_class(display_element, "file-display");
       display_element.find(".filename").text(file.name
@@ -526,7 +633,7 @@
       return () => {/*Has no event!*/
         var progress = (resumable.progress() * 100).toFixed(2);
         var pbar = progress_bar.find(".progress-bar");
-        remove_class(progress_bar, "hidden");
+        remove_class(progress_bar, "visually-hidden");
        pbar.css("width",
progress+"%"); pbar.attr("aria-valuenow", progress); pbar.text("Uploading: " + progress + "%"); @@ -536,9 +643,9 @@ var retryUpload = (retry_button, cancel_button) => { retry_button.on("click", (event) => { resumable.files.forEach((file) => {file.retry();}); - add_class(retry_button, "hidden"); - remove_class(cancel_button, "hidden"); - add_class(browse_button, "hidden"); + add_class(retry_button, "visually-hidden"); + remove_class(cancel_button, "visually-hidden"); + add_class(browse_button, "visually-hidden"); }); }; @@ -549,18 +656,18 @@ file.abort(); } }); - add_class(cancel_button, "hidden"); - remove_class(retry_button, "hidden"); - remove_class(browse_button, "hidden"); + add_class(cancel_button, "visually-hidden"); + remove_class(retry_button, "visually-hidden"); + remove_class(browse_button, "visually-hidden"); }); }; var startUpload = (browse_button, retry_button, cancel_button) => { return (event) => { - remove_class(cancel_button, "hidden"); - add_class(retry_button, "hidden"); - add_class(browse_button, "hidden"); + remove_class(cancel_button, "visually-hidden"); + add_class(retry_button, "visually-hidden"); + add_class(browse_button, "visually-hidden"); }; }; @@ -574,6 +681,7 @@ })); }); formdata.append("resumable-upload", "true"); + formdata.append("publication-id", $("#txt-publication-id").val()); return formdata; } @@ -642,7 +750,7 @@ file_input.parent(), $("#" + resumable_element_id), submit_button, - ["csv", "tsv"]), + ["csv", "tsv", "txt"]), file_input.parent(), $("#" + resumable_element_id), $("#" + resumable_element_id + "-browse-button")), @@ -678,13 +786,20 @@ ["frm-add-phenotypes", "finput-phenotype-se", "resumable-phenotype-se", "tbl-preview-pheno-se"], ["frm-add-phenotypes", "finput-phenotype-n", "resumable-phenotype-n", "tbl-preview-pheno-n"], ].map((row) => { - return makeResumableObject(row[0], row[1], row[2], row[3]); + r = makeResumableObject(row[0], row[1], row[2], row[3]); + r.preview_table = $("#" + row[3]); + return r; }).filter((val) => { return Boolean(val); }); $("#frm-add-phenotypes input[type=submit]").on("click", (event) => { event.preventDefault(); + console.debug(); + if ($("#txt-publication-id").val() == "") { + alert("You MUST provide a publication for the phenotypes."); + return false; + } // TODO: Check all the relevant files exist // TODO: Verify that files are not duplicated var filenames = []; diff --git a/uploader/templates/phenotypes/bulk-edit-upload.html b/uploader/templates/phenotypes/bulk-edit-upload.html new file mode 100644 index 0000000..d0f38f5 --- /dev/null +++ b/uploader/templates/phenotypes/bulk-edit-upload.html @@ -0,0 +1,62 @@ +{%extends "phenotypes/base.html"%} +{%from "flash_messages.html" import flash_all_messages%} +{%from "macro-table-pagination.html" import table_pagination%} +{%from "populations/macro-display-population-card.html" import display_population_card%} + +{%block title%}Phenotypes{%endblock%} + +{%block pagetitle%}Phenotypes{%endblock%} + +{%block lvl4_breadcrumbs%} +<li {%if activelink=="view-dataset"%} + class="breadcrumb-item active" + {%else%} + class="breadcrumb-item" + {%endif%}> + <a href="{{url_for('species.populations.phenotypes.view_dataset', + species_id=species.SpeciesId, + population_id=population.Id, + dataset_id=dataset.Id)}}">View</a> +</li> +{%endblock%} + +{%block contents%} +<div class="row"> + <p>Upload the edited file you downloaded and edited.</p> +</div> + +<div class="row"> + <form id="frm-bulk-edit-upload" + class="form-horizontal" + method="POST" + action="{{url_for( + 
'species.populations.phenotypes.edit_upload_phenotype_data', + species_id=species.SpeciesId, + population_id=population.Id, + dataset_id=dataset.Id)}}" + enctype="multipart/form-data"> + + <div class="form-group row"> + <label for="file-upload-bulk-edit-upload" + class="form-label col-form-label col-sm-2"> + Edited File</label> + <div class="col-sm-10"> + <input id="file-upload-bulk-edit-upload" + name="file-upload-bulk-edit-upload" + class="form-control" + type="file" + accept="text/tab-separated-values" + required="required" /> + </div> + </div> + + <input type="submit" class="btn btn-primary" + value="upload to edit" /> + + </form> +</div> +{%endblock%} + + +{%block javascript%} +{%endblock%} diff --git a/uploader/templates/phenotypes/create-dataset.html b/uploader/templates/phenotypes/create-dataset.html index 8e45491..19a2b34 100644 --- a/uploader/templates/phenotypes/create-dataset.html +++ b/uploader/templates/phenotypes/create-dataset.html @@ -42,7 +42,7 @@ <input type="text" name="dataset-name" id="txt-dataset-name" - value="{{original_formdata.get('dataset-name') or (population.InbredSetCode + 'Publish')}}" + value="{{original_formdata.get('dataset-name') or (population.Name + 'Publish')}}" {%if errors["dataset-name"] is defined%} class="form-control danger" {%else%} @@ -51,7 +51,7 @@ required="required" /> <small class="form-text text-muted"> <p>A short representative name for the dataset.</p> - <p>Recommended: Use the population code and append "Publish" at the end. + <p>Recommended: Use the population name and append "Publish" at the end. <br />This field will only accept names composed of letters ('A-Za-z'), numbers (0-9), hyphens and underscores.</p> </small> @@ -86,7 +86,7 @@ name="dataset-shortname" type="text" class="form-control" - value="{{original_formdata.get('dataset-shortname') or (population.InbredSetCode + ' Publish')}}" /> + value="{{original_formdata.get('dataset-shortname') or (population.Name + 'Publish')}}" /> <small class="form-text text-muted"> <p>An optional, short name for the dataset. <br /> If this is not provided, it will default to the value provided for the diff --git a/uploader/templates/phenotypes/load-phenotypes-success.html b/uploader/templates/phenotypes/load-phenotypes-success.html new file mode 100644 index 0000000..645be16 --- /dev/null +++ b/uploader/templates/phenotypes/load-phenotypes-success.html @@ -0,0 +1,42 @@ +{%extends "phenotypes/base.html"%} +{%from "flash_messages.html" import flash_all_messages%} +{%from "macro-table-pagination.html" import table_pagination%} +{%from "phenotypes/macro-display-pheno-dataset-card.html" import display_pheno_dataset_card%} + +{%block title%}Phenotypes{%endblock%} + +{%block pagetitle%}Phenotypes{%endblock%} + +{%block lvl4_breadcrumbs%} +<li {%if activelink=="load-phenotypes-success"%} + class="breadcrumb-item active" + {%else%} + class="breadcrumb-item" + {%endif%}> + <a href="{{url_for('species.populations.phenotypes.add_phenotypes', + species_id=species.SpeciesId, + population_id=population.Id, + dataset_id=dataset.Id)}}">Add Phenotypes</a> +</li> +{%endblock%} + +{%block contents%} +<div class="row"> + <p>You have successfully loaded + <!-- maybe indicate the number of phenotypes here? -->your + new phenotypes into the database.</p> + <!-- TODO: Maybe notify user that they have sole access. --> + <!-- TODO: Maybe provide a link to go to GeneNetwork to view the data. --> + <p>View your data + <a href="{{search_page_uri}}" + target="_blank">on GeneNetwork2</a>. 
+ You might need to login to GeneNetwork2 to view specific traits.</p> +</div> +{%endblock%} + +{%block sidebarcontents%} +{{display_pheno_dataset_card(species, population, dataset)}} +{%endblock%} + + +{%block more_javascript%}{%endblock%} diff --git a/uploader/templates/phenotypes/macro-display-preview-table.html b/uploader/templates/phenotypes/macro-display-preview-table.html index f54c53e..5a4c422 100644 --- a/uploader/templates/phenotypes/macro-display-preview-table.html +++ b/uploader/templates/phenotypes/macro-display-preview-table.html @@ -1,7 +1,7 @@ {%macro display_preview_table(tableid, filetype)%} -<div class="card" style="max-width: 676px;"> +<div class="card"> <div class="card-body"> - <h5 class="card-title">Phenotypes '{{filetype | title}}' File Preview</h5> + <h5 class="card-title">{{filetype | title}}: File Preview</h5> <div class="card-text" style="overflow: scroll;"> <table id="{{tableid}}" class="table table-condensed table-responsive"> <thead> @@ -9,9 +9,7 @@ </tr> <tbody> <tr> - <td class="data-row-template text-info"> - Provide a phenotype '{{filetype | lower}}' file to preview. - </td> + <td class="data-row-template text-info"></td> </tr> </tbody> </table> diff --git a/uploader/templates/phenotypes/macro-display-resumable-elements.html b/uploader/templates/phenotypes/macro-display-resumable-elements.html index b0bf1b5..ed14ea5 100644 --- a/uploader/templates/phenotypes/macro-display-resumable-elements.html +++ b/uploader/templates/phenotypes/macro-display-resumable-elements.html @@ -1,6 +1,6 @@ {%macro display_resumable_elements(id, title, help)%} <div id="{{id}}" - class="resumable-elements hidden" + class="resumable-elements visually-hidden" style="background:#D4D4EE;border-radius: 5px;;padding: 1em;border-left: solid #B2B2CC 1px;border-bottom: solid #B2B2CC 2px;margin-top:0.3em;"> <strong style="line-height: 1.2em;">{{title | title}}</strong> @@ -9,7 +9,7 @@ <div id="{{id}}-selected-files" class="resumable-selected-files" style="display:flex;flex-direction:row;flex-wrap: wrap;justify-content:space-around;gap:10px 20px;"> - <div class="panel panel-info file-display-template hidden"> + <div class="panel panel-info file-display-template visually-hidden"> <div class="panel-heading filename">The Filename Goes Here!</div> <div class="panel-body"> <ul> @@ -33,10 +33,10 @@ <a id="{{id}}-browse-button" class="resumable-browse-button btn btn-info" - href="#" + href="#{{id}}" style="margin-left: 80%;">Browse</a> - <div id="{{id}}-progress-bar" class="progress hidden"> + <div id="{{id}}-progress-bar" class="progress visually-hidden"> <div class="progress-bar" role="progress-bar" aria-valuenow="60" @@ -49,11 +49,11 @@ <div id="{{id}}-cancel-resume-buttons"> <a id="{{id}}-resume-button" - class="resumable-resume-button btn btn-info hidden" + class="resumable-resume-button btn btn-info visually-hidden" href="#">resume upload</a> <a id="{{id}}-cancel-button" - class="resumable-cancel-button btn btn-danger hidden" + class="resumable-cancel-button btn btn-danger visually-hidden" href="#">cancel upload</a> </div> </div> diff --git a/uploader/templates/phenotypes/review-job-data.html b/uploader/templates/phenotypes/review-job-data.html index 7bc8c62..859df74 100644 --- a/uploader/templates/phenotypes/review-job-data.html +++ b/uploader/templates/phenotypes/review-job-data.html @@ -35,14 +35,28 @@ {%if job%} <div class="row"> <h3 class="heading">Data Review</h3> + <p class="text-info"><strong> + The data has <em>NOT</em> been added/saved yet. 
Review the details below + and click "Continue" to save the data.</strong></p> <p>The “<strong>{{dataset.FullName}}</strong>” dataset from the “<strong>{{population.FullName}}</strong>” population of the species “<strong>{{species.SpeciesName}} ({{species.FullName}})</strong>” will be updated as follows:</p> + <ul> + {%if publication%} + <li>All {{summary.get("pheno", {}).get("total-data-rows", "0")}} phenotypes + are linked to the following publication: + <ul> + <li><strong>Publication Title:</strong> + {{publication.Title or "—"}}</li> + <li><strong>Author(s):</strong> + {{publication.Authors or "—"}}</li> + </ul> + </li> + {%endif%} {%for ftype in ("phenocovar", "pheno", "phenose", "phenonum")%} {%if summary.get(ftype, False)%} - <ul> <li>A total of {{summary[ftype]["number-of-files"]}} files will be processed adding {%if ftype == "phenocovar"%}(possibly){%endif%} {{summary[ftype]["total-data-rows"]}} new @@ -53,11 +67,21 @@ {%endif%} to the database. </li> - </ul> {%endif%} {%endfor%} + </ul> - <a href="#" class="not-implemented btn btn-primary">continue</a> + <form id="frm-review-phenotype-data" + method="POST" + action="{{url_for('species.populations.phenotypes.load_data_to_database', + species_id=species.SpeciesId, + population_id=population.Id, + dataset_id=dataset.Id)}}"> + <input type="hidden" name="data-qc-job-id" value="{{job.jobid}}" /> + <input type="submit" + value="continue" + class="btn btn-primary" /> + </form> </div> {%else%} <div class="row"> diff --git a/uploader/templates/phenotypes/view-dataset.html b/uploader/templates/phenotypes/view-dataset.html index 10fd428..21563d6 100644 --- a/uploader/templates/phenotypes/view-dataset.html +++ b/uploader/templates/phenotypes/view-dataset.html @@ -89,7 +89,12 @@ {%block javascript%} <script type="text/javascript"> $(function() { + var species_id = {{species.SpeciesId}}; + var population_id = {{population.Id}}; + var dataset_id = {{dataset.Id}}; + var dataset_name = "{{dataset.Name}}"; var data = {{phenotypes | tojson}}; + var dtPhenotypesList = buildDataTable( "#tbl-phenotypes-list", data, @@ -142,19 +147,83 @@ }, { text: "Bulk Edit (Download Data)", + className: "btn btn-info btn-bulk-edit", + titleAttr: "Click to download data for editing.", action: (event, dt, node, config) => { - alert("Not implemented yet!"); - }, - className: "btn btn-info", - titleAttr: "Click to download data for editing." + var phenoids = []; + var selected = dt.rows({selected: true, page: "all"}).data(); + for(var idx = 0; idx < selected.length; idx++) { + phenoids.push({ + phenotype_id: selected[idx].Id, + xref_id: selected[idx].xref_id + }); + } + if(phenoids.length == 0) { + alert("No record selected. 
Nothing to do!");
+                            return false;
+                        }
+
+                        $(".btn-bulk-edit").prop("disabled", true);
+                        $(".btn-bulk-edit").addClass("d-none");
+                        var spinner = $(
+                            "<div id='bulk-edit-spinner' class='spinner-grow text-info'>");
+                        var spinner_content = $(
+                            "<span class='visually-hidden'>");
+                        spinner_content.html(
+                            "Downloading data …");
+                        spinner.append(spinner_content);
+                        $(".btn-bulk-edit").parent().append(
+                            spinner);
+
+                        $.ajax(
+                            (`/species/${species_id}/populations/` +
+                             `${population_id}/phenotypes/datasets/` +
+                             `${dataset_id}/edit-download`),
+                            {
+                                method: "POST",
+                                data: JSON.stringify(phenoids),
+                                xhrFields: {
+                                    responseType: "blob"
+                                },
+                                success: (data, textStatus, jqXHR) => {
+                                    var link = document.createElement("a");
+                                    var uri = window.URL.createObjectURL(data);
+                                    link.href = uri;
+                                    link.download = `${dataset_name}_data.tsv`;
+
+                                    document.body.appendChild(link);
+                                    link.click();
+                                    window.URL.revokeObjectURL(uri);
+                                    link.remove();
+                                },
+                                error: (jqXHR, textStatus, errorThrown) => {
+                                    console.log("Experienced an error: ", textStatus);
+                                    console.log("The ERROR: ", errorThrown);
+                                },
+                                complete: (jqXHR, textStatus) => {
+                                    $("#bulk-edit-spinner").remove();
+                                    $(".btn-bulk-edit").removeClass(
+                                        "d-none");
+                                    $(".btn-bulk-edit").prop(
+                                        "disabled", false);
+                                },
+                                contentType: "application/json"
+                            });
+                    }
                 },
                 {
                     text: "Bulk Edit (Upload Data)",
+                    className: "btn btn-info btn-bulk-edit",
+                    titleAttr: "Click to upload edited data you got by clicking the `Bulk Edit (Download Data)` button.",
                     action: (event, dt, node, config) => {
-                        alert("Not implemented yet!")
-                    },
-                    className: "btn btn-info",
-                    titleAttr: "Click to upload edited data you got by clicking the `Bulk Edit (Download Data)` button."
+                        window.location.assign(
+                            `${window.location.protocol}//` +
+                            `${window.location.host}` +
+                            `/species/${species_id}` +
+                            `/populations/${population_id}` +
+                            `/phenotypes/datasets/${dataset_id}` +
+                            `/edit-upload`);
+                    }
                 }
             ]
         },
diff --git a/uploader/templates/publications/base.html b/uploader/templates/publications/base.html
new file mode 100644
index 0000000..db80bfa
--- /dev/null
+++ b/uploader/templates/publications/base.html
@@ -0,0 +1,12 @@
+{%extends "base.html"%}
+
+{%block lvl1_breadcrumbs%}
+<li {%if activelink=="publications"%}
+    class="breadcrumb-item active"
+    {%else%}
+    class="breadcrumb-item"
+    {%endif%}>
+  <a href="{{url_for('publications.index')}}">Publications</a>
+</li>
+{%block lvl2_breadcrumbs%}{%endblock%}
+{%endblock%}
diff --git a/uploader/templates/publications/create-publication.html b/uploader/templates/publications/create-publication.html
new file mode 100644
index 0000000..3f828a9
--- /dev/null
+++ b/uploader/templates/publications/create-publication.html
@@ -0,0 +1,191 @@
+{%extends "publications/base.html"%}
+{%from "flash_messages.html" import flash_all_messages%}
+
+{%block title%}Create Publication{%endblock%}
+
+{%block pagetitle%}Create Publication{%endblock%}
+
+
+{%block contents%}
+{{flash_all_messages()}}
+
+<div class="row">
+  <form id="frm-create-publication"
+        method="POST"
+        action="{{url_for('publications.create_publication', **request.args)}}"
+        class="form-horizontal">
+
+    <div class="row mb-3">
+      <label for="txt-pubmed-id" class="col-sm-2 col-form-label">
+        PubMed ID</label>
+      <div class="col-sm-10">
+        <div class="input-group">
+          <input type="text"
+                 id="txt-pubmed-id"
+                 name="pubmed-id"
+                 class="form-control"/>
+          <div class="input-group-text">
+            <button class="btn btn-outline-primary"
+                    id="btn-search-pubmed-id">search</button>
+          </div>
+        </div>
+        <span
id="search-pubmed-id-error" + class="form-text text-muted text-danger visually-hidden"> + </span> + <span class="form-text text-muted">This is the publication's ID on + <a href="https://pubmed.ncbi.nlm.nih.gov/" + title="Link to NCBI's PubMed service">NCBI's Pubmed Service</a> + </span> + </div> + </div> + + <div class="row mb-3"> + <label for="txt-publication-title" class="col-sm-2 col-form-label"> + Title</label> + <div class="col-sm-10"> + <input type="text" + id="txt-publication-title" + name="publication-title" + class="form-control" /> + <span class="form-text text-muted">Provide the publication's title here.</span> + </div> + </div> + + <div class="row mb-3"> + <label for="txt-publication-authors" class="col-sm-2 col-form-label"> + Authors</label> + <div class="col-sm-10"> + <input type="text" + id="txt-publication-authors" + name="publication-authors" + required="required" + class="form-control" /> + <span class="form-text text-muted"> + A publication <strong>MUST</strong> have an author. You <em>must</em> + provide a value for the authors field. + </span> + </div> + </div> + + <div class="row mb-3"> + <label for="txt-publication-journal" class="col-sm-2 col-form-label"> + Journal</label> + <div class="col-sm-10"> + <input type="text" + id="txt-publication-journal" + name="publication-journal" + class="form-control" /> + <span class="form-text text-muted">Provide the name journal where the + publication was done, here.</span> + </div> + </div> + + <div class="row mb-3"> + <label for="select-publication-month" + class="col-sm-2 col-form-label"> + Month</label> + <div class="col-sm-4"> + <select class="form-control" + id="select-publication-month" + name="publication-month"> + <option value="">Select a month</option> + <option value="january">January</option> + <option value="february">February</option> + <option value="march">March</option> + <option value="april">April</option> + <option value="may">May</option> + <option value="june">June</option> + <option value="july">July</option> + <option value="august">August</option> + <option value="september">September</option> + <option value="october">October</option> + <option value="november">November</option> + <option value="december">December</option> + </select> + <span class="form-text text-muted">Month of publication</span> + </div> + + <label for="txt-publication-year" + class="col-sm-2 col-form-label"> + Year</label> + <div class="col-sm-4"> + <input type="number" + id="txt-publication-year" + name="publication-year" + class="form-control" + min="1960" /> + <span class="form-text text-muted">Year of publication</span> + </div> + </div> + + <div class="row mb-3"> + <label for="txt-publication-volume" + class="col-sm-2 col-form-label"> + Volume</label> + <div class="col-sm-4"> + <input type="text" + id="txt-publication-volume" + name="publication-volume" + class="form-control"> + <span class="form-text text-muted">Journal volume</span> + </div> + + <label for="txt-publication-pages" + class="col-sm-2 col-form-label"> + Pages</label> + <div class="col-sm-4"> + <input type="text" + id="txt-publication-pages" + name="publication-pages" + class="form-control" /> + <span class="form-text text-muted">Journal pages for the publication</span> + </div> + </div> + + <div class="row mb-3"> + <label for="txt-abstract" class="col-sm-2 col-form-label">Abstract</label> + <div class="col-sm-10"> + <textarea id="txt-publication-abstract" + name="publication-abstract" + class="form-control" + rows="7"></textarea> + </div> + </div> + + <div 
class="row mb-3"> + <div class="col-sm-2"></div> + <div class="col-sm-8"> + <input type="submit" class="btn btn-primary" value="Add" /> + <input type="reset" class="btn btn-danger" /> + </div> + </div> + +</form> +</div> + +{%endblock%} + + +{%block javascript%} +<script type="text/javascript" src="/static/js/pubmed.js"></script> +<script type="text/javascript"> + $(function() { + $("#btn-search-pubmed-id").on("click", (event) => { + event.preventDefault(); + var search_button = event.target; + var pubmed_id = $("#txt-pubmed-id").val().trim(); + remove_class($("#txt-pubmed-id").parent(), "has-error"); + if(pubmed_id == "") { + add_class($("#txt-pubmed-id").parent(), "has-error"); + return false; + } + + search_button.disabled = true; + // Fetch publication details + fetch_publication_details(pubmed_id, + [() => {search_button.disabled = false;}]); + return false; + }); + }); +</script> +{%endblock%} diff --git a/uploader/templates/publications/index.html b/uploader/templates/publications/index.html new file mode 100644 index 0000000..f846d54 --- /dev/null +++ b/uploader/templates/publications/index.html @@ -0,0 +1,92 @@ +{%extends "publications/base.html"%} +{%from "flash_messages.html" import flash_all_messages%} + +{%block title%}Publications{%endblock%} + +{%block pagetitle%}Publications{%endblock%} + + +{%block contents%} +{{flash_all_messages()}} + +<div class="row" style="padding-bottom: 1em;"> + <a href="{{url_for('publications.create_publication')}}" + class="btn btn-primary"> + add new publication</a> +</div> + +<div class="row"> + <table id="tbl-list-publications" class="table compact stripe"> + <thead> + <tr> + <th>#</th> + <th>PubMed ID</th> + <th>Title</th> + <th>Authors</th> + </tr> + </thead> + + <tbody></tbody> + </table> +</div> +{%endblock%} + + +{%block javascript%} +<script type="text/javascript"> + $(function() { + var publicationsDataTable = buildDataTable( + "#tbl-list-publications", + [], + [ + {data: "index"}, + { + data: (pub) => { + if(pub.PubMed_ID) { + return `<a href="https://pubmed.ncbi.nlm.nih.gov/` + + `${pub.PubMed_ID}/" target="_blank" ` + + `title="Link to publication on NCBI.">` + + `${pub.PubMed_ID}</a>`; + } + return ""; + } + }, + { + data: (pub) => { + var title = "⸻"; + if(pub.Title) { + title = pub.Title + } + return `<a href="/publications/view/${pub.Id}" ` + + `target="_blank" ` + + `title="Link to view publication details">` + + `${title}</a>`; + } + }, + { + data: (pub) => { + authors = pub.Authors.split(",").map( + (item) => {return item.trim();}); + if(authors.length > 1) { + return authors[0] + ", et. 
al."; + } + return authors[0]; + } + } + ], + { + ajax: { + url: "/publications/list", + dataSrc: "publications" + }, + scrollY: 700, + paging: false, + deferRender: true, + layout: { + topStart: "info", + topEnd: "search" + } + }); + }); +</script> +{%endblock%} diff --git a/uploader/templates/publications/view-publication.html b/uploader/templates/publications/view-publication.html new file mode 100644 index 0000000..388547a --- /dev/null +++ b/uploader/templates/publications/view-publication.html @@ -0,0 +1,78 @@ +{%extends "publications/base.html"%} +{%from "flash_messages.html" import flash_all_messages%} + +{%block title%}View Publication{%endblock%} + +{%block pagetitle%}View Publication{%endblock%} + + +{%block contents%} +{{flash_all_messages()}} + +<div class="row"> + <table class="table"> + <tr> + <th>PubMed</th> + <td> + {%if publication.PubMed_ID%} + <a href="https://pubmed.ncbi.nlm.nih.gov/{{publication.PubMed_ID}}/" + target="_blank">{{publication.PubMed_ID}}</a> + {%else%} + — + {%endif%} + </td> + </tr> + <tr> + <th>Title</th> + <td>{{publication.Title or "—"}}</td> + </tr> + <tr> + <th>Authors</th> + <td>{{publication.Authors or "—"}}</td> + </tr> + <tr> + <th>Journal</th> + <td>{{publication.Journal or "—"}}</td> + </tr> + <tr> + <th>Published</th> + <td>{{publication.Month or ""}} {{publication.Year or "—"}}</td> + </tr> + <tr> + <th>Volume</th> + <td>{{publication.Volume or "—"}}</td> + </tr> + <tr> + <th>Pages</th> + <td>{{publication.Pages or "—"}}</td> + </tr> + <tr> + <th>Abstract</th> + <td> + {%for line in (publication.Abstract or "—").replace("\r\n", "<br />").replace("\n", "<br />").split("<br />")%} + <p>{{line}}</p> + {%endfor%} + </td> + </tr> + </table> +</div> + +<div class="row"> + <form id="frm-edit-delete-publication" method="POST" action="#"> + <input type="hidden" name="publication_id" value="{{publication.Id}}" /> + <div class="form-group"> + <input type="submit" value="edit" class="btn btn-primary not-implemented" /> + {%if linked_phenotypes | length == 0%} + <input type="submit" value="delete" class="btn btn-danger not-implemented" /> + {%endif%} + </div> + </form> +</div> +{%endblock%} + + +{%block javascript%} +<script type="text/javascript"> + $(function() {}); +</script> +{%endblock%} diff --git a/uploader/templates/samples/upload-failure.html b/uploader/templates/samples/upload-failure.html index 458ab55..2cf8053 100644 --- a/uploader/templates/samples/upload-failure.html +++ b/uploader/templates/samples/upload-failure.html @@ -15,7 +15,7 @@ <h3>Debugging Information</h3> <ul> - <li><strong>job id</strong>: {{job.job_id}}</li> + <li><strong>job id</strong>: {{job.jobid}}</li> <li><strong>status</strong>: {{job.status}}</li> <li><strong>job type</strong>: {{job["job-type"]}}</li> </ul> |