45 files changed, 2249 insertions, 548 deletions
diff --git a/quality_control/checks.py b/quality_control/checks.py index bdfd12b..bb05e31 100644 --- a/quality_control/checks.py +++ b/quality_control/checks.py @@ -52,12 +52,15 @@ def decimal_places_pattern(mini: int, maxi: Optional[int] = None) -> re.Pattern: + r")$" ) -def decimal_points_error(filename: str,# pylint: disable=[too-many-arguments] - lineno: int, - field: str, - value: str, - mini: int, - maxi: Optional[int] = None) -> Optional[InvalidValue]: +def decimal_points_error( + # pylint: disable=[too-many-arguments, too-many-positional-arguments] + filename: str, + lineno: int, + field: str, + value: str, + mini: int, + maxi: Optional[int] = None +) -> Optional[InvalidValue]: """ Check that 'value' is a decimal number with the appropriate decimal places. """ diff --git a/quality_control/parsing.py b/quality_control/parsing.py index f1d21fc..7a8185d 100644 --- a/quality_control/parsing.py +++ b/quality_control/parsing.py @@ -104,23 +104,22 @@ def collect_errors( if line_number == 1: consistent_columns_checker = make_column_consistency_checker( filename, line) - for error in __process_errors__( - filename, line_number, line, - partial(header_errors, strains=strains), - errors): - yield error + yield from __process_errors__( + filename, line_number, line, + partial(header_errors, strains=strains), + errors) if line_number != 1: - col_consistency_error = consistent_columns_checker(line_number, line) + col_consistency_error = consistent_columns_checker(# pylint: disable=[possibly-used-before-assignment] + line_number, line) if col_consistency_error: yield col_consistency_error - for error in __process_errors__( + yield from __process_errors__( filename, line_number, line, ( average_errors if filetype == FileType.AVERAGE else se_errors), - errors): - yield error + errors) if update_progress: update_progress(line_number, line) diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py index dfa84ba..06175ce 100644 --- a/r_qtl/r_qtl2.py +++ b/r_qtl/r_qtl2.py @@ -16,7 +16,7 @@ from r_qtl.exceptions import InvalidFormat, MissingFileException FILE_TYPES = ( "geno", "founder_geno", "pheno", "covar", "phenocovar", "gmap", "pmap", - "phenose") + "phenose", "phenonum") __CONTROL_FILE_ERROR_MESSAGE__ = ( "The zipped bundle that was provided does not contain a valid control file " @@ -575,8 +575,30 @@ def read_text_file(filepath: Union[str, Path]) -> Iterator[str]: def read_csv_file(filepath: Union[str, Path], separator: str = ",", comment_char: str = "#") -> Iterator[tuple[str, ...]]: - """Read a file as a csv file.""" + """Read a file as a csv file. 
This does not process the N/A values.""" for line in read_text_file(filepath): if line.startswith(comment_char): continue yield tuple(field.strip() for field in line.split(separator)) + + +def read_csv_file_headers( + filepath: Union[str, Path], + transposed: bool, + separator: str = ",", + comment_char: str = "#" +) -> tuple[str, ...]: + """Read the 'true' headers of a CSV file.""" + headers = tuple() + for line in read_text_file(filepath): + if line.startswith(comment_char): + continue + + line = tuple(field.strip() for field in line.split(separator)) + if not transposed: + return line + + headers = headers + (line[0],) + continue + + return headers diff --git a/scripts/cli_parser.py b/scripts/cli_parser.py index d42ae66..0c91c5e 100644 --- a/scripts/cli_parser.py +++ b/scripts/cli_parser.py @@ -23,7 +23,8 @@ def init_cli_parser(program: str, description: Optional[str] = None) -> Argument "--loglevel", type=str, default="INFO", - choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", + "debug", "info", "warning", "error", "critical"], help="The severity of events to track with the logger.") return parser diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py new file mode 100644 index 0000000..5ce37f3 --- /dev/null +++ b/scripts/load_phenotypes_to_db.py @@ -0,0 +1,518 @@ +import sys +import uuid +import json +import logging +import argparse +import datetime +from pathlib import Path +from zipfile import ZipFile +from typing import Any, Union +from urllib.parse import urljoin +from functools import reduce, partial + +from MySQLdb.cursors import Cursor, DictCursor + +from gn_libs import jobs, mysqldb, sqlite3, monadic_requests as mrequests + +from r_qtl import r_qtl2 as rqtl2 +from uploader.species.models import species_by_id +from uploader.population.models import population_by_species_and_id +from uploader.samples.models import samples_by_species_and_population +from uploader.phenotypes.models import ( + dataset_by_id, + save_phenotypes_data, + create_new_phenotypes, + quick_save_phenotypes_data) +from uploader.publications.models import ( + create_new_publications, + fetch_publication_by_id) + +from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter + +logging.basicConfig( + format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s") +logger = logging.getLogger(__name__) + + + +def __replace_na_strings__(line, na_strings): + return ((None if value in na_strings else value) for value in line) + + +def save_phenotypes( + cursor: mysqldb.Connection, + control_data: dict[str, Any], + filesdir: Path +) -> tuple[dict, ...]: + """Read `phenofiles` and save the phenotypes therein.""" + ## TODO: Replace with something like this: ## + # phenofiles = control_data["phenocovar"] + control_data.get( + # "gn-metadata", {}).get("pheno", []) + # + # This is meant to load (and merge) data from the "phenocovar" and + # "gn-metadata -> pheno" files into a single collection of phenotypes. 
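[Editorial sketch, not part of the change: the nested generator passed to create_new_phenotypes below is dense. This standalone snippet shows what it produces for a tiny, invented phenocovar file, using the same header-zip and NA-replacement logic as __replace_na_strings__ above.]

# Hypothetical input (phenocovar.csv):
#   id,description,units
#   pheno1,Blood pressure,mmHg
#   pheno2,NA,g
headers = ("id", "description", "units")
na_strings = ("NA", "N/A")
rows = (("pheno1", "Blood pressure", "mmHg"), ("pheno2", "NA", "g"))
phenotypes = [
    dict(zip(headers,
             (None if value in na_strings else value for value in row)))
    for row in rows]
# phenotypes == [
#     {"id": "pheno1", "description": "Blood pressure", "units": "mmHg"},
#     {"id": "pheno2", "description": None, "units": "g"}]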
+ phenofiles = tuple(filesdir.joinpath(_file) for _file in control_data["phenocovar"]) + if len(phenofiles) <= 0: + return tuple() + + if control_data["phenocovar_transposed"]: + logger.info("Undoing transposition of the files rows and columns.") + phenofiles = tuple( + rqtl2.transpose_csv_with_rename( + _file, + build_line_splitter(control_data), + build_line_joiner(control_data)) + for _file in phenofiles) + + _headers = rqtl2.read_csv_file_headers(phenofiles[0], + False, # any transposed files were un-transposed above + control_data["sep"], + control_data["comment.char"]) + return create_new_phenotypes( + cursor, + (dict(zip(_headers, + __replace_na_strings__(line, control_data["na.strings"]))) + for filecontent + in (rqtl2.read_csv_file(path, + separator=control_data["sep"], + comment_char=control_data["comment.char"]) + for path in phenofiles) + for idx, line in enumerate(filecontent) + if idx != 0)) + + +def __fetch_next_dataid__(conn: mysqldb.Connection) -> int: + """Fetch the next available DataId value from the database.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + "SELECT MAX(DataId) AS CurrentMaxDataId FROM PublishXRef") + return int(cursor.fetchone()["CurrentMaxDataId"]) + 1 + + +def __row_to_dataitems__( + sample_row: dict, + dataidmap: dict, + pheno_name2id: dict[str, int], + samples: dict +) -> tuple[dict, ...]: + samplename = sample_row["id"] + + return ({ + "phenotype_id": dataidmap[pheno_name2id[phenoname]]["phenotype_id"], + "data_id": dataidmap[pheno_name2id[phenoname]]["data_id"], + "sample_name": samplename, + "sample_id": samples[samplename]["Id"], + "value": phenovalue + } for phenoname, phenovalue in sample_row.items() if phenoname != "id") + + +def __build_dataitems__( + filetype, + phenofiles, + control_data, + samples, + dataidmap, + pheno_name2id +): + _headers = rqtl2.read_csv_file_headers( + phenofiles[0], + False, # Any transposed files have been un-transposed by this point + control_data["sep"], + control_data["comment.char"]) + _filescontents = ( + rqtl2.read_csv_file(path, + separator=control_data["sep"], + comment_char=control_data["comment.char"]) + for path in phenofiles) + _linescontents = ( + __row_to_dataitems__( + dict(zip(("id",) + _headers[1:], + __replace_na_strings__(line, control_data["na.strings"]))), + dataidmap, + pheno_name2id, + samples) + for linenum, line in (enumline for filecontent in _filescontents + for enumline in enumerate(filecontent)) + if linenum > 0) + return (item for items in _linescontents + for item in items + if item["value"] is not None) + + +def save_numeric_data( + conn: mysqldb.Connection, + dataidmap: dict, + pheno_name2id: dict[str, int], + samples: tuple[dict, ...], + control_data: dict, + filesdir: Path, + filetype: str, + table: str +): + """Read data from files and save to the database.""" + phenofiles = tuple( + filesdir.joinpath(_file) for _file in control_data[filetype]) + if len(phenofiles) <= 0: + return tuple() + + if control_data[f"{filetype}_transposed"]: + logger.info("Undoing transposition of the files rows and columns.") + phenofiles = tuple( + rqtl2.transpose_csv_with_rename( + _file, + build_line_splitter(control_data), + build_line_joiner(control_data)) + for _file in phenofiles) + + try: + logger.debug("Attempt quick save with `LOAD … INFILE`.") + return quick_save_phenotypes_data( + conn, + table, + __build_dataitems__( + filetype, + phenofiles, + control_data, + samples, + dataidmap, + pheno_name2id), + filesdir) + except Exception as _exc: + logger.debug("Could not use `LOAD … INFILE`, using raw query", + exc_info=True) + return save_phenotypes_data( + conn, + table, + __build_dataitems__( + filetype, + phenofiles, + control_data, + samples, + dataidmap, + pheno_name2id)) + + +save_pheno_data = partial(save_numeric_data, + filetype="pheno", + table="PublishData") + + +save_phenotypes_se = partial(save_numeric_data, + filetype="phenose", + table="PublishSE") + + +save_phenotypes_n = partial(save_numeric_data, + filetype="phenonum", + table="NStrain") + + +def cross_reference_phenotypes_publications_and_data( + conn: mysqldb.Connection, xref_data: tuple[dict, ...] +): + """Cross-reference the phenotypes, publication and data.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT MAX(Id) CurrentMaxId FROM PublishXRef") + _nextid = int(cursor.fetchone()["CurrentMaxId"]) + 1 + _params = tuple({**row, "xref_id": _id} + for _id, row in enumerate(xref_data, start=_nextid)) + cursor.executemany( + ("INSERT INTO PublishXRef(" + "Id, InbredSetId, PhenotypeId, PublicationId, DataId, comments" + ") " + "VALUES (" + "%(xref_id)s, %(population_id)s, %(phenotype_id)s, " + "%(publication_id)s, %(data_id)s, 'Upload of new data.'" + ")"), + _params) + return _params + return tuple() + + +def update_auth(authserver, token, species, population, dataset, xrefdata): + """Grant the user access to their data.""" + # TODO Call into the auth server to: + # 1. Link the phenotypes with a user group + # - fetch group: http://localhost:8081/auth/user/group + # - link data to group: http://localhost:8081/auth/data/link/phenotype + # - *might need code update in gn-auth: remove restriction, perhaps* + # 2. Create resource (perhaps?) + # - Get resource categories: http://localhost:8081/auth/resource/categories + # - Create a new resource: http://localhost:8081/auth/resource/create + # - single resource for all phenotypes + # - resource name from user, species, population, dataset, datetime? + # - User will have "ownership" of resource by default + # 3. Link data to the resource: http://localhost:8081/auth/resource/data/link
+ # - Update code to allow linking multiple items in a single request + _tries = 0 # TODO use this to limit how many tries before quitting and bailing + _delay = 1 + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json" + } + def authserveruri(endpoint): + return urljoin(authserver, endpoint) + + def __fetch_user_details__(): + logger.debug("… Fetching user details") + return mrequests.get( + authserveruri("/auth/user/"), + headers=headers + ) + + def __link_data__(user): + logger.debug("… linking uploaded data to user's group") + return mrequests.post( + authserveruri("/auth/data/link/phenotype"), + headers=headers, + json={ + "species_name": species["Name"], + "group_id": user["group"]["group_id"], + "selected": [ + { + "SpeciesId": species["SpeciesId"], + "InbredSetId": population["Id"], + "PublishFreezeId": dataset["Id"], + "dataset_name": dataset["Name"], + "dataset_fullname": dataset["FullName"], + "dataset_shortname": dataset["ShortName"], + "PublishXRefId": item["xref_id"] + } + for item in xrefdata + ], + "using-raw-ids": "on" + }).then(lambda ld_results: (user, ld_results)) + + def __fetch_phenotype_category_details__(user, linkeddata): + logger.debug("… fetching phenotype category details") + return mrequests.get( + authserveruri("/auth/resource/categories"), + headers=headers + ).then( + lambda categories: ( + user, + linkeddata, + next(category for category in categories + if category["resource_category_key"] == "phenotype")) + ) + + def __create_resource__(user, linkeddata, category): + logger.debug("… creating authorisation resource object") + now = datetime.datetime.now().isoformat() + return mrequests.post( + authserveruri("/auth/resource/create"), + headers=headers, + json={ + "resource_category": category["resource_category_id"], + "resource_name": (f"{user['email']}—{dataset['Name']}—{now}—" + f"{len(xrefdata)} phenotypes"), + "public": "off" + }).then(lambda cr_results: (user, linkeddata, cr_results)) + + def __attach_data_to_resource__(user, linkeddata, resource): + logger.debug("… attaching data to authorisation resource object") + return mrequests.post( + authserveruri("/auth/resource/data/link"), + headers=headers, + json={ + "dataset_type": "phenotype", + "resource_id": resource["resource_id"], + "data_link_ids": [ + item["data_link_id"] for item in linkeddata["traits"]] + }).then(lambda attc: (user, linkeddata, resource, attc)) + + def __handle_error__(resp): + logger.error("ERROR: Updating the authorisation for the data failed.") + logger.debug( + "ERROR: The response from the authorisation server was:\n\t%s", + resp.json()) + return 1 + + def __handle_success__(val): + logger.info( + "The authorisation for the data has been updated successfully.") + return 0 + + return __fetch_user_details__().then(__link_data__).then( + lambda result: __fetch_phenotype_category_details__(*result) + ).then( + lambda result: __create_resource__(*result) + ).then( + lambda result: __attach_data_to_resource__(*result) + ).either(__handle_error__, __handle_success__) + + +def load_data(conn: mysqldb.Connection, job: dict) -> tuple: + """Load the data attached in the given job.""" + _job_metadata = job["metadata"] + # Steps + # 0. Read data from the files: can be multiple files per type + # + _species = species_by_id(conn, int(_job_metadata["species_id"])) + _population = population_by_species_and_id( + conn, + _species["SpeciesId"], + int(_job_metadata["population_id"])) + _dataset = dataset_by_id( + conn, + _species["SpeciesId"], + _population["Id"], + int(_job_metadata["dataset_id"])) + # 1. Just retrieve the publication: Don't create publications for now. + _publication = fetch_publication_by_id( + conn, int(_job_metadata.get("publication_id", "0"))) or {"Id": 0} + # 2. Save all new phenotypes: + # -> return phenotype IDs + bundle = Path(_job_metadata["bundle_file"]) + _control_data = rqtl2.control_data(bundle) + logger.info("Extracting the zipped bundle of files.") + _outdir = Path(bundle.parent, f"bundle_{bundle.stem}") + with ZipFile(str(bundle), "r") as zfile: + _files = rqtl2.extract(zfile, _outdir) + logger.info("Saving new phenotypes.") + _phenos = save_phenotypes(conn, _control_data, _outdir) + def __build_phenos_maps__(accumulator, current): + dataid, row = current + return ({ + **accumulator[0], + row["phenotype_id"]: { + "population_id": _population["Id"], + "phenotype_id": row["phenotype_id"], + "data_id": dataid, + "publication_id": _publication["Id"], + } + }, { + **accumulator[1], + row["id"]: row["phenotype_id"] + }) + dataidmap, pheno_name2id = reduce( + __build_phenos_maps__, + enumerate(_phenos, start=__fetch_next_dataid__(conn)), + ({},{})) + # 3. a. Fetch the strain names and IDs: create name->ID map + samples = { + row["Name"]: row + for row in samples_by_species_and_population( + conn, _species["SpeciesId"], _population["Id"])} + # b. Save all the data items (DataIds are vibes), return new IDs + logger.info("Saving new phenotypes data.") + _num_data_rows = save_pheno_data(conn=conn, + dataidmap=dataidmap, + pheno_name2id=pheno_name2id, + samples=samples, + control_data=_control_data, + filesdir=_outdir) + logger.info("Saved %s new phenotype data rows.", _num_data_rows) + # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef + logger.info("Cross-referencing new phenotypes to their data and publications.") + _xrefs = cross_reference_phenotypes_publications_and_data( + conn, tuple(dataidmap.values())) + # 5. If standard errors and N exist, save them too + # (use IDs returned in `3. b.` above). 
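[Editorial sketch, not part of the change: the shapes of the two lookup tables built by __build_phenos_maps__ above, with invented IDs. dataidmap is keyed by PhenotypeId and carries the values PublishXRef needs; pheno_name2id maps a phenotype's name to its PhenotypeId so the data, SE and N rows below can find their DataId.]

dataidmap = {
    25: {"population_id": 5, "phenotype_id": 25,
         "data_id": 8967043, "publication_id": 0},
    26: {"population_id": 5, "phenotype_id": 26,
         "data_id": 8967044, "publication_id": 0},
}
pheno_name2id = {"pheno1": 25, "pheno2": 26}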
+ logger.info("Saving new phenotypes standard errors.") + _num_se_rows = save_phenotypes_se(conn=conn, + dataidmap=dataidmap, + pheno_name2id=pheno_name2id, + samples=samples, + control_data=_control_data, + filesdir=_outdir) + logger.info("Saved %s new phenotype standard error rows.", _num_se_rows) + + logger.info("Saving new phenotypes sample counts.") + _num_n_rows = save_phenotypes_n(conn=conn, + dataidmap=dataidmap, + pheno_name2id=pheno_name2id, + samples=samples, + control_data=_control_data, + filesdir=_outdir) + logger.info("Saved %s new phenotype sample counts rows.", _num_n_rows) + return (_species, _population, _dataset, _xrefs) + + +if __name__ == "__main__": + def parse_args(): + """Setup command-line arguments.""" + parser = argparse.ArgumentParser( + prog="load_phenotypes_to_db", + description="Process the phenotypes' data and load it into the database.") + parser.add_argument("db_uri", type=str, help="MariaDB/MySQL connection URL") + parser.add_argument( + "jobs_db_path", type=Path, help="Path to jobs' SQLite database.") + parser.add_argument("job_id", type=uuid.UUID, help="ID of the running job") + parser.add_argument( + "--log-level", + type=str, + help="Determines what is logged out.", + choices=("debug", "info", "warning", "error", "critical"), + default="info") + return parser.parse_args() + + def setup_logging(log_level: str): + """Setup logging for the script.""" + logger.setLevel(log_level) + logging.getLogger("uploader.phenotypes.models").setLevel(log_level) + + + def main(): + """Entry-point for this script.""" + args = parse_args() + setup_logging(args.log_level.upper()) + + with (mysqldb.database_connection(args.db_uri) as conn, + conn.cursor(cursorclass=DictCursor) as cursor, + sqlite3.connection(args.jobs_db_path) as jobs_conn): + job = jobs.job(jobs_conn, args.job_id) + + # Lock the PublishXRef/PublishData/PublishSE/NStrain here: Why? + # The `DataId` values are sequential, but not auto-increment + # Can't convert `PublishXRef`.`DataId` to AUTO_INCREMENT. + # `SELECT MAX(DataId) FROM PublishXRef;` + # How do you check for a table lock? + # https://oracle-base.com/articles/mysql/mysql-identify-locked-tables + # `SHOW OPEN TABLES LIKE 'Publish%';` + _db_tables_ = ( + "Species", + "InbredSet", + "Strain", + "StrainXRef", + "Publication", + "Phenotype", + "PublishXRef", + "PublishFreeze", + "PublishData", + "PublishSE", + "NStrain") + + logger.debug( + ("Locking database tables for the connection:" + + "".join("\n\t- %s" for _ in _db_tables_) + "\n"), + *_db_tables_) + cursor.execute(# Lock the tables to avoid race conditions + "LOCK TABLES " + ", ".join( + f"{_table} WRITE" for _table in _db_tables_)) + + db_results = load_data(conn, job) + jobs.update_metadata( + jobs_conn, + args.job_id, + "xref_ids", + json.dumps([xref["xref_id"] for xref in db_results[3]])) + + logger.info("Unlocking all database tables.") + cursor.execute("UNLOCK TABLES") + + # Update authorisations (break this down) — maybe loop until it works? 
+ logger.info("Updating authorisation.") + _job_metadata = job["metadata"] + return update_auth(_job_metadata["authserver"], + _job_metadata["token"], + *db_results) + + + try: + sys.exit(main()) + except Exception as _exc: + logger.debug("Data loading failed… Halting!", + exc_info=True) + sys.exit(1) diff --git a/scripts/rqtl2/entry.py b/scripts/rqtl2/entry.py index 327ed2c..e0e00e7 100644 --- a/scripts/rqtl2/entry.py +++ b/scripts/rqtl2/entry.py @@ -20,27 +20,23 @@ def build_main( [Redis, Connection, str, Namespace, logging.Logger], int ], - loggername: str + logger: logging.Logger ) -> Callable[[],int]: """Build a function to be used as an entry-point for scripts.""" def main(): - try: - logging.basicConfig( - format=( - "%(asctime)s - %(levelname)s %(name)s: " - "(%(pathname)s: %(lineno)d) %(message)s"), - level=args.loglevel) - logger = logging.getLogger(loggername) - with (Redis.from_url(args.redisuri, decode_responses=True) as rconn, - database_connection(args.databaseuri) as dbconn): - fqjobid = jobs.job_key(args.redisprefix, args.jobid) + with (Redis.from_url(args.redisuri, decode_responses=True) as rconn, + database_connection(args.databaseuri) as dbconn): + logger.setLevel(args.loglevel.upper()) + fqjobid = jobs.job_key(args.redisprefix, args.jobid) + + try: rconn.hset(fqjobid, "status", "started") logger.addHandler(setup_redis_logger( rconn, fqjobid, f"{fqjobid}:log-messages", args.redisexpiry)) - logger.addHandler(StreamHandler(stream=sys.stdout)) + logger.addHandler(StreamHandler(stream=sys.stderr)) check_db(args.databaseuri) check_redis(args.redisuri) @@ -48,15 +44,15 @@ def build_main( logger.error("File not found: '%s'.", args.rqtl2bundle) return 2 - returncode = run_fn(rconn, dbconn, fqjobid, args, logger) + returncode = run_fn(rconn, dbconn, fqjobid, args) if returncode == 0: rconn.hset(fqjobid, "status", "completed:success") return returncode rconn.hset(fqjobid, "status", "completed:error") return returncode - except Exception as _exc:# pylint: disable=[broad-except] - logger.error("The process failed!", exc_info=True) - rconn.hset(fqjobid, "status", "completed:error") - return 4 + except Exception as _exc:# pylint: disable=[broad-except] + logger.error("The process failed!", exc_info=True) + rconn.hset(fqjobid, "status", "completed:error") + return 4 return main diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py index 76ecb8d..5c89ca0 100644 --- a/scripts/rqtl2/phenotypes_qc.py +++ b/scripts/rqtl2/phenotypes_qc.py @@ -36,6 +36,10 @@ from scripts.cli_parser import init_cli_parser, add_global_data_arguments from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter __MODULE__ = "scripts.rqtl2.phenotypes_qc" +logging.basicConfig( + format=("%(asctime)s - %(levelname)s %(name)s: " + "(%(pathname)s: %(lineno)d) %(message)s")) +logger = logging.getLogger(__MODULE__) def validate(phenobundle: Path, logger: Logger) -> dict: """Check that the bundle is generally valid""" @@ -177,7 +181,7 @@ def qc_phenocovar_file( filepath.name, f"{fqkey}:logs") as logger, Redis.from_url(redisuri, decode_responses=True) as rconn): - logger.info("Running QC on file: %s", filepath.name) + print("Running QC on file: ", filepath.name) _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char) _headings = tuple(heading.lower() for heading in next(_csvfile)) _errors: tuple[InvalidValue, ...] 
= tuple() @@ -205,12 +209,12 @@ def qc_phenocovar_file( (f"Record {_lc} in file {filepath.name} has a different " "number of columns than the number of headings"))),) _line = dict(zip(_headings, line)) - if not bool(_line["description"]): + if not bool(_line.get("description")): _errs = _errs + ( save_error(InvalidValue(filepath.name, _line[_headings[0]], "description", - _line["description"], + _line.get("description"), "The description is not provided!")),) rconn.hset(file_fqkey(fqkey, "metadata", filepath), @@ -285,7 +289,7 @@ def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments] filepath.name, f"{fqkey}:logs") as logger, Redis.from_url(redisuri, decode_responses=True) as rconn): - logger.info("Running QC on file: %s", filepath.name) + print("Running QC on file: ", filepath.name) save_error = partial( push_error, rconn, file_fqkey(fqkey, "errors", filepath)) _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char) @@ -369,11 +373,10 @@ def run_qc(# pylint: disable=[too-many-locals] rconn: Redis, dbconn: mdb.Connection, fullyqualifiedjobid: str, - args: Namespace, - logger: Logger + args: Namespace ) -> int: """Run quality control checks on the bundle.""" - logger.debug("Beginning the quality assurance checks.") + print("Beginning the quality assurance checks.") results = check_for_averages_files( **check_for_mandatory_pheno_keys( **validate(args.rqtl2bundle, logger))) @@ -398,7 +401,7 @@ def run_qc(# pylint: disable=[too-many-locals] for ftype in ("pheno", "phenocovar", "phenose", "phenonum"))) # - Fetch samples/individuals from database. - logger.debug("Fetching samples/individuals from the database.") + print("Fetching samples/individuals from the database.") samples = tuple(#type: ignore[var-annotated] item for item in set(reduce( lambda acc, item: acc + ( @@ -415,7 +418,7 @@ def run_qc(# pylint: disable=[too-many-locals] json.dumps(tuple(f"{fullyqualifiedjobid}:phenocovar:{_file}" for _file in cdata.get("phenocovar", [])))) with mproc.Pool(mproc.cpu_count() - 1) as pool: - logger.debug("Check for errors in 'phenocovar' file(s).") + print("Check for errors in 'phenocovar' file(s).") _phenocovar_qc_res = merge_dicts(*pool.starmap(qc_phenocovar_file, tuple( (extractiondir.joinpath(_file), args.redisuri, @@ -437,7 +440,7 @@ def run_qc(# pylint: disable=[too-many-locals] "Expected a non-negative number with at least one decimal " "place.")) - logger.debug("Check for errors in 'pheno' file(s).") + print("Check for errors in 'pheno' file(s).") _pheno_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple(( extractiondir.joinpath(_file), args.redisuri, @@ -456,7 +459,7 @@ def run_qc(# pylint: disable=[too-many-locals] # - Check the 3 checks above for phenose and phenonum values too # qc_phenose_files(…) # qc_phenonum_files(…) - logger.debug("Check for errors in 'phenose' file(s).") + print("Check for errors in 'phenose' file(s).") _phenose_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple(( extractiondir.joinpath(_file), args.redisuri, @@ -472,7 +475,7 @@ def run_qc(# pylint: disable=[too-many-locals] dec_err_fn ) for _file in cdata.get("phenose", [])))) - logger.debug("Check for errors in 'phenonum' file(s).") + print("Check for errors in 'phenonum' file(s).") _phenonum_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple(( extractiondir.joinpath(_file), args.redisuri, @@ -509,5 +512,5 @@ if __name__ == "__main__": type=Path) return parser.parse_args() - main = build_main(cli_args(), run_qc, __MODULE__) + main = build_main(cli_args(), run_qc, logger) 
sys.exit(main()) diff --git a/tests/r_qtl/test_r_qtl2_control_file.py b/tests/r_qtl/test_r_qtl2_control_file.py index 316307d..5b9fef6 100644 --- a/tests/r_qtl/test_r_qtl2_control_file.py +++ b/tests/r_qtl/test_r_qtl2_control_file.py @@ -16,6 +16,7 @@ __DEFAULTS__ = { "pheno_transposed": False, "covar_transposed": False, "phenocovar_transposed": False, + "phenonum_transposed": False, "gmap_transposed": False, "pmap_transposed": False, "phenose_transposed": False diff --git a/tests/uploader/phenotypes/__init__.py b/tests/uploader/phenotypes/__init__.py new file mode 100644 index 0000000..1e0a932 --- /dev/null +++ b/tests/uploader/phenotypes/__init__.py @@ -0,0 +1 @@ +"""phenotypes tests""" diff --git a/tests/uploader/phenotypes/test_misc.py b/tests/uploader/phenotypes/test_misc.py index c0261aa..cf475ad 100644 --- a/tests/uploader/phenotypes/test_misc.py +++ b/tests/uploader/phenotypes/test_misc.py @@ -218,12 +218,54 @@ __sample_db_phenotypes_data__ = ( } }), __sample_db_phenotypes_data__, - ({"PhenotypeId": 4, "xref_id": 10001, "DataId": 8967043, "StrainId": 4, "StrainName": "BXD1", "value": 77.2}, - {"PhenotypeId": 15, "xref_id": 10003, "DataId": 8967045, "StrainId": 6, "StrainName": "BXD5", "value": 503}, - {"PhenotypeId": 15, "xref_id": 10003, "DataId": 8967045, "StrainId": 7, "StrainName": "BXD6", "value": 903}, - {"PhenotypeId": 20, "xref_id": 10004, "DataId": 8967046, "StrainId": 3, "StrainName": "DBA/2J", "value": 1}, - {"PhenotypeId": 20, "xref_id": 10004, "DataId": 8967046, "StrainId": 4, "StrainName": "BXD1", "value": 8}, - {"PhenotypeId": 20, "xref_id": 10004, "DataId": 8967046, "StrainId": 5, "StrainName": "BXD2", "value": 9})), + ({ + "PhenotypeId": 4, + "xref_id": 10001, + "DataId": 8967043, + "StrainId": 4, + "StrainName": "BXD1", + "value": 77.2 + }, + { + "PhenotypeId": 15, + "xref_id": 10003, + "DataId": 8967045, + "StrainId": 6, + "StrainName": "BXD5", + "value": 503 + }, + { + "PhenotypeId": 15, + "xref_id": 10003, + "DataId": 8967045, + "StrainId": 7, + "StrainName": "BXD6", + "value": 903 + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 3, + "StrainName": "DBA/2J", + "value": 1 + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 4, + "StrainName": "BXD1", + "value": 8 + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 5, + "StrainName": "BXD2", + "value": 9 + })), # Changes — with deletions (({ @@ -292,12 +334,54 @@ __sample_db_phenotypes_data__ = ( } }), __sample_db_phenotypes_data__, - ({"PhenotypeId": 4, "xref_id": 10001, "DataId": 8967043, "StrainId": 4, "StrainName": "BXD1", "value": None}, - {"PhenotypeId": 15, "xref_id": 10003, "DataId": 8967045, "StrainId": 6, "StrainName": "BXD5", "value": None}, - {"PhenotypeId": 15, "xref_id": 10003, "DataId": 8967045, "StrainId": 7, "StrainName": "BXD6", "value": None}, - {"PhenotypeId": 20, "xref_id": 10004, "DataId": 8967046, "StrainId": 3, "StrainName": "DBA/2J", "value": 15}, - {"PhenotypeId": 20, "xref_id": 10004, "DataId": 8967046, "StrainId": 4, "StrainName": "BXD1", "value": None}, - {"PhenotypeId": 20, "xref_id": 10004, "DataId": 8967046, "StrainId": 5, "StrainName": "BXD2", "value": 24})))) + ({ + "PhenotypeId": 4, + "xref_id": 10001, + "DataId": 8967043, + "StrainId": 4, + "StrainName": "BXD1", + "value": None + }, + { + "PhenotypeId": 15, + "xref_id": 10003, + "DataId": 8967045, + "StrainId": 6, + "StrainName": "BXD5", + "value": None + }, + { + "PhenotypeId": 15, + "xref_id": 10003, + "DataId": 8967045, + 
"StrainId": 7, + "StrainName": "BXD6", + "value": None + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 3, + "StrainName": "DBA/2J", + "value": 15 + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 4, + "StrainName": "BXD1", + "value": None + }, + { + "PhenotypeId": 20, + "xref_id": 10004, + "DataId": 8967046, + "StrainId": 5, + "StrainName": "BXD2", + "value": 24 + })))) def test_phenotypes_data_differences(filedata, dbdata, expected): """Test differences are computed correctly.""" assert phenotypes_data_differences(filedata, dbdata) == expected diff --git a/tests/uploader/publications/__init__.py b/tests/uploader/publications/__init__.py new file mode 100644 index 0000000..de15e08 --- /dev/null +++ b/tests/uploader/publications/__init__.py @@ -0,0 +1 @@ +"""publications tests""" diff --git a/tests/uploader/publications/test_misc.py b/tests/uploader/publications/test_misc.py index 7a52941..8c7e567 100644 --- a/tests/uploader/publications/test_misc.py +++ b/tests/uploader/publications/test_misc.py @@ -63,5 +63,6 @@ from uploader.publications.misc import publications_differences {"PhenotypeId": 1, "xref_id": 10004, "PublicationId": None, "PubMed_ID": None})))) def test_publications_differences(filedata, dbdata, pubmed2pubidmap, expected): + """Test publication differences — flesh out description…""" assert publications_differences( filedata, dbdata, pubmed2pubidmap) == expected diff --git a/tests/uploader/test_parse.py b/tests/uploader/test_parse.py index 076c47c..20c75b7 100644 --- a/tests/uploader/test_parse.py +++ b/tests/uploader/test_parse.py @@ -8,7 +8,8 @@ from uploader.jobs import job, jobsnamespace from tests.conftest import uploadable_file_object -def test_parse_with_existing_uploaded_file(#pylint: disable=[too-many-arguments] +def test_parse_with_existing_uploaded_file( + #pylint: disable=[too-many-arguments,too-many-positional-arguments] client, db_url, redis_url, diff --git a/uploader/__init__.py b/uploader/__init__.py index 69d34f7..8b49ad5 100644 --- a/uploader/__init__.py +++ b/uploader/__init__.py @@ -3,13 +3,17 @@ import os import sys import logging from pathlib import Path +from typing import Optional from flask import Flask, request -from flask_session import Session + from cachelib import FileSystemCache from gn_libs import jobs as gnlibs_jobs +from flask_session import Session + + from uploader.oauth2.client import user_logged_in, authserver_authorise_uri from . 
import session @@ -22,6 +26,11 @@ from .expression_data import exprdatabp from .errors import register_error_handlers from .background_jobs import background_jobs_bp +logging.basicConfig( + format=("%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s " + "(%(thread)d:%(threadName)s): %(message)s") +) + def override_settings_with_envvars( app: Flask, ignore: tuple[str, ...]=tuple()) -> None: """Override settings in `app` with those in ENVVARS""" @@ -55,13 +64,27 @@ def setup_logging(app: Flask) -> Flask: "SERVER_SOFTWARE", "").split('/') return __log_gunicorn__(app) if bool(software) else __log_dev__(app) +def setup_modules_logging(app_logger): + """Setup module-level loggers to the same log-level as the application.""" + loglevel = logging.getLevelName(app_logger.getEffectiveLevel()) + + def __setup__(logger_name): + _logger = logging.getLogger(logger_name) + _logger.setLevel(loglevel) + + __setup__("uploader.publications.models") + __setup__("uploader.publications.datatables") -def create_app(config: dict = {}): + +def create_app(config: Optional[dict] = None): """The application factory. config: dict Useful to override settings in the settings files and environment especially in environments such as testing.""" + if config is None: + config = {} + app = Flask(__name__) ### BEGIN: Application configuration @@ -88,6 +111,7 @@ def create_app(config: dict = {}): default_timeout=int(app.config["SESSION_FILESYSTEM_CACHE_TIMEOUT"])) setup_logging(app) + setup_modules_logging(app.logger) # setup jinja2 symbols app.add_template_global(lambda : request.url, name="request_url") diff --git a/uploader/authorisation.py b/uploader/authorisation.py index bc950d8..3cf3585 100644 --- a/uploader/authorisation.py +++ b/uploader/authorisation.py @@ -48,7 +48,7 @@ def require_token(func: Callable) -> Callable: """ def __invalid_token__(_whatever): logging.debug("==========> Failure log: %s", _whatever) - raise Exception( + raise Exception(# pylint: disable=[broad-exception-raised] "You attempted to access a feature of the system that requires " "authorisation. Unfortunately, we could not verify you have the " "appropriate authorisation to perform the action you requested. 
" diff --git a/uploader/background_jobs.py b/uploader/background_jobs.py index ac47ff2..dc9f837 100644 --- a/uploader/background_jobs.py +++ b/uploader/background_jobs.py @@ -1,14 +1,88 @@ +"""Generic views and utilities to handle background jobs.""" import uuid +import importlib +from typing import Callable +from functools import partial -from flask import request, Blueprint, render_template, current_app as app +from flask import ( + url_for, + redirect, + Response, + Blueprint, + render_template, + current_app as app) from gn_libs import jobs -from gn_libs.jobs.jobs import JobNotFound from gn_libs import sqlite3 +from gn_libs.jobs.jobs import JobNotFound from uploader.authorisation import require_login background_jobs_bp = Blueprint("background-jobs", __name__) +HandlerType = Callable[[dict], Response] + + +def __default_error_handler__(job: dict) -> Response: + return redirect(url_for("background-jobs.job_error", job_id=job["job_id"])) + +def register_handlers( + job_type: str, + success_handler: HandlerType, + # pylint: disable=[redefined-outer-name] + error_handler: HandlerType = __default_error_handler__ + # pylint: disable=[redefined-outer-name] +) -> str: + """Register success and error handlers for each job type.""" + if not bool(app.config.get("background-jobs")): + app.config["background-jobs"] = {} + + if not bool(app.config["background-jobs"].get(job_type)): + app.config["background-jobs"][job_type] = { + "success": success_handler, + "error": error_handler + } + + return job_type + + +def register_job_handlers(job: str): + """Related to register handlers above.""" + def __load_handler__(absolute_function_path): + _parts = absolute_function_path.split(".") + app.logger.debug("THE PARTS ARE: %s", _parts) + assert len(_parts) > 1, f"Invalid path: {absolute_function_path}" + module = importlib.import_module(f".{_parts[-2]}", + package=".".join(_parts[0:-2])) + return getattr(module, _parts[-1]) + + metadata = job["metadata"] + if metadata["success_handler"]: + _success_handler = __load_handler__(metadata["success_handler"]) + try: + _error_handler = __load_handler__(metadata["error_handler"]) + except Exception as _exc:# pylint: disable=[broad-exception-caught] + _error_handler = __default_error_handler__ + register_handlers( + metadata["job-type"], _success_handler, _error_handler) + + +def handler(job: dict, handler_type: str) -> HandlerType: + """Fetch a handler for the job.""" + _job_type = job["metadata"]["job-type"] + _handler = app.config.get( + "background-jobs", {} + ).get( + _job_type, {} + ).get(handler_type) + if bool(_handler): + return _handler(job) + raise Exception(# pylint: disable=[broad-exception-raised] + f"No '{handler_type}' handler registered for job type: {_job_type}") + + +error_handler = partial(handler, handler_type="error") +success_handler = partial(handler, handler_type="success") + @background_jobs_bp.route("/status/<uuid:job_id>") @require_login @@ -17,19 +91,29 @@ def job_status(job_id: uuid.UUID): with sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]) as conn: try: job = jobs.job(conn, job_id, fulldetails=True) - stdout = "" - stderr = "" - # with (open(job["metadata"]["stdout-file"], encoding="utf-8") as stdout_file, - # open(job["metadata"]["stderr-file"], encoding="utf-8") as stderr_file): - # stdout = stdout_file.read() - # stderr = stderr_file.read() + status = job["metadata"]["status"] - return render_template( - f"jobs/job-status.html", - job=job, - stdout=stdout, - stderr=stderr) - except JobNotFound as jnf: + 
register_job_handlers(job) + if status == "error": + return error_handler(job) + + if status == "completed": + return success_handler(job) + + return render_template("jobs/job-status.html", job=job) + except JobNotFound as _jnf: return render_template( "jobs/job-not-found.html", job_id=job_id) + + +@background_jobs_bp.route("/error/<uuid:job_id>") +@require_login +def job_error(job_id: uuid.UUID): + """Handle job errors in a generic manner.""" + with sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]) as conn: + try: + job = jobs.job(conn, job_id, fulldetails=True) + return render_template("jobs/job-error.html", job=job) + except JobNotFound as _jnf: + return render_template("jobs/job-not-found.html", job_id=job_id) diff --git a/uploader/files/views.py b/uploader/files/views.py index ddf5350..29059c7 100644 --- a/uploader/files/views.py +++ b/uploader/files/views.py @@ -1,4 +1,6 @@ """Module for generic files endpoints.""" +import time +import random import traceback from pathlib import Path @@ -56,10 +58,13 @@ def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path: """Merge the chunks into a single file.""" with open(targetfile, "ab") as _target: for chunkfile in chunkpaths: + app.logger.error("Merging chunk: %s", chunkfile) with open(chunkfile, "rb") as _chunkdata: _target.write(_chunkdata.read()) - chunkfile.unlink(missing_ok=True) + chunkfile.unlink() # Don't use `missing_ok=True` — chunk MUST exist + # If chunk doesn't exist, it might indicate a race condition. Handle + # that instead. return targetfile @@ -92,15 +97,51 @@ def resumable_upload_post(): Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk)) for _achunk in range(1, _totalchunks+1)) if all(_file.exists() for _file in chunkpaths): - # merge_files and clean up chunks - __merge_chunks__(_targetfile, chunkpaths) - chunks_directory(_fileid).rmdir() + ### HACK: Break possible race condition ### + # Looks like sometimes, there are multiple threads/requests trying + # to merge one file, leading to race conditions and in some rare + # instances, actual data corruption. This hack is meant to break + # that race condition. 
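[Editorial sketch, not part of the change: the loop that follows, reduced to its skeleton. A lock file marks a merge in progress; each request sleeps for a random prime number of milliseconds, then either claims the merge, observes a finished merge, or keeps waiting. The jitter plus lock file narrow the race window, they do not eliminate it.]

import time
import random
from pathlib import Path

def merge_with_lockfile(chunks_dir: Path, target: Path, merge) -> None:
    """Simplified form of the hack below. `merge` is assumed to combine the
    chunks in `chunks_dir` into `target` and delete the chunk files."""
    lockfile = chunks_dir / "merge.lock"
    while True:
        time.sleep(random.choice((0.101, 0.113, 0.127)))  # jittered back-off
        if chunks_dir.exists() and not (lockfile.exists() or target.exists()):
            lockfile.touch()          # claim the merge
            merge()                   # combine chunks into `target`
            lockfile.unlink()
            chunks_dir.rmdir()        # merge done; remove the chunks dir
        elif target.exists() and not lockfile.exists():
            return                    # merge finished (here or elsewhere)
        # otherwise a merge is in progress: sleep again and re-check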
+ _delays = ( + 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, + 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, + 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293) + _lockfile = Path(chunks_directory(_fileid), "merge.lock") + while True: + time.sleep(random.choice(_delays) / 1000) + if (chunks_directory(_fileid).exists() + and not (_lockfile.exists() and _targetfile.exists())): + # merge_files and clean up chunks + _lockfile.touch() + __merge_chunks__(_targetfile, chunkpaths) + _lockfile.unlink() + chunks_directory(_fileid).rmdir() + continue + + if (_targetfile.exists() + and not ( + chunks_directory(_fileid).exists() + and _lockfile.exists())): + # merge complete + break + + # There is still a thread that's merging this file + continue + ### END: HACK: Break possible race condition ### + + if _targetfile.exists(): + return jsonify({ + "uploaded-file": _targetfile.name, + "original-name": _uploadfilename, + "message": "File was uploaded successfully!", + "statuscode": 200 + }), 200 return jsonify({ "uploaded-file": _targetfile.name, "original-name": _uploadfilename, - "message": "File was uploaded successfully!", - "statuscode": 200 - }), 200 + "message": "Uploaded file is missing!", + "statuscode": 404 + }), 404 return jsonify({ "message": f"Chunk {int(_chunk)} uploaded successfully.", "statuscode": 201 diff --git a/uploader/jobs.py b/uploader/jobs.py index e86ee05..5968c03 100644 --- a/uploader/jobs.py +++ b/uploader/jobs.py @@ -41,7 +41,8 @@ def error_filename(jobid, error_dir): "Compute the path of the file where errors will be dumped." return f"{error_dir}/job_{jobid}.error" -def initialise_job(# pylint: disable=[too-many-arguments] +def initialise_job( + # pylint: disable=[too-many-arguments, too-many-positional-arguments] rconn: Redis, rprefix: str, jobid: str, command: list, job_type: str, ttl_seconds: int = 86400, extra_meta: Optional[dict] = None) -> dict: "Initialise a job 'object' and put in on redis" @@ -54,7 +55,8 @@ def initialise_job(# pylint: disable=[too-many-arguments] name=job_key(rprefix, jobid), time=timedelta(seconds=ttl_seconds)) return the_job -def build_file_verification_job(#pylint: disable=[too-many-arguments] +def build_file_verification_job( + #pylint: disable=[too-many-arguments, too-many-positional-arguments] redis_conn: Redis, dburi: str, redisuri: str, @@ -77,7 +79,8 @@ def build_file_verification_job(#pylint: disable=[too-many-arguments] "filename": os.path.basename(filepath), "percent": 0 }) -def data_insertion_job(# pylint: disable=[too-many-arguments] +def data_insertion_job( + # pylint: disable=[too-many-arguments, too-many-positional-arguments] redis_conn: Redis, filepath: str, filetype: str, totallines: int, speciesid: int, platformid: int, datasetid: int, databaseuri: str, redisuri: str, ttl_seconds: int) -> dict: diff --git a/uploader/monadic_requests.py b/uploader/monadic_requests.py index f1f5c77..eda42d0 100644 --- a/uploader/monadic_requests.py +++ b/uploader/monadic_requests.py @@ -59,6 +59,11 @@ def get(url, params=None, **kwargs) -> Either: :rtype: pymonad.either.Either """ + timeout = kwargs.get("timeout") + kwargs = {key: val for key,val in kwargs.items() if key != "timeout"} + if timeout is None: + timeout = (9.13, 20) + try: resp = requests.get(url, params=params, timeout=timeout, **kwargs) if resp.status_code in SUCCESS_CODES: @@ -76,6 +81,11 @@ def post(url, data=None, json=None, **kwargs) -> Either: :rtype: pymonad.either.Either """ + timeout = kwargs.get("timeout") + kwargs = {key: val for key,val in kwargs.items() if key != "timeout"} + if timeout is None: + timeout = (9.13, 20) + try: resp = requests.post(url, data=data, json=json, timeout=timeout, **kwargs) if resp.status_code in SUCCESS_CODES: @@ -95,10 +105,10 @@ def make_either_error_handler(msg): try: _data = error.json() except Exception as _exc: - raise Exception(error.content) from _exc - raise Exception(_data) + raise Exception(error.content) from _exc# pylint: disable=[broad-exception-raised] + raise Exception(_data)# pylint: disable=[broad-exception-raised] app.logger.debug("\n\n%s\n\n", msg) - raise Exception(error) + raise Exception(error)# pylint: disable=[broad-exception-raised] return __fail__ diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py index 9ff89ae..c2aeebf 100644 --- a/uploader/phenotypes/models.py +++ b/uploader/phenotypes/models.py @@ -1,17 +1,30 @@ """Database and utility functions for phenotypes.""" import logging -from typing import Optional +import tempfile +from pathlib import Path from functools import reduce from datetime import datetime +from typing import Optional, Iterable import MySQLdb as mdb from MySQLdb.cursors import Cursor, DictCursor +from functional_tools import take from gn_libs.mysqldb import debug_query logger = logging.getLogger(__name__) +__PHENO_DATA_TABLES__ = { + "PublishData": { + "table": "PublishData", "valueCol": "value", "DataIdCol": "Id"}, + "PublishSE": { + "table": "PublishSE", "valueCol": "error", "DataIdCol": "DataId"}, + "NStrain": { + "table": "NStrain", "valueCol": "count", "DataIdCol": "DataId"} +} + + def datasets_by_population( conn: mdb.Connection, species_id: int, @@ -35,10 +48,10 @@ def dataset_by_id(conn: mdb.Connection, """Fetch dataset details by identifier""" with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( - "SELECT s.SpeciesId, pf.* FROM Species AS s " - "INNER JOIN InbredSet AS iset ON s.Id=iset.SpeciesId " - "INNER JOIN PublishFreeze AS pf ON iset.Id=pf.InbredSetId " - "WHERE s.Id=%s AND iset.Id=%s AND pf.Id=%s", + "SELECT Species.SpeciesId, PublishFreeze.* FROM Species " + "INNER JOIN InbredSet ON Species.Id=InbredSet.SpeciesId " + "INNER JOIN PublishFreeze ON InbredSet.Id=PublishFreeze.InbredSetId " + "WHERE Species.Id=%s AND InbredSet.Id=%s AND PublishFreeze.Id=%s", (species_id, population_id, dataset_id)) return dict(cursor.fetchone()) @@ -287,3 +300,97 @@ def phenotypes_data_by_ids( debug_query(cursor, logger) return tuple( reduce(__organise_by_phenotype__, cursor.fetchall(), {}).values()) + + +def create_new_phenotypes(conn: mdb.Connection, + phenotypes: Iterable[dict]) -> tuple[dict, ...]: + """Add entirely new phenotypes to the database.""" + _phenos = tuple() + with conn.cursor(cursorclass=DictCursor) as cursor: + while True: + batch = take(phenotypes, 1000) + if len(batch) == 0: + break + + cursor.executemany( + ("INSERT INTO " + "Phenotype(Pre_publication_description, Original_description, Units, Authorized_Users) " + "VALUES (%s, %s, %s, 'robwilliams')"), + tuple((row["id"], row["description"], row["units"]) + for row in batch)) + paramstr = ", ".join(["%s"] * len(batch)) + cursor.execute( + "SELECT * FROM Phenotype WHERE Pre_publication_description IN " + f"({paramstr})", + tuple(item["id"] for item in batch)) + _phenos = _phenos + tuple({ + "phenotype_id": row["Id"], + "id": row["Pre_publication_description"], + "description": row["Original_description"], + "units": row["Units"] + } for row in cursor.fetchall()) + + return _phenos + + +def save_phenotypes_data( + conn: mdb.Connection, + table: str, + data: Iterable[dict]
+) -> int: + """Save new phenotypes data into the database.""" + _table_details = __PHENO_DATA_TABLES__[table] + with conn.cursor(cursorclass=DictCursor) as cursor: + _count = 0 + while True: + batch = take(data, 100000) + if len(batch) == 0: + logger.warning("Got an empty batch. This needs investigation.") + break + + logger.debug("Saving batch of %s items.", len(batch)) + cursor.executemany( + (f"INSERT INTO {_table_details['table']}" + f"({_table_details['DataIdCol']}, StrainId, {_table_details['valueCol']}) " + "VALUES " + f"(%(data_id)s, %(sample_id)s, %(value)s) "), + tuple(batch)) + debug_query(cursor, logger) + _count = _count + len(batch) + + + logger.debug("Saved a total of %s data rows", _count) + return _count + + +def quick_save_phenotypes_data( + conn: mdb.Connection, + table: str, + dataitems: Iterable[dict], + tmpdir: Path +) -> int: + """Save data items to the database, but using `LOAD DATA LOCAL INFILE`.""" + _table_details = __PHENO_DATA_TABLES__[table] + with (tempfile.NamedTemporaryFile( + prefix=f"{table}_data", mode="wt", dir=tmpdir) as tmpfile, + conn.cursor(cursorclass=DictCursor) as cursor): + _count = 0 + logger.debug("Write data rows to text file.") + for row in dataitems: + tmpfile.write( + f'{row["data_id"]}\t{row["sample_id"]}\t{row["value"]}\n') + _count = _count + 1 + tmpfile.flush() + + logger.debug("Load text file into database (table: %s)", + _table_details["table"]) + cursor.execute( + f"LOAD DATA LOCAL INFILE '{tmpfile.name}' " + f"INTO TABLE {_table_details['table']} " + "(" + f"{_table_details['DataIdCol']}, " + "StrainId, " + f"{_table_details['valueCol']}" + ")") + debug_query(cursor, logger) + return _count diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py index 92a7c4b..bc15f2d 100644 --- a/uploader/phenotypes/views.py +++ b/uploader/phenotypes/views.py @@ -4,20 +4,30 @@ import csv import uuid import json import logging -import datetime import tempfile from typing import Any from pathlib import Path from zipfile import ZipFile from functools import wraps, reduce from logging import INFO, ERROR, DEBUG, FATAL, CRITICAL, WARNING +from urllib.parse import urljoin, urlparse, ParseResult, urlunparse, urlencode + +import datetime +from datetime import timedelta from redis import Redis from pymonad.either import Left from requests.models import Response from MySQLdb.cursors import DictCursor from werkzeug.utils import secure_filename + +from gn_libs import sqlite3 +from gn_libs import jobs as gnlibs_jobs +from gn_libs.jobs.jobs import JobNotFound from gn_libs.mysqldb import database_connection +from gn_libs import monadic_requests as mrequests + +from authlib.jose import jwt from flask import (flash, request, url_for, @@ -31,15 +41,19 @@ from flask import (flash, from r_qtl import r_qtl2_qc as rqc from r_qtl import exceptions as rqe + from uploader import jobs +from uploader import session from uploader.files import save_file#, fullpath from uploader.ui import make_template_renderer from uploader.oauth2.client import oauth2_post from uploader.authorisation import require_login +from uploader.oauth2 import jwks, client as oauth2client from uploader.route_utils import generic_select_population from uploader.datautils import safe_int, enumerate_sequence from uploader.species.models import all_species, species_by_id from uploader.monadic_requests import make_either_error_handler +from uploader.publications.models import fetch_publication_by_id from uploader.request_checks import with_species, with_population from uploader.samples.models import 
samples_by_species_and_population from uploader.input_validation import (encode_errors, @@ -364,6 +378,9 @@ def process_phenotypes_individual_files(error_uri): ("pheno", "phenotype-data"), ("phenose", "phenotype-se"), ("phenonum", "phenotype-n")): + cdata[f"{rqtlkey}_transposed"] = ( + (form.get(f"{formkey}-transposed") or "off") == "on") + if form.get("resumable-upload", False): # Chunked upload of large files was used filedata = json.loads(form[formkey]) @@ -386,6 +403,7 @@ def process_phenotypes_individual_files(error_uri): arcname=filepath.name) cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filepath.name] + zfile.writestr("control_data.json", data=json.dumps(cdata, indent=2)) return bundlepath @@ -451,21 +469,18 @@ def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# p # str(dataset["Id"]), str(phenobundle), "--loglevel", - { - INFO: "INFO", - ERROR: "ERROR", - DEBUG: "DEBUG", - FATAL: "FATAL", - CRITICAL: "CRITICAL", - WARNING: "WARNING" - }[app.logger.getEffectiveLevel()], + logging.getLevelName( + app.logger.getEffectiveLevel() + ).lower(), "--redisexpiry", str(_ttl_seconds)], "phenotype_qc", _ttl_seconds, {"job-metadata": json.dumps({ "speciesid": species["SpeciesId"], "populationid": population["Id"], "datasetid": dataset["Id"], - "bundle": str(phenobundle.absolute())})}), + "bundle": str(phenobundle.absolute()), + **({"publicationid": request.form["publication-id"]} + if request.form.get("publication-id") else {})})}), _redisuri, f"{app.config['UPLOAD_FOLDER']}/job_errors") @@ -538,7 +553,8 @@ def review_job_data( **kwargs ):# pylint: disable=[unused-argument] """Review data one more time before entering it into the database.""" - with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: + with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, + database_connection(app.config["SQL_URI"]) as conn): try: job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id)) except jobs.JobNotFound as _jnf: @@ -586,6 +602,7 @@ def review_job_data( filetype: __summarise__(filetype, meta) for filetype,meta in metadata.items() } + _job_metadata = json.loads(job["job-metadata"]) return render_template("phenotypes/review-job-data.html", species=species, population=population, @@ -593,9 +610,126 @@ def review_job_data( job_id=job_id, job=job, summary=summary, + publication=( + fetch_publication_by_id( + conn, int(_job_metadata["publicationid"])) + if _job_metadata.get("publicationid") + else None), activelink="add-phenotypes") +def load_phenotypes_success_handler(job): + """Handle loading new phenotypes into the database successfully.""" + return redirect(url_for( + "species.populations.phenotypes.load_data_success", + species_id=job["metadata"]["species_id"], + population_id=job["metadata"]["population_id"], + dataset_id=job["metadata"]["dataset_id"], + job_id=job["job_id"])) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/load-data-to-database", + methods=["POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def load_data_to_database( + species: dict, + population: dict, + dataset: dict, + **kwargs +):# pylint: disable=[unused-argument] + """Load the data from the given QC job into the database.""" + jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + with 
(Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, + sqlite3.connection(jobs_db) as conn): + qc_job = jobs.job(rconn, jobs.jobsnamespace(), request.form["data-qc-job-id"]) + _meta = json.loads(qc_job["job-metadata"]) + load_job_id = uuid.uuid4() + _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower() + command = [ + sys.executable, + "-u", + "-m", + "scripts.load_phenotypes_to_db", + app.config["SQL_URI"], + jobs_db, + str(load_job_id), + "--log-level", + _loglevel + ] + + def __handle_error__(resp): + return render_template("http-error.html", *resp.json()) + + def __handle_success__(load_job): + app.logger.debug("The phenotypes loading job: %s", load_job) + return redirect(url_for( + "background-jobs.job_status", job_id=load_job["job_id"])) + + issued = datetime.datetime.now() + jwtkey = jwks.newest_jwk_with_rotation( + jwks.jwks_directory(app, "UPLOADER_SECRETS"), + int(app.config["JWKS_ROTATION_AGE_DAYS"])) + + return mrequests.post( + urljoin(oauth2client.authserver_uri(), "auth/token"), + json={ + "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer", + "scope": oauth2client.SCOPE, + "assertion": jwt.encode( + header={ + "alg": "RS256", + "typ": "JWT", + "kid": jwtkey.as_dict()["kid"] + }, + payload={ + "iss": str(oauth2client.oauth2_clientid()), + "sub": str(session.user_details()["user_id"]), + "aud": urljoin(oauth2client.authserver_uri(), + "auth/token"), + # TODO: Update expiry time once fix is implemented in + # auth server. + "exp": (issued + timedelta(minutes=5)).timestamp(), + "nbf": int(issued.timestamp()), + "iat": int(issued.timestamp()), + "jti": str(uuid.uuid4()) + }, + key=jwtkey).decode("utf8"), + "client_id": oauth2client.oauth2_clientid() + } + ).then( + lambda token: gnlibs_jobs.initialise_job( + conn, + load_job_id, + command, + "load-new-phenotypes-data", + extra_meta={ + "species_id": species["SpeciesId"], + "population_id": population["Id"], + "dataset_id": dataset["Id"], + "bundle_file": _meta["bundle"], + "publication_id": _meta["publicationid"], + "authserver": oauth2client.authserver_uri(), + "token": token["access_token"], + "success_handler": ( + "uploader.phenotypes.views" + ".load_phenotypes_success_handler") + }) + ).then( + lambda job: gnlibs_jobs.launch_job( + job, + jobs_db, + Path(f"{app.config['UPLOAD_FOLDER']}/job_errors"), + worker_manager="gn_libs.jobs.launcher", + loglevel=_loglevel) + ).either(__handle_error__, __handle_success__) + + def update_phenotype_metadata(conn, metadata: dict): """Update a phenotype's basic metadata values.""" with conn.cursor(cursorclass=DictCursor) as cursor: @@ -967,8 +1101,6 @@ def edit_upload_phenotype_data(# pylint: disable=[unused-argument] edit_file = save_file(request.files["file-upload-bulk-edit-upload"], Path(app.config["UPLOAD_FOLDER"])) - from gn_libs import jobs as gnlibs_jobs - from gn_libs import sqlite3 jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] with sqlite3.connection(jobs_db) as conn: job_id = uuid.uuid4() @@ -1003,3 +1135,66 @@ def edit_upload_phenotype_data(# pylint: disable=[unused-argument] return redirect(url_for("background-jobs.job_status", job_id=job_id, job_type="phenotype-bulk-edit")) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/load-data-success/<uuid:job_id>", + methods=["GET"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + 
redirect_uri="species.populations.phenotypes.list_datasets") +def load_data_success( + species: dict, + population: dict, + dataset: dict, + job_id: uuid.UUID, + **kwargs +):# pylint: disable=[unused-argument] + with (database_connection(app.config["SQL_URI"]) as conn, + sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]) + as jobsconn): + try: + gn2_uri = urlparse(app.config["GN2_SERVER_URL"]) + job = gnlibs_jobs.job(jobsconn, job_id, fulldetails=True) + app.logger.debug("THE JOB: %s", job) + _xref_ids = (str(item) for item + in json.loads(job["metadata"].get("xref_ids", "[]"))) + _publication = fetch_publication_by_id( + conn, int(job["metadata"].get("publication_id", "0"))) + _search_terms = (item for item in + (str(_publication["PubMed_ID"] or ""), + _publication["Authors"], + (_publication["Title"] or "")) + if item != "") + return render_template("phenotypes/load-phenotypes-success.html", + species=species, + population=population, + dataset=dataset, + job=job, + search_page_uri=urlunparse(ParseResult( + scheme=gn2_uri.scheme, + netloc=gn2_uri.netloc, + path="/search", + params="", + query=urlencode({ + "species": species["Name"], + "group": population["Name"], + "type": "Phenotypes", + "dataset": dataset["Name"], + "search_terms_or": ( + # Very long URLs will cause + # errors. + " ".join(_xref_ids) + if len(_xref_ids) <= 100 + else ""), + "search_terms_and": " ".join( + _search_terms).strip(), + "accession_id": "None", + "FormID": "searchResult" + }), + fragment=""))) + except JobNotFound as jnf: + return render_template("jobs/job-not-found.html", job_id=job_id) diff --git a/uploader/platforms/models.py b/uploader/platforms/models.py index a859371..0dd9368 100644 --- a/uploader/platforms/models.py +++ b/uploader/platforms/models.py @@ -56,7 +56,8 @@ def platform_by_species_and_id( return None -def save_new_platform(# pylint: disable=[too-many-arguments] +def save_new_platform( + # pylint: disable=[too-many-arguments, too-many-positional-arguments] cursor: Cursor, species_id: int, geo_platform: str, diff --git a/uploader/publications/datatables.py b/uploader/publications/datatables.py new file mode 100644 index 0000000..e07fafd --- /dev/null +++ b/uploader/publications/datatables.py @@ -0,0 +1,52 @@ +"""Fetch data for datatables.""" +import logging +from typing import Optional + +from MySQLdb.cursors import DictCursor + +from gn_libs.mysqldb import Connection, debug_query + +logger = logging.getLogger(__name__) + +def fetch_publications( + conn: Connection, + search: Optional[str] = None, + offset: int = 0, + limit: int = -1 +) -> tuple[dict, int, int, int]: + """Fetch publications from the database.""" + _query = "SELECT * FROM Publication" + _count_query = "SELECT COUNT(*) FROM Publication" + _params = None + _where_clause = "" + _limit_clause = "" + if search is not None and bool(search): + _where_clause = ("WHERE PubMed_ID LIKE %s " + "OR Authors LIKE %s " + "OR Title LIKE %s") + _params = (f"%{search}%",) * 3 + + if limit > 0: + _limit_clause = f"LIMIT {limit} OFFSET {offset}" + + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT COUNT(*) FROM Publication") + _total_rows = int(cursor.fetchone()["COUNT(*)"]) + + cursor.execute(f"{_count_query} {_where_clause}", _params) + debug_query(cursor, logger) + _result = cursor.fetchone() + _total_filtered = int(_result["COUNT(*)"] if bool(_result) else 0) + + cursor.execute(f"{_query} {_where_clause} {_limit_clause}", _params) + debug_query(cursor, logger) + _current_filtered = tuple( + {**dict(row), 
"index": idx} + for idx, row + in enumerate(cursor.fetchall(), start=offset+1)) + + return ( + _current_filtered, + len(_current_filtered), + _total_filtered, + _total_rows) diff --git a/uploader/publications/models.py b/uploader/publications/models.py index 7d2862d..b199991 100644 --- a/uploader/publications/models.py +++ b/uploader/publications/models.py @@ -1,6 +1,6 @@ """Module to handle persistence and retrieval of publication to/from MariaDB""" import logging -from typing import Iterable +from typing import Iterable, Optional from MySQLdb.cursors import DictCursor @@ -42,15 +42,10 @@ def create_new_publications( "%(pubmed_id)s, %(abstract)s, %(authors)s, %(title)s, " "%(journal)s, %(volume)s, %(pages)s, %(month)s, %(year)s" ") " - "ON DUPLICATE KEY UPDATE " - "Abstract=VALUES(Abstract), Authors=VALUES(Authors), " - "Title=VALUES(Title), Journal=VALUES(Journal), " - "Volume=VALUES(Volume), Pages=VALUES(pages), " - "Month=VALUES(Month), Year=VALUES(Year) " "RETURNING *"), publications) return tuple({ - **row, "PublicationId": row["Id"] + **row, "publication_id": row["Id"] } for row in cursor.fetchall()) return tuple() @@ -74,20 +69,13 @@ def update_publications(conn: Connection , publications: tuple[dict, ...]) -> tu return tuple() -def fetch_publications(conn: Connection) -> Iterable[dict]: - """Fetch publications from the database.""" - with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute("SELECT * FROM Publication") - for row in cursor.fetchall(): - yield dict(row) - - def fetch_publication_by_id(conn: Connection, publication_id: int) -> dict: """Fetch a specific publication from the database.""" with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute("SELECT * FROM Publication WHERE Id=%s", (publication_id,)) - return dict(cursor.fetchone()) + _res = cursor.fetchone() + return dict(_res) if _res else {} def fetch_publication_phenotypes( diff --git a/uploader/publications/views.py b/uploader/publications/views.py index 85d3aef..0608a35 100644 --- a/uploader/publications/views.py +++ b/uploader/publications/views.py @@ -1,12 +1,25 @@ """Endpoints for publications""" import json +from MySQLdb.cursors import DictCursor from gn_libs.mysqldb import database_connection -from flask import Blueprint, render_template, current_app as app +from flask import ( + flash, + request, + url_for, + redirect, + Blueprint, + render_template, + current_app as app) from uploader.authorisation import require_login -from .models import fetch_publications +from .models import ( + fetch_publication_by_id, + create_new_publications, + fetch_publication_phenotypes) + +from .datatables import fetch_publications from gn_libs.debug import __pk__ @@ -24,11 +37,71 @@ def index(): @pubbp.route("/list", methods=["GET"]) @require_login def list_publications(): - with database_connection(app.config["SQL_URI"]) as conn: + # request breakdown: + # https://datatables.net/manual/server-side + _page = int(request.args.get("draw")) + _length = int(request.args.get("length") or '-1') + _start = int(request.args.get("start") or '0') + _search = request.args["search[value]"] + with (database_connection(app.config["SQL_URI"]) as conn, + conn.cursor(cursorclass=DictCursor) as cursor): + _publications, _current_rows, _totalfiltered, _totalrows = fetch_publications( + conn, + _search, + offset=_start, + limit=_length) + return json.dumps({ - "publications": tuple({ - **row, "index": idx - } for idx,row in enumerate( - fetch_publications(conn), start=1)), + "draw": _page, + "recordsTotal": _totalrows, + 
"recordsFiltered": _totalfiltered, + "publications": _publications, "status": "success" }) + + +@pubbp.route("/view/<int:publication_id>", methods=["GET"]) +@require_login +def view_publication(publication_id: int): + """View more details on a particular publication.""" + with database_connection(app.config["SQL_URI"]) as conn: + return render_template( + "publications/view-publication.html", + publication=fetch_publication_by_id(conn, publication_id), + linked_phenotypes=tuple(fetch_publication_phenotypes( + conn, publication_id))) + + +@pubbp.route("/create", methods=["GET", "POST"]) +@require_login +def create_publication(): + """Create a new publication.""" + if(request.method == "GET"): + return render_template("publications/create-publication.html") + form = request.form + authors = form.get("publication-authors").encode("utf8") + if authors is None or authors == "": + flash("The publication's author(s) MUST be provided!", "alert alert-danger") + return redirect(url_for("publications.create", **request.args)) + + with database_connection(app.config["SQL_URI"]) as conn: + publications = create_new_publications(conn, ({ + "pubmed_id": form.get("pubmed-id") or None, + "abstract": form.get("publication-abstract").encode("utf8") or None, + "authors": authors, + "title": form.get("publication-title").encode("utf8") or None, + "journal": form.get("publication-journal").encode("utf8") or None, + "volume": form.get("publication-volume").encode("utf8") or None, + "pages": form.get("publication-pages").encode("utf8") or None, + "month": (form.get("publication-month") or "").encode("utf8").capitalize() or None, + "year": form.get("publication-year").encode("utf8") or None + },)) + flash("New publication created!", "alert alert-success") + return redirect(url_for( + request.args.get("return_to") or "publications.view_publication", + publication_id=publications[0]["publication_id"], + **request.args)) + + flash("Publication creation failed!", "alert alert-danger") + app.logger.debug("Failed to create the new publication.", exc_info=True) + return redirect(url_for("publications.create_publication")) diff --git a/uploader/route_utils.py b/uploader/route_utils.py index 18eadda..ce718fb 100644 --- a/uploader/route_utils.py +++ b/uploader/route_utils.py @@ -6,7 +6,8 @@ from gn_libs.mysqldb import database_connection from uploader.population.models import (populations_by_species, population_by_species_and_id) -def generic_select_population(# pylint: disable=[too-many-arguments] +def generic_select_population( + # pylint: disable=[too-many-arguments, too-many-positional-arguments] species: dict, template: str, population_id: str, diff --git a/uploader/samples/models.py b/uploader/samples/models.py index d7d5384..b419d61 100644 --- a/uploader/samples/models.py +++ b/uploader/samples/models.py @@ -15,11 +15,11 @@ def samples_by_species_and_population( """Fetch the samples by their species and population.""" with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( - "SELECT iset.InbredSetId, s.* FROM InbredSet AS iset " - "INNER JOIN StrainXRef AS sxr ON iset.InbredSetId=sxr.InbredSetId " - "INNER JOIN Strain AS s ON sxr.StrainId=s.Id " - "WHERE s.SpeciesId=%(species_id)s " - "AND iset.InbredSetId=%(population_id)s", + "SELECT InbredSet.InbredSetId, Strain.* FROM InbredSet " + "INNER JOIN StrainXRef ON InbredSet.InbredSetId=StrainXRef.InbredSetId " + "INNER JOIN Strain ON StrainXRef.StrainId=Strain.Id " + "WHERE Strain.SpeciesId=%(species_id)s " + "AND InbredSet.InbredSetId=%(population_id)s", 
{"species_id": species_id, "population_id": population_id}) return tuple(cursor.fetchall()) diff --git a/uploader/static/css/styles.css b/uploader/static/css/styles.css index 80c5a56..df50dec 100644 --- a/uploader/static/css/styles.css +++ b/uploader/static/css/styles.css @@ -5,7 +5,7 @@ body { margin: 0.7em; display: grid; - grid-template-columns: 1fr 9fr; + grid-template-columns: 2fr 8fr; grid-gap: 20px; font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; @@ -100,15 +100,32 @@ body { padding-left: 0.5em; } -#main #all-content { - /* Place it in the parent element */ - grid-column-start: 1; - grid-column-end: 3; +@media screen and (max-width: 20in) { + #main #all-content { + /* Place it in the parent element */ + grid-column-start: 1; + grid-column-end: 3; - /* Define layout for the children elements */ - display: grid; - grid-template-columns: 7fr 3fr; /* For a maximum screen width of 1366 pixels */ - grid-gap: 1.5em; + /* Define layout for the children elements */ + max-width: 80%; + } + + #sidebar-content { + display: none; + } +} + +@media screen and (min-width: 20.1in) { + #main #all-content { + /* Place it in the parent element */ + grid-column-start: 1; + grid-column-end: 3; + + /* Define layout for the children elements */ + display: grid; + grid-template-columns: 7fr 3fr; + grid-gap: 1.5em; + } } #main #all-content .row { @@ -162,3 +179,9 @@ table.dataTable thead th, table.dataTable tfoot th{ table.dataTable tbody tr.selected td { background-color: #ffee99 !important; } + +.form-group { + margin-bottom: 2em; + padding-bottom: 0.2em; + border-bottom: solid gray 1px; +} diff --git a/uploader/static/js/debug.js b/uploader/static/js/debug.js new file mode 100644 index 0000000..eb01209 --- /dev/null +++ b/uploader/static/js/debug.js @@ -0,0 +1,40 @@ +/** + * The entire purpose of this function is for use to debug values inline + * without changing the flow of the code too much. + * + * This **MUST** be a non-arrow function to allow access to the `arguments` + * object. + * + * This function expects at least one argument. + * + * If more than one argument is provided, then: + * a) the last argument is considered the value, and will be returned + * b) all other arguments will be converted to string and output + * + * If only one argument is provided, it is considered the value, and will be + * returned. + * + * Zero arguments is an error condition. + **/ +function __pk__(val) { + /* Handle zero arguments */ + if (arguments.length < 1) { + throw new Error("Invalid arguments: Expected at least one argument."); + } + + msg = "/********** DEBUG **********/"; + if (arguments.length > 1) { + msg = Array.from( + arguments + ).slice( + 0, + arguments.length - 1 + ).map((val) => { + return String(val); + }).join("; ") + } + + value = arguments[arguments.length - 1]; + console.debug("/********** " + msg + " **********/", value); + return value; +} diff --git a/uploader/static/js/misc.js b/uploader/static/js/misc.js deleted file mode 100644 index cf7b39e..0000000 --- a/uploader/static/js/misc.js +++ /dev/null @@ -1,6 +0,0 @@ -"Miscellaneous functions and event-handlers" - -$(".not-implemented").click((event) => { - event.preventDefault(); - alert("This feature is not implemented yet. 
Please bear with us.");
-});
diff --git a/uploader/static/js/pubmed.js b/uploader/static/js/pubmed.js
new file mode 100644
index 0000000..9afd4c3
--- /dev/null
+++ b/uploader/static/js/pubmed.js
@@ -0,0 +1,113 @@
+var extract_details = (pubmed_id, details) => {
+    var months = {
+        "jan": "January",
+        "feb": "February",
+        "mar": "March",
+        "apr": "April",
+        "may": "May",
+        "jun": "June",
+        "jul": "July",
+        "aug": "August",
+        "sep": "September",
+        "oct": "October",
+        "nov": "November",
+        "dec": "December"
+    };
+    var _date = details[pubmed_id].pubdate.split(" ");
+    return {
+        "authors": details[pubmed_id].authors.map((authobj) => {
+            return authobj.name;
+        }),
+        "title": details[pubmed_id].title,
+        "journal": details[pubmed_id].fulljournalname,
+        "volume": details[pubmed_id].volume,
+        "pages": details[pubmed_id].pages,
+        "month": _date.length > 1 ? months[_date[1].toLowerCase()] : "jan",
+        "year": _date[0],
+    };
+};
+
+var update_publication_details = (details) => {
+    Object.entries(details).forEach((entry) => {
+        switch(entry[0]) {
+        case "authors":
+            $("#txt-publication-authors").val(entry[1].join(", "));
+            break;
+        case "month":
+            $("#select-publication-month")
+                .children("option")
+                .each((index, child) => {
+                    child.selected = child.value == entry[1].toLowerCase();
+                });
+            break;
+        default:
+            $("#txt-publication-" + entry[0]).val(entry[1]);
+            break;
+        }
+    });
+};
+
+var fetch_publication_abstract = (pubmed_id, pub_details) => {
+    $.ajax("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
+           {
+               "method": "GET",
+               "data": {
+                   "db": "pubmed",
+                   "id": pubmed_id,
+                   "rettype": "abstract",
+                   "retmode": "xml"
+               },
+               "success": (data, textStatus, jqXHR) => {
+                   update_publication_details({
+                       ...pub_details,
+                       ...{
+                           "abstract": Array.from(data
+                                                  .getElementsByTagName(
+                                                      "Abstract")[0]
+                                                  .children)
+                               .map((elt) => {return elt.textContent.trim();})
+                               .join("\r\n")
+                       }});
+               },
+               "error": (jqXHR, textStatus, errorThrown) => {},
+               "complete": (jqXHR, textStatus) => {},
+               "dataType": "xml"
+           });
+};
+
+var fetch_publication_details = (pubmed_id, complete_thunks) => {
+    let error_display = $("#search-pubmed-id-error");
+    error_display.text("");
+    add_class(error_display, "visually-hidden");
+    $.ajax("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
+           {
+               "method": "GET",
+               "data": {"db": "pubmed", "id": pubmed_id, "format": "json"},
+               "success": (data, textStatus, jqXHR) => {
+                   // process and update publication details
+                   let hasError = (
+                       Object.hasOwn(data, "error") ||
+                       Object.hasOwn(data.result[pubmed_id], "error"));
+                   if(hasError) {
+                       error_display.text(
+                           "There was an error fetching a publication with " +
+                           "the given PubMed ID! The error received " +
+                           "was: '" + (
+                               data.error ||
+                               data.result[pubmed_id].error) +
+                           "'. Please check the ID you provided and try " +
+                           "again.");
+                       remove_class(error_display, "visually-hidden");
+                   } else {
+                       fetch_publication_abstract(
+                           pubmed_id,
+                           extract_details(pubmed_id, data.result));
+                   }
+               },
+               "error": (jqXHR, textStatus, errorThrown) => {},
+               "complete": () => {
+                   complete_thunks.forEach((thunk) => {thunk()});
+               },
+               "dataType": "json"
+           });
+};
diff --git a/uploader/static/js/utils.js b/uploader/static/js/utils.js
index 045dd47..1b31661 100644
--- a/uploader/static/js/utils.js
+++ b/uploader/static/js/utils.js
@@ -8,3 +8,30 @@ function trigger_change_event(element) {
     evt = new Event("change");
     element.dispatchEvent(evt);
 }
+
+
+var remove_class = (element, classvalue) => {
+    let new_classes = (element.attr("class") || "").split(" ").map((val) => {
+        return val.trim();
+    }).filter((val) => {
+        return ((val !== classvalue) &&
+                (val !== ""))
+    }).join(" ");
+
+    if(new_classes === "") {
+        element.removeAttr("class");
+    } else {
+        element.attr("class", new_classes);
+    }
+};
+
+
+var add_class = (element, classvalue) => {
+    remove_class(element, classvalue);
+    element.attr("class", (element.attr("class") || "") + " " + classvalue);
+};
+
+$(".not-implemented").click((event) => {
+    event.preventDefault();
+    alert("This feature is not implemented yet. Please bear with us.");
+});
diff --git a/uploader/templates/base.html b/uploader/templates/base.html
index de9c226..3c0d0d4 100644
--- a/uploader/templates/base.html
+++ b/uploader/templates/base.html
@@ -32,7 +32,7 @@
           <a href="{{url_for('oauth2.logout')}}"
              title="Log out of the system">
             <span class="glyphicon glyphicon-user"></span>
-            Sign Out</a>
+            {{user_email()}} Sign Out</a>
           {%else%}
           <a href="{{authserver_authorise_uri()}}"
              title="Log in to the system">Sign In</a>
@@ -154,7 +154,7 @@
 
     <!-- local dependencies -->
-    <script type="text/javascript" src="/static/js/misc.js"></script>
+    <script type="text/javascript" src="/static/js/utils.js"></script>
     <script type="text/javascript" src="/static/js/datatables.js"></script>
 
     {%block javascript%}{%endblock%}
   </body>
diff --git a/uploader/templates/jobs/job-error.html b/uploader/templates/jobs/job-error.html
new file mode 100644
index 0000000..b3015fc
--- /dev/null
+++ b/uploader/templates/jobs/job-error.html
@@ -0,0 +1,17 @@
+{%extends "base.html"%}
+
+{%from "flash_messages.html" import flash_all_messages%}
+
+{%block title%}Background Jobs: Error{%endblock%}
+
+{%block pagetitle%}Background Jobs: Error{%endblock%}
+
+{%block contents%}
+
+<h1>Background Jobs: Error</h1>
+<p>Job <strong>{{job["job_id"]}}</strong> failed!</p>
+<p>The error details are in the "STDERR" section below.</p>
+
+<h2>STDERR</h2>
+<pre>{{job["stderr"]}}</pre>
+{%endblock%}
diff --git a/uploader/templates/jobs/job-status.html b/uploader/templates/jobs/job-status.html
index 2750fcd..83c02fd 100644
--- a/uploader/templates/jobs/job-status.html
+++ b/uploader/templates/jobs/job-status.html
@@ -13,7 +13,7 @@
 {%block contents%}
 
 <p>Status: {{job["metadata"]["status"]}}</p>
-<p>Status: {{job_type}}</p>
+<p>Job Type: {{job["metadata"]["job-type"]}}</p>
 
 <h2>STDOUT</h2>
 <pre>{{job["stdout"]}}</pre>
@@ -21,6 +21,4 @@
 <h2>STDERR</h2>
 <pre>{{job["stderr"]}}</pre>
 
-<hr />
-<p>The Job: {{job["metadata"]}}</p>
 {%endblock%}
diff --git a/uploader/templates/phenotypes/add-phenotypes-base.html b/uploader/templates/phenotypes/add-phenotypes-base.html
index a2d9484..9909c20 100644
--- a/uploader/templates/phenotypes/add-phenotypes-base.html
+++ b/uploader/templates/phenotypes/add-phenotypes-base.html
@@ -42,110 +42,30 @@
       {%block 
frm_add_phenotypes_elements%}{%endblock%} - <div class="checkbox"> - <label> - <input id="chk-published" type="checkbox" name="published?" /> - These phenotypes are published</label> - </div> - - <fieldset id="fldset-publication-info" class="visually-hidden"> + <fieldset id="fldset-publication-info"> <legend>Publication Information</legend> - <div class="form-group"> - <label for="txt-pubmed-id" class="form-label">Pubmed ID</label> - <div class="input-group"> - <input id="txt-pubmed-id" name="pubmed-id" type="text" - class="form-control" /> - <span class="input-group-btn"> - <button id="btn-search-pubmed-id" class="btn btn-info">Search</button> - </span> - </div> - <span id="search-pubmed-id-error" - class="form-text text-muted text-danger visually-hidden"> - </span><br /> - <span class="form-text text-muted"> - Enter your publication's PubMed ID above and click "Search" to search - for some (or all) of the publication details requested below. - </span> - </div> - - <div class="form-group"> - <label for="txt-publication-authors" class="form-label">Authors</label> - <input id="txt-publication-authors" name="publication-authors" - type="text" class="form-control" /> - <span class="form-text text-muted"> - Enter the authors in the following format …</span> - </div> - - <div class="form-group"> - <label for="txt-publication-title" class="form-label"> - Publication Title</label> - <input id="txt-publication-title" name="publication-title" type="text" - class="form-control" /> - <span class="form-text text-muted"> - Enter your publication's title.</span> - </div> - - <div class="form-group"> - <label for="txt-publication-abstract" class="form-label"> - Publication Abstract</label> - <textarea id="txt-publication-abstract" name="publication-abstract" - class="form-control" rows="10"></textarea> - <span class="form-text text-muted"> - Enter the abstract for your publication.</span> - </div> - - <div class="form-group"> - <label for="txt-publication-journal" class="form-label">Journal</label> - <input id="txt-publication-journal" name="journal" type="text" - class="form-control" /> - <span class="form-text text-muted"> - Enter the name of the journal where your work was published.</span> - </div> - - <div class="form-group"> - <label for="txt-publication-volume" class="form-label">Volume</label> - <input id="txt-publication-volume" name="publication-volume" type="text" - class="form-control" /> - <span class="form-text text-muted"> - Enter the volume in the following format …</span> - </div> - - <div class="form-group"> - <label for="txt-publication-pages" class="form-label">Pages</label> - <input id="txt-publication-pages" name="publication-pages" type="text" - class="form-control" /> - <span class="form-text text-muted"> - Enter the journal volume where your work was published.</span> - </div> - - <div class="form-group"> - <label for="select-publication-month" class="form-label"> - Publication Month</label> - <select id="select-publication-month" name="publication-month" - class="form-control"> - {%for month in monthnames%} - <option value="{{month | lower}}" - {%if current_month | lower == month | lower%} - selected="selected" - {%endif%}>{{month | capitalize}}</option> - {%endfor%} - </select> - <span class="form-text text-muted"> - Select the month when the work was published. 
- <span class="text-danger"> - This cannot be before, say 1600 and cannot be in the future!</span></span> - </div> - - <div class="form-group"> - <label for="txt-publication-year" class="form-label">Publication Year</label> - <input id="txt-publication-year" name="publication-year" type="text" - class="form-control" value="{{current_year}}" /> - <span class="form-text text-muted"> - Enter the year your work was published. - <span class="text-danger"> - This cannot be before, say 1600 and cannot be in the future!</span> - </span> - </div> + <input type="hidden" name="publication-id" id="txt-publication-id" /> + <span class="form-text text-muted"> + Select a publication for your data. <br /> + Can't find a publication you can use? Go ahead and + <a href="{{url_for( + 'publications.create_publication', + return_to='species.populations.phenotypes.add_phenotypes', + species_id=species.SpeciesId, + population_id=population.Id, + dataset_id=dataset.Id)}}">create a new publication</a>.</span> + <table id="tbl-select-publication" class="table compact stripe"> + <thead> + <tr> + <th>#</th> + <th>PubMed ID</th> + <th>Title</th> + <th>Authors</th> + </tr> + </thead> + + <tbody></tbody> + </table> </fieldset> <div class="form-group"> @@ -165,165 +85,80 @@ {%block javascript%} <script type="text/javascript"> - var remove_class = (element, classvalue) => { - new_classes = (element.attr("class") || "").split(" ").map((val) => { - return val.trim(); - }).filter((val) => { - return ((val !== classvalue) && - (val !== "")) - }).join(" "); - - if(new_classes === "") { - element.removeAttr("class"); - } else { - element.attr("class", new_classes); - } - }; - - var add_class = (element, classvalue) => { - remove_class(element, classvalue); - element.attr("class", (element.attr("class") || "") + " " + classvalue); - }; - - $("#chk-published").on("click", (event) => { - pub_details = $("#fldset-publication-info") - if(event.target.checked) { - // display the publication details - remove_class(pub_details, "visually-hidden"); - } else { - // hide the publication details - add_class(pub_details, "visually-hidden"); - } - }); - - var extract_details = (pubmed_id, details) => { - var months = { - "jan": "January", - "feb": "February", - "mar": "March", - "apr": "April", - "may": "May", - "jun": "June", - "jul": "July", - "aug": "August", - "sep": "September", - "oct": "October", - "nov": "November", - "dec": "December" - }; - var _date = details[pubmed_id].pubdate.split(" "); - return { - "authors": details[pubmed_id].authors.map((authobj) => { - return authobj.name; - }), - "title": details[pubmed_id].title, - "journal": details[pubmed_id].fulljournalname, - "volume": details[pubmed_id].volume, - "pages": details[pubmed_id].pages, - "month": _date.length > 1 ? 
months[_date[1].toLowerCase()] : "jan",
-          "year": _date[0],
-      };
-  };
-
-  var update_publication_details = (details) => {
-      Object.entries(details).forEach((entry) => {;
-          switch(entry[0]) {
-          case "authors":
-              $("#txt-publication-authors").val(entry[1].join(", "));
-              break;
-          case "month":
-              $("#select-publication-month")
-                  .children("option")
-                  .each((index, child) => {
-                      child.selected = child.value == entry[1].toLowerCase();
-                  });
-          default:
-              $("#txt-publication-" + entry[0]).val(entry[1]);
-              break;
-          }
+  $(function() {
+      var publicationsDataTable = buildDataTable(
+          "#tbl-select-publication",
+          [],
+          [
+              {data: "index"},
+              {
+                  searchable: true,
+                  data: (pub) => {
+                      if(pub.PubMed_ID) {
+                          return `<a href="https://pubmed.ncbi.nlm.nih.gov/` +
+                              `${pub.PubMed_ID}/" target="_blank" ` +
+                              `title="Link to publication on NCBI.">` +
+                              `${pub.PubMed_ID}</a>`;
+                      }
+                      return "";
+                  }
+              },
+              {
+                  searchable: true,
+                  data: (pub) => {
+                      var title = "⸻";
+                      if(pub.Title) {
+                          title = pub.Title
+                      }
+                      return `<a href="/publications/view/${pub.Id}" ` +
+                          `target="_blank" ` +
+                          `title="Link to view publication details">` +
+                          `${title}</a>`;
+                  }
+              },
+              {
+                  searchable: true,
+                  data: (pub) => {
+                      let authors = pub.Authors.split(",").map(
+                          (item) => {return item.trim();});
+                      if(authors.length > 1) {
+                          return authors[0] + ", et al.";
+                      }
+                      return authors[0];
+                  }
+              }
+          ],
+          {
+              serverSide: true,
+              ajax: {
+                  url: "/publications/list",
+                  dataSrc: "publications"
+              },
+              select: "single",
+              paging: true,
+              scrollY: 700,
+              deferRender: true,
+              scroller: true,
+              scrollCollapse: true,
+              layout: {
+                  topStart: "info",
+                  topEnd: "search"
+              }
+          });
+      publicationsDataTable.on("select", (event, datatable, type, indexes) => {
+          indexes.forEach((element, index, thearray) => {
+              $("#frm-add-phenotypes #txt-publication-id").val(
+                  datatable.row(element).data().Id);
+          });
+      });
+      publicationsDataTable.on("deselect", (event, datatable, type, indexes) => {
+          indexes.forEach((element, index, thearray) => {
+              $("#frm-add-phenotypes #txt-publication-id").val(null);
+          });
+      });
-      });
-  };
-
-  var fetch_publication_abstract = (pubmed_id, pub_details) => {
-      $.ajax("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
-             {
-                 "method": "GET",
-                 "data": {
-                     "db": "pubmed",
-                     "id": pubmed_id,
-                     "rettype": "abstract",
-                     "retmode": "xml"
-                 },
-                 "success": (data, textStatus, jqXHR) => {
-                     update_publication_details({
-                         ...pub_details,
-                         ...{
-                             "abstract": Array.from(data
-                                                    .getElementsByTagName(
-                                                        "Abstract")[0]
-                                                    .children)
-                                 .map((elt) => {return elt.textContent.trim();})
-                                 .join("\r\n")
-                         }});
-                 },
-                 "error": (jqXHR, textStatus, errorThrown) => {},
-                 "complete": (jqXHR, textStatus) => {},
-                 "dataType": "xml"
-             });
-  };
-
-  var fetch_publication_details = (pubmed_id, complete_thunks) => {
-      error_display = $("#search-pubmed-id-error");
-      error_display.text("");
-      add_class(error_display, "visually-hidden");
-      $.ajax("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
-             {
-                 "method": "GET",
-                 "data": {"db": "pubmed", "id": pubmed_id, "format": "json"},
-                 "success": (data, textStatus, jqXHR) => {
-                     // process and update publication details
-                     hasError = (
-                         Object.hasOwn(data, "error") ||
-                         Object.hasOwn(data.result[pubmed_id], "error"));
-                     if(hasError) {
-                         error_display.text(
-                             "There was an error fetching a publication with " +
-                             "the given PubMed ID! 
The error received " + - "was: '" + ( - data.error || - data.result[pubmed_id].error) + - "'. Please check ID you provided and try " + - "again."); - remove_class(error_display, "visually-hidden"); - } else { - fetch_publication_abstract( - pubmed_id, - extract_details(pubmed_id, data.result)); - } - }, - "error": (jqXHR, textStatus, errorThrown) => {}, - "complete": () => { - complete_thunks.forEach((thunk) => {thunk()}); - }, - "dataType": "json" - }); - }; - - $("#btn-search-pubmed-id").on("click", (event) => { - event.preventDefault(); - var search_button = event.target; - var pubmed_id = $("#txt-pubmed-id").val().trim(); - remove_class($("#txt-pubmed-id").parent(), "has-error"); - if(pubmed_id == "") { - add_class($("#txt-pubmed-id").parent(), "has-error"); - return false; - } - - search_button.disabled = true; - // Fetch publication details - fetch_publication_details(pubmed_id, - [() => {search_button.disabled = false;}]); - return false; }); </script> diff --git a/uploader/templates/phenotypes/add-phenotypes-raw-files.html b/uploader/templates/phenotypes/add-phenotypes-raw-files.html index 57ab776..67b56e3 100644 --- a/uploader/templates/phenotypes/add-phenotypes-raw-files.html +++ b/uploader/templates/phenotypes/add-phenotypes-raw-files.html @@ -105,111 +105,213 @@ </div> </fieldset> -<fieldset id="fldset-data-files"> +<fieldset id="fldset-files"> <legend>Data File(s)</legend> - <div class="form-group non-resumable-elements"> - <label for="finput-phenotype-descriptions" class="form-label"> - Phenotype Descriptions</label> - <input id="finput-phenotype-descriptions" - name="phenotype-descriptions" - class="form-control" - type="file" - data-preview-table="tbl-preview-pheno-desc" - required="required" /> - <span class="form-text text-muted"> - Provide a file that contains only the phenotype descriptions, - <a href="#docs-file-phenotype-description" - title="Documentation of the phenotype data file format."> - the documentation for the expected format of the file</a>.</span> - </div> - - {{display_resumable_elements( - "resumable-phenotype-descriptions", - "phenotype descriptions", - '<p>You can drop a CSV file that contains the phenotype descriptions here, - or you can click the "Browse" button (below and to the right) to select it - from your computer.</p> - <p>The CSV file must conform to some standards, as documented in the - <a href="#docs-file-phenotype-description" - title="Documentation of the phenotype data file format."> - "Phenotypes Descriptions" documentation</a> section below.</p>')}} - - - <div class="form-group non-resumable-elements"> - <label for="finput-phenotype-data" class="form-label">Phenotype Data</label> - <input id="finput-phenotype-data" - name="phenotype-data" - class="form-control" - type="file" - data-preview-table="tbl-preview-pheno-data" - required="required" /> - <span class="form-text text-muted"> - Provide a file that contains only the phenotype data. 
See - <a href="#docs-file-phenotype-data" - title="Documentation of the phenotype data file format."> - the documentation for the expected format of the file</a>.</span> - </div> - - {{display_resumable_elements( - "resumable-phenotype-data", - "phenotype data", - '<p>You can drop a CSV file that contains the phenotype data here, - or you can click the "Browse" button (below and to the right) to select it - from your computer.</p> - <p>The CSV file must conform to some standards, as documented in the - <a href="#docs-file-phenotype-data" - title="Documentation of the phenotype data file format."> - "Phenotypes Data" documentation</a> section below.</p>')}} - - {%if population.Family in families_with_se_and_n%} - <div class="form-group non-resumable-elements"> - <label for="finput-phenotype-se" class="form-label">Phenotype: Standard Errors</label> - <input id="finput-phenotype-se" - name="phenotype-se" - class="form-control" - type="file" - data-preview-table="tbl-preview-pheno-se" - required="required" /> - <span class="form-text text-muted"> - Provide a file that contains only the standard errors for the phenotypes, - computed from the data above.</span> - </div> - {{display_resumable_elements( - "resumable-phenotype-se", - "standard errors", - '<p>You can drop a CSV file that contains the computed standard-errors data - here, or you can click the "Browse" button (below and to the right) to - select it from your computer.</p> - <p>The CSV file must conform to some standards, as documented in the - <a href="#docs-file-phenotype-se" - title="Documentation of the phenotype data file format."> - "Phenotypes Data" documentation</a> section below.</p>')}} + <fieldset id="fldset-descriptions-file"> + <div class="form-group"> + <div class="form-check"> + <input id="chk-phenotype-descriptions-transposed" + name="phenotype-descriptions-transposed" + type="checkbox" + class="form-check-input" + style="border: solid #8EABF0" /> + <label for="chk-phenotype-descriptions-transposed" + class="form-check-label"> + Description file transposed?</label> + </div> + + <div class="non-resumable-elements"> + <label for="finput-phenotype-descriptions" class="form-label"> + Phenotype Descriptions</label> + <input id="finput-phenotype-descriptions" + name="phenotype-descriptions" + class="form-control" + type="file" + data-preview-table="tbl-preview-pheno-desc" + required="required" /> + <span class="form-text text-muted"> + Provide a file that contains only the phenotype descriptions, + <a href="#docs-file-phenotype-description" + title="Documentation of the phenotype data file format."> + the documentation for the expected format of the file</a>.</span> + </div> + {{display_resumable_elements( + "resumable-phenotype-descriptions", + "phenotype descriptions", + '<p>Drag and drop the CSV file that contains the descriptions of your + phenotypes here.</p> + + <p>The CSV file should be a matrix of + <strong>phenotypes × descriptions</strong> i.e. The first column + contains the phenotype names/identifiers whereas the first row is a list + of metadata fields like, "description", "units", etc.</p> + + <p>If the format is transposed (i.e. + <strong>descriptions × phenotypes</strong>) select the checkbox above. 
+ </p> + + <p>Please see the + <a href="#docs-file-phenotype-description" + title="Documentation of the phenotype data file format."> + "Phenotypes Descriptions" documentation</a> section below for more + information on the expected format of the file provided here.</p>')}} + {{display_preview_table( + "tbl-preview-pheno-desc", "phenotype descriptions")}} + </div> + </fieldset> + + + <fieldset id="fldset-data-file"> + <div class="form-group"> + <div class="form-check"> + <input id="chk-phenotype-data-transposed" + name="phenotype-data-transposed" + type="checkbox" + class="form-check-input" + style="border: solid #8EABF0" /> + <label for="chk-phenotype-data-transposed" class="form-check-label"> + Data file transposed?</label> + </div> + + <div class="non-resumable-elements"> + <label for="finput-phenotype-data" class="form-label">Phenotype Data</label> + <input id="finput-phenotype-data" + name="phenotype-data" + class="form-control" + type="file" + data-preview-table="tbl-preview-pheno-data" + required="required" /> + <span class="form-text text-muted"> + Provide a file that contains only the phenotype data. See + <a href="#docs-file-phenotype-data" + title="Documentation of the phenotype data file format."> + the documentation for the expected format of the file</a>.</span> + </div> + + {{display_resumable_elements( + "resumable-phenotype-data", + "phenotype data", + '<p>Drag and drop a CSV file that contains the phenotypes numerical data + here. You can click the "Browse" button (below and to the right) to + select the file from your computer.</p> + + <p>The CSV should be a matrix of <strong>samples × phenotypes</strong>, + i.e. The first column contains the samples identifiers while the first + row is the list of phenotypes identifiers occurring in the phenotypes + descriptions file.</p> + + <p>If the format is transposed (i.e <strong>phenotypes × samples</strong>) + select the checkbox above.</p> + <p>Please see the + <a href="#docs-file-phenotype-data" + title="Documentation of the phenotype data file format."> + "Phenotypes Data" documentation</a> section below for more information + on the expected format for the file provided here.</p>')}} + {{display_preview_table("tbl-preview-pheno-data", "phenotype data")}} + </div> + </fieldset> - <div class="form-group non-resumable-elements"> - <label for="finput-phenotype-n" class="form-label">Phenotype: Number of Samples/Individuals</label> - <input id="finput-phenotype-n" - name="phenotype-n" - class="form-control" - type="file" - data-preview-table="tbl-preview-pheno-n" - required="required" /> - <span class="form-text text-muted"> - Provide a file that contains only the number of samples/individuals used in - the computation of the standard errors above.</span> - </div> - {{display_resumable_elements( - "resumable-phenotype-n", - "number of samples/individuals", - '<p>You can drop a CSV file that contains the number of samples/individuals - used in computation of the standard-errors here, or you can click the - "Browse" button (below and to the right) to select it from your computer. 
- </p> - <p>The CSV file must conform to some standards, as documented in the - <a href="#docs-file-phenotype-n" - title="Documentation of the phenotype data file format."> - "Phenotypes Data" documentation</a> section below.</p>')}} + {%if population.Family in families_with_se_and_n%} + <fieldset id="fldset-se-file"> + <div class="form-group"> + <div class="form-check"> + <input id="chk-phenotype-se-transposed" + name="phenotype-se-transposed" + type="checkbox" + class="form-check-input" + style="border: solid #8EABF0" /> + <label for="chk-phenotype-se-transposed" class="form-check-label"> + Standard-Errors file transposed?</label> + </div> + <div class="group non-resumable-elements"> + <label for="finput-phenotype-se" class="form-label">Phenotype: Standard Errors</label> + <input id="finput-phenotype-se" + name="phenotype-se" + class="form-control" + type="file" + data-preview-table="tbl-preview-pheno-se" + required="required" /> + <span class="form-text text-muted"> + Provide a file that contains only the standard errors for the phenotypes, + computed from the data above.</span> + </div> + + {{display_resumable_elements( + "resumable-phenotype-se", + "standard errors", + '<p>Drag and drop a CSV file that contains the phenotypes standard-errors + data here. You can click the "Browse" button (below and to the right) to + select the file from your computer.</p> + + <p>The CSV should be a matrix of <strong>samples × phenotypes</strong>, + i.e. The first column contains the samples identifiers while the first + row is the list of phenotypes identifiers occurring in the phenotypes + descriptions file.</p> + + <p>If the format is transposed (i.e <strong>phenotypes × samples</strong>) + select the checkbox above.</p> + + <p>Please see the + <a href="#docs-file-phenotype-se" + title="Documentation of the phenotype data file format."> + "Phenotypes Data" documentation</a> section below for more information + on the expected format of the file provided here.</p>')}} + + {{display_preview_table("tbl-preview-pheno-se", "standard errors")}} + </div> + </fieldset> + + + <fieldset id="fldset-n-file"> + <div class="form-group"> + <div class="form-check"> + <input id="chk-phenotype-n-transposed" + name="phenotype-n-transposed" + type="checkbox" + class="form-check-input" + style="border: solid #8EABF0" /> + <label for="chk-phenotype-n-transposed" class="form-check-label"> + Counts file transposed?</label> + </div> + <div class="non-resumable-elements"> + <label for="finput-phenotype-n" class="form-label">Phenotype: Number of Samples/Individuals</label> + <input id="finput-phenotype-n" + name="phenotype-n" + class="form-control" + type="file" + data-preview-table="tbl-preview-pheno-n" + required="required" /> + <span class="form-text text-muted"> + Provide a file that contains only the number of samples/individuals used in + the computation of the standard errors above.</span> + </div> + + {{display_resumable_elements( + "resumable-phenotype-n", + "number of samples/individuals", + '<p>Drag and drop a CSV file that contains the samples\' phenotypes counts + data here. You can click the "Browse" button (below and to the right) to + select the file from your computer.</p> + + <p>The CSV should be a matrix of <strong>samples × phenotypes</strong>, + i.e. 
The first column contains the samples identifiers while the first
+      row is the list of phenotypes identifiers occurring in the phenotypes
+      descriptions file.</p>
+
+      <p>If the format is transposed (i.e <strong>phenotypes × samples</strong>)
+      select the checkbox above.</p>
+
+      <p>Please see the
+      <a href="#docs-file-phenotype-n"
+         title="Documentation of the phenotype data file format.">
+        "Phenotypes Data" documentation</a> section below for more information
+      on the expected format of the file provided here.</p>')}}
+
+      {{display_preview_table("tbl-preview-pheno-n", "number of samples/individuals")}}
+    </div>
+  </fieldset>
 </fieldset>
 {%endif%}
 {%endblock%}
@@ -322,15 +424,15 @@
     <span id="docs-file-phenotype-data"></span>
     <span id="docs-file-phenotype-se"></span>
     <span id="docs-file-phenotype-n"></span>
-    <p>The data is a matrix of <em>phenotypes × individuals</em>, e.g.</p>
+    <p>The data is a matrix of <em>samples (or individuals) × phenotypes</em>, e.g.</p>
     <code>
       # num-cases: 2549
      # num-phenos: 13
-      id,IND001,IND002,IND003,IND004,…<br />
-      pheno10001,61.400002,54.099998,483,49.799999,…<br />
-      pheno10002,49,50.099998,403,45.5,…<br />
-      pheno10003,62.5,53.299999,501,62.900002,…<br />
-      pheno10004,53.099998,55.099998,403,NA,…<br />
+      id,pheno10001,pheno10002,pheno10003,pheno10004,…<br />
+      IND001,61.400002,49,62.5,55.099998,…<br />
+      IND002,54.099998,50.099998,53.299999,55.099998,…<br />
+      IND003,483,403,501,403,…<br />
+      IND004,49.799999,45.5,62.900002,NA,…<br />
       ⋮<br /></code>
 
    <p>where <code>IND001,IND002,IND003,IND004,…</code> are the
@@ -346,12 +448,6 @@
 {%endblock%}
 
 {%block sidebarcontents%}
-{{display_preview_table("tbl-preview-pheno-desc", "descriptions")}}
-{{display_preview_table("tbl-preview-pheno-data", "data")}}
-{%if population.Family in families_with_se_and_n%}
-{{display_preview_table("tbl-preview-pheno-se", "standard errors")}}
-{{display_preview_table("tbl-preview-pheno-n", "number of samples")}}
-{%endif%}
 {{display_pheno_dataset_card(species, population, dataset)}}
 {%endblock%}
 
@@ -585,6 +681,7 @@
           }));
       });
       formdata.append("resumable-upload", "true");
+      formdata.append("publication-id", $("#txt-publication-id").val());
       return formdata;
   }
 
@@ -653,7 +750,7 @@
               file_input.parent(),
               $("#" + resumable_element_id),
               submit_button,
-              ["csv", "tsv"]),
+              ["csv", "tsv", "txt"]),
           file_input.parent(),
           $("#" + resumable_element_id),
           $("#" + resumable_element_id + "-browse-button")),
@@ -698,6 +795,11 @@
   $("#frm-add-phenotypes input[type=submit]").on("click", (event) => {
       event.preventDefault();
+      if ($("#txt-publication-id").val() == "") {
+          alert("You MUST provide a publication for the phenotypes.");
+          return false;
+      }
       // TODO: Check all the relevant files exist
       // TODO: Verify that files are not duplicated
       var filenames = [];
diff --git a/uploader/templates/phenotypes/create-dataset.html b/uploader/templates/phenotypes/create-dataset.html
index 8e45491..19a2b34 100644
--- a/uploader/templates/phenotypes/create-dataset.html
+++ b/uploader/templates/phenotypes/create-dataset.html
@@ -42,7 +42,7 @@
         <input type="text"
                name="dataset-name"
                id="txt-dataset-name"
-               value="{{original_formdata.get('dataset-name') or (population.InbredSetCode + 'Publish')}}"
+               value="{{original_formdata.get('dataset-name') or (population.Name + 'Publish')}}"
               {%if errors["dataset-name"] is defined%}
               class="form-control danger"
               {%else%}
@@ -51,7 +51,7 @@
                required="required" />
         <small class="form-text text-muted">
           <p>A short representative name for the dataset.</p>
-          
<p>Recommended: Use the population code and append "Publish" at the end. + <p>Recommended: Use the population name and append "Publish" at the end. <br />This field will only accept names composed of letters ('A-Za-z'), numbers (0-9), hyphens and underscores.</p> </small> @@ -86,7 +86,7 @@ name="dataset-shortname" type="text" class="form-control" - value="{{original_formdata.get('dataset-shortname') or (population.InbredSetCode + ' Publish')}}" /> + value="{{original_formdata.get('dataset-shortname') or (population.Name + 'Publish')}}" /> <small class="form-text text-muted"> <p>An optional, short name for the dataset. <br /> If this is not provided, it will default to the value provided for the diff --git a/uploader/templates/phenotypes/load-phenotypes-success.html b/uploader/templates/phenotypes/load-phenotypes-success.html new file mode 100644 index 0000000..645be16 --- /dev/null +++ b/uploader/templates/phenotypes/load-phenotypes-success.html @@ -0,0 +1,42 @@ +{%extends "phenotypes/base.html"%} +{%from "flash_messages.html" import flash_all_messages%} +{%from "macro-table-pagination.html" import table_pagination%} +{%from "phenotypes/macro-display-pheno-dataset-card.html" import display_pheno_dataset_card%} + +{%block title%}Phenotypes{%endblock%} + +{%block pagetitle%}Phenotypes{%endblock%} + +{%block lvl4_breadcrumbs%} +<li {%if activelink=="load-phenotypes-success"%} + class="breadcrumb-item active" + {%else%} + class="breadcrumb-item" + {%endif%}> + <a href="{{url_for('species.populations.phenotypes.add_phenotypes', + species_id=species.SpeciesId, + population_id=population.Id, + dataset_id=dataset.Id)}}">Add Phenotypes</a> +</li> +{%endblock%} + +{%block contents%} +<div class="row"> + <p>You have successfully loaded + <!-- maybe indicate the number of phenotypes here? -->your + new phenotypes into the database.</p> + <!-- TODO: Maybe notify user that they have sole access. --> + <!-- TODO: Maybe provide a link to go to GeneNetwork to view the data. --> + <p>View your data + <a href="{{search_page_uri}}" + target="_blank">on GeneNetwork2</a>. + You might need to login to GeneNetwork2 to view specific traits.</p> +</div> +{%endblock%} + +{%block sidebarcontents%} +{{display_pheno_dataset_card(species, population, dataset)}} +{%endblock%} + + +{%block more_javascript%}{%endblock%} diff --git a/uploader/templates/phenotypes/macro-display-preview-table.html b/uploader/templates/phenotypes/macro-display-preview-table.html index f54c53e..5a4c422 100644 --- a/uploader/templates/phenotypes/macro-display-preview-table.html +++ b/uploader/templates/phenotypes/macro-display-preview-table.html @@ -1,7 +1,7 @@ {%macro display_preview_table(tableid, filetype)%} -<div class="card" style="max-width: 676px;"> +<div class="card"> <div class="card-body"> - <h5 class="card-title">Phenotypes '{{filetype | title}}' File Preview</h5> + <h5 class="card-title">{{filetype | title}}: File Preview</h5> <div class="card-text" style="overflow: scroll;"> <table id="{{tableid}}" class="table table-condensed table-responsive"> <thead> @@ -9,9 +9,7 @@ </tr> <tbody> <tr> - <td class="data-row-template text-info"> - Provide a phenotype '{{filetype | lower}}' file to preview. 
- </td> + <td class="data-row-template text-info"></td> </tr> </tbody> </table> diff --git a/uploader/templates/phenotypes/macro-display-resumable-elements.html b/uploader/templates/phenotypes/macro-display-resumable-elements.html index 7cf3a87..ed14ea5 100644 --- a/uploader/templates/phenotypes/macro-display-resumable-elements.html +++ b/uploader/templates/phenotypes/macro-display-resumable-elements.html @@ -33,7 +33,7 @@ <a id="{{id}}-browse-button" class="resumable-browse-button btn btn-info" - href="#" + href="#{{id}}" style="margin-left: 80%;">Browse</a> <div id="{{id}}-progress-bar" class="progress visually-hidden"> diff --git a/uploader/templates/phenotypes/review-job-data.html b/uploader/templates/phenotypes/review-job-data.html index 7bc8c62..859df74 100644 --- a/uploader/templates/phenotypes/review-job-data.html +++ b/uploader/templates/phenotypes/review-job-data.html @@ -35,14 +35,28 @@ {%if job%} <div class="row"> <h3 class="heading">Data Review</h3> + <p class="text-info"><strong> + The data has <em>NOT</em> been added/saved yet. Review the details below + and click "Continue" to save the data.</strong></p> <p>The “<strong>{{dataset.FullName}}</strong>” dataset from the “<strong>{{population.FullName}}</strong>” population of the species “<strong>{{species.SpeciesName}} ({{species.FullName}})</strong>” will be updated as follows:</p> + <ul> + {%if publication%} + <li>All {{summary.get("pheno", {}).get("total-data-rows", "0")}} phenotypes + are linked to the following publication: + <ul> + <li><strong>Publication Title:</strong> + {{publication.Title or "—"}}</li> + <li><strong>Author(s):</strong> + {{publication.Authors or "—"}}</li> + </ul> + </li> + {%endif%} {%for ftype in ("phenocovar", "pheno", "phenose", "phenonum")%} {%if summary.get(ftype, False)%} - <ul> <li>A total of {{summary[ftype]["number-of-files"]}} files will be processed adding {%if ftype == "phenocovar"%}(possibly){%endif%} {{summary[ftype]["total-data-rows"]}} new @@ -53,11 +67,21 @@ {%endif%} to the database. 
        </li>
-      </ul>
       {%endif%}
       {%endfor%}
+    </ul>
 
-    <a href="#" class="not-implemented btn btn-primary">continue</a>
+    <form id="frm-review-phenotype-data"
+          method="POST"
+          action="{{url_for('species.populations.phenotypes.load_data_to_database',
+                  species_id=species.SpeciesId,
+                  population_id=population.Id,
+                  dataset_id=dataset.Id)}}">
+      <input type="hidden" name="data-qc-job-id" value="{{job.jobid}}" />
+      <input type="submit"
+             value="Continue"
+             class="btn btn-primary" />
+    </form>
 </div>
 {%else%}
 <div class="row">
diff --git a/uploader/templates/publications/create-publication.html b/uploader/templates/publications/create-publication.html
new file mode 100644
index 0000000..3f828a9
--- /dev/null
+++ b/uploader/templates/publications/create-publication.html
@@ -0,0 +1,191 @@
+{%extends "publications/base.html"%}
+{%from "flash_messages.html" import flash_all_messages%}
+
+{%block title%}Create Publication{%endblock%}
+
+{%block pagetitle%}Create Publication{%endblock%}
+
+
+{%block contents%}
+{{flash_all_messages()}}
+
+<div class="row">
+  <form id="frm-create-publication"
+        method="POST"
+        action="{{url_for('publications.create_publication', **request.args)}}"
+        class="form-horizontal">
+
+    <div class="row mb-3">
+      <label for="txt-pubmed-id" class="col-sm-2 col-form-label">
+        PubMed ID</label>
+      <div class="col-sm-10">
+        <div class="input-group">
+          <input type="text"
+                 id="txt-pubmed-id"
+                 name="pubmed-id"
+                 class="form-control"/>
+          <div class="input-group-text">
+            <button class="btn btn-outline-primary"
+                    id="btn-search-pubmed-id">search</button>
+          </div>
+        </div>
+        <span id="search-pubmed-id-error"
+              class="form-text text-muted text-danger visually-hidden">
+        </span>
+        <span class="form-text text-muted">This is the publication's ID on
+          <a href="https://pubmed.ncbi.nlm.nih.gov/"
+             title="Link to NCBI's PubMed service">NCBI's Pubmed Service</a>
+        </span>
+      </div>
+    </div>
+
+    <div class="row mb-3">
+      <label for="txt-publication-title" class="col-sm-2 col-form-label">
+        Title</label>
+      <div class="col-sm-10">
+        <input type="text"
+               id="txt-publication-title"
+               name="publication-title"
+               class="form-control" />
+        <span class="form-text text-muted">Provide the publication's title here.</span>
+      </div>
+    </div>
+
+    <div class="row mb-3">
+      <label for="txt-publication-authors" class="col-sm-2 col-form-label">
+        Authors</label>
+      <div class="col-sm-10">
+        <input type="text"
+               id="txt-publication-authors"
+               name="publication-authors"
+               required="required"
+               class="form-control" />
+        <span class="form-text text-muted">
+          A publication <strong>MUST</strong> have an author. You <em>must</em>
+          provide a value for the authors field. 
+        </span>
+      </div>
+    </div>
+
+    <div class="row mb-3">
+      <label for="txt-publication-journal" class="col-sm-2 col-form-label">
+        Journal</label>
+      <div class="col-sm-10">
+        <input type="text"
+               id="txt-publication-journal"
+               name="publication-journal"
+               class="form-control" />
+        <span class="form-text text-muted">Provide the name of the journal
+          where the work was published.</span>
+      </div>
+    </div>
+
+    <div class="row mb-3">
+      <label for="select-publication-month"
+             class="col-sm-2 col-form-label">
+        Month</label>
+      <div class="col-sm-4">
+        <select class="form-control"
+                id="select-publication-month"
+                name="publication-month">
+          <option value="">Select a month</option>
+          <option value="january">January</option>
+          <option value="february">February</option>
+          <option value="march">March</option>
+          <option value="april">April</option>
+          <option value="may">May</option>
+          <option value="june">June</option>
+          <option value="july">July</option>
+          <option value="august">August</option>
+          <option value="september">September</option>
+          <option value="october">October</option>
+          <option value="november">November</option>
+          <option value="december">December</option>
+        </select>
+        <span class="form-text text-muted">Month of publication</span>
+      </div>
+
+      <label for="txt-publication-year"
+             class="col-sm-2 col-form-label">
+        Year</label>
+      <div class="col-sm-4">
+        <input type="number"
+               id="txt-publication-year"
+               name="publication-year"
+               class="form-control"
+               min="1960" />
+        <span class="form-text text-muted">Year of publication</span>
+      </div>
+    </div>
+
+    <div class="row mb-3">
+      <label for="txt-publication-volume"
+             class="col-sm-2 col-form-label">
+        Volume</label>
+      <div class="col-sm-4">
+        <input type="text"
+               id="txt-publication-volume"
+               name="publication-volume"
+               class="form-control">
+        <span class="form-text text-muted">Journal volume</span>
+      </div>
+
+      <label for="txt-publication-pages"
+             class="col-sm-2 col-form-label">
+        Pages</label>
+      <div class="col-sm-4">
+        <input type="text"
+               id="txt-publication-pages"
+               name="publication-pages"
+               class="form-control" />
+        <span class="form-text text-muted">Journal pages for the publication</span>
+      </div>
+    </div>
+
+    <div class="row mb-3">
+      <label for="txt-publication-abstract" class="col-sm-2 col-form-label">Abstract</label>
+      <div class="col-sm-10">
+        <textarea id="txt-publication-abstract"
+                  name="publication-abstract"
+                  class="form-control"
+                  rows="7"></textarea>
+      </div>
+    </div>
+
+    <div class="row mb-3">
+      <div class="col-sm-2"></div>
+      <div class="col-sm-8">
+        <input type="submit" class="btn btn-primary" value="Add" />
+        <input type="reset" class="btn btn-danger" />
+      </div>
+    </div>
+
+  </form>
+</div>
+
+{%endblock%}
+
+
+{%block javascript%}
+<script type="text/javascript" src="/static/js/pubmed.js"></script>
+<script type="text/javascript">
+  $(function() {
+      $("#btn-search-pubmed-id").on("click", (event) => {
+          event.preventDefault();
+          var search_button = event.target;
+          var pubmed_id = $("#txt-pubmed-id").val().trim();
+          remove_class($("#txt-pubmed-id").parent(), "has-error");
+          if(pubmed_id == "") {
+              add_class($("#txt-pubmed-id").parent(), "has-error");
+              return false;
+          }
+
+          search_button.disabled = true;
+          // Fetch publication details
+          fetch_publication_details(pubmed_id,
+                                    [() => {search_button.disabled = false;}]);
+          return false;
+      });
+  });
+</script>
+{%endblock%}
diff --git a/uploader/templates/publications/index.html b/uploader/templates/publications/index.html
index f6f6fa0..f846d54 100644
--- 
a/uploader/templates/publications/index.html
+++ b/uploader/templates/publications/index.html
@@ -9,6 +9,12 @@
 {%block contents%}
 {{flash_all_messages()}}
 
+<div class="row" style="padding-bottom: 1em;">
+  <a href="{{url_for('publications.create_publication')}}"
+     class="btn btn-primary">
+    add new publication</a>
+</div>
+
 <div class="row">
   <table id="tbl-list-publications" class="table compact stripe">
     <thead>
@@ -38,7 +44,8 @@
                 data: (pub) => {
                     if(pub.PubMed_ID) {
                         return `<a href="https://pubmed.ncbi.nlm.nih.gov/` +
-                            `${pub.PubMed_ID}/" target="_blank">` +
+                            `${pub.PubMed_ID}/" target="_blank" ` +
+                            `title="Link to publication on NCBI.">` +
                             `${pub.PubMed_ID}</a>`;
                     }
                     return "";
@@ -51,7 +58,9 @@
                         title = pub.Title
                     }
                     return `<a href="/publications/view/${pub.Id}" ` +
-                        `target="_blank">${title}</a>`;
+                        `target="_blank" ` +
+                        `title="Link to view publication details">` +
+                        `${title}</a>`;
                 }
             },
             {
diff --git a/uploader/templates/publications/view-publication.html b/uploader/templates/publications/view-publication.html
new file mode 100644
index 0000000..388547a
--- /dev/null
+++ b/uploader/templates/publications/view-publication.html
@@ -0,0 +1,78 @@
+{%extends "publications/base.html"%}
+{%from "flash_messages.html" import flash_all_messages%}
+
+{%block title%}View Publication{%endblock%}
+
+{%block pagetitle%}View Publication{%endblock%}
+
+
+{%block contents%}
+{{flash_all_messages()}}
+
+<div class="row">
+  <table class="table">
+    <tr>
+      <th>PubMed</th>
+      <td>
+        {%if publication.PubMed_ID%}
+        <a href="https://pubmed.ncbi.nlm.nih.gov/{{publication.PubMed_ID}}/"
+           target="_blank">{{publication.PubMed_ID}}</a>
+        {%else%}
+        —
+        {%endif%}
+      </td>
+    </tr>
+    <tr>
+      <th>Title</th>
+      <td>{{publication.Title or "—"}}</td>
+    </tr>
+    <tr>
+      <th>Authors</th>
+      <td>{{publication.Authors or "—"}}</td>
+    </tr>
+    <tr>
+      <th>Journal</th>
+      <td>{{publication.Journal or "—"}}</td>
+    </tr>
+    <tr>
+      <th>Published</th>
+      <td>{{publication.Month or ""}} {{publication.Year or "—"}}</td>
+    </tr>
+    <tr>
+      <th>Volume</th>
+      <td>{{publication.Volume or "—"}}</td>
+    </tr>
+    <tr>
+      <th>Pages</th>
+      <td>{{publication.Pages or "—"}}</td>
+    </tr>
+    <tr>
+      <th>Abstract</th>
+      <td>
+        {%for line in (publication.Abstract or "—").replace("\r\n", "<br />").replace("\n", "<br />").split("<br />")%}
+        <p>{{line}}</p>
+        {%endfor%}
+      </td>
+    </tr>
+  </table>
+</div>
+
+<div class="row">
+  <form id="frm-edit-delete-publication" method="POST" action="#">
+    <input type="hidden" name="publication_id" value="{{publication.Id}}" />
+    <div class="form-group">
+      <input type="submit" value="edit" class="btn btn-primary not-implemented" />
+      {%if linked_phenotypes | length == 0%}
+      <input type="submit" value="delete" class="btn btn-danger not-implemented" />
+      {%endif%}
+    </div>
+  </form>
+</div>
+{%endblock%}
+
+
+{%block javascript%}
+<script type="text/javascript">
+  $(function() {});
+</script>
+{%endblock%}
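Reviewer notes (appended; illustrative sketches, not part of the patch).

The new /publications/list endpoint follows the DataTables server-side
processing contract: the client sends `draw`, `start`, `length` and
`search[value]`, and the server echoes `draw` back together with the
`recordsTotal` and `recordsFiltered` counts. A quick sketch of exercising it;
the base URL is an assumption and the real endpoint sits behind
@require_login, so an authenticated session would be needed:

    import requests

    BASE_URL = "http://localhost:8080"  # assumed local dev instance
    response = requests.get(
        f"{BASE_URL}/publications/list",
        params={
            "draw": 1,               # echo counter, returned unchanged
            "start": 0,              # offset of the first row
            "length": 25,            # page size; -1 means "no limit"
            "search[value]": "BXD",  # matched against PubMed_ID/Authors/Title
        })
    payload = response.json()
    # The keys mirror fetch_publications()'s 4-tuple return value.
    print(payload["recordsTotal"], payload["recordsFiltered"],
          len(payload["publications"]))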
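On the len() check in load_data_success(): a generator expression supports
neither len() nor a second pass, which is why the xref IDs are materialised
into a tuple before the search URL is built. A minimal illustration:

    xref_ids_gen = (str(i) for i in (10001, 10002, 10003))
    try:
        len(xref_ids_gen)
    except TypeError as exc:
        print(exc)  # "object of type 'generator' has no len()"

    # Materialising once allows both the length check and the later join:
    xref_ids = tuple(str(i) for i in (10001, 10002, 10003))
    if len(xref_ids) <= 100:
        print(" ".join(xref_ids))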
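The GN2 search URI in load_data_success() is assembled with
urllib.parse.urlunparse over a ParseResult. A standalone sketch with
illustrative values; in the view, the scheme/netloc come from the
GN2_SERVER_URL setting and the query values from the dataset and job
metadata:

    from urllib.parse import ParseResult, urlencode, urlparse, urlunparse

    gn2_uri = urlparse("https://genenetwork.org")  # assumed GN2_SERVER_URL
    search_page_uri = urlunparse(ParseResult(
        scheme=gn2_uri.scheme,
        netloc=gn2_uri.netloc,
        path="/search",
        params="",
        query=urlencode({
            "species": "mouse",
            "group": "BXD",
            "type": "Phenotypes",
            "dataset": "BXDPublish",
            "search_terms_or": "10001 10002",
            "accession_id": "None",
            "FormID": "searchResult"}),
        fragment=""))
    print(search_page_uri)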
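pubmed.js drives NCBI's public E-utilities (esummary for the metadata, efetch
for the abstract). The same flow can be checked outside the browser; the
endpoints and field names below match what the JavaScript uses, while the
error handling is deliberately elided and the trailing usage line is only a
placeholder:

    import requests
    import xml.etree.ElementTree as ET

    EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"

    def fetch_publication_details(pubmed_id: str) -> dict:
        # esummary returns JSON keyed by the PubMed ID under "result".
        summary = requests.get(
            f"{EUTILS}/esummary.fcgi",
            params={"db": "pubmed", "id": pubmed_id, "format": "json"}
        ).json()["result"][pubmed_id]
        # efetch returns the abstract as XML; join the AbstractText parts.
        abstract_xml = requests.get(
            f"{EUTILS}/efetch.fcgi",
            params={"db": "pubmed", "id": pubmed_id,
                    "rettype": "abstract", "retmode": "xml"}).text
        root = ET.fromstring(abstract_xml)
        abstract = "\r\n".join(
            elt.text.strip() for elt in root.iter("AbstractText") if elt.text)
        return {"title": summary["title"],
                "journal": summary["fulljournalname"],
                "authors": [author["name"] for author in summary["authors"]],
                "abstract": abstract}

    # e.g. fetch_publication_details("12345678")  # hypothetical PubMed ID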