From 95aeaecd87f19aa42a2e0e2c475126d4b859ed67 Mon Sep 17 00:00:00 2001 From: zsloan Date: Thu, 10 Aug 2023 16:30:23 +0000 Subject: Implement probeset sample data editing up until the approval step --- wqflask/wqflask/metadata_edits.py | 180 +++++++++++++++++++++++++++++++++++--- 1 file changed, 169 insertions(+), 11 deletions(-) diff --git a/wqflask/wqflask/metadata_edits.py b/wqflask/wqflask/metadata_edits.py index 585ddbb3..60b2ffd6 100644 --- a/wqflask/wqflask/metadata_edits.py +++ b/wqflask/wqflask/metadata_edits.py @@ -42,13 +42,16 @@ from gn3.csvcmp import csv_diff from gn3.csvcmp import extract_invalid_csv_headers from gn3.csvcmp import remove_insignificant_edits from gn3.db import diff_from_dict -from gn3.db.datasets import retrieve_sample_list, retrieve_phenotype_group_name +from gn3.db.datasets import ( + retrieve_sample_list, + retrieve_mrna_group_name, + retrieve_phenotype_group_name) from gn3.db.metadata_audit import ( create_metadata_audit, fetch_probeset_metadata_audit_by_trait_name, fetch_phenotype_metadata_audit_by_dataset_id) from gn3.db.probesets import ( - update_probeset, + update_probeset as _update_probeset, fetch_probeset_metadata_by_name) from gn3.db.phenotypes import ( fetch_trait, @@ -57,12 +60,15 @@ from gn3.db.phenotypes import ( fetch_publication_by_id, fetch_publication_by_pubmed_id, update_phenotype as _update_phenotype) +from gn3.db.sample_data import delete_sample_data from gn3.db.sample_data import ( delete_sample_data, insert_sample_data, update_sample_data, - get_trait_sample_data, - get_trait_csv_sample_data) + get_pheno_sample_data, + get_pheno_csv_sample_data, + get_mrna_sample_data, + get_mrna_csv_sample_data) metadata_edit = Blueprint("metadata_edit", __name__) @@ -116,7 +122,7 @@ def display_phenotype_metadata(dataset_id: str, name: str): group_name = retrieve_phenotype_group_name(conn, dataset_id) sample_list = retrieve_sample_list(group_name) - sample_data = get_trait_sample_data(conn, name, _d["publish_xref"]["phenotype_id"]) + sample_data = get_pheno_sample_data(conn, name, _d["publish_xref"]["phenotype_id"]) return render_template( "edit_phenotype.html", @@ -139,14 +145,23 @@ def display_probeset_metadata(name: str): from utility.tools import get_setting with database_connection(get_setting("SQL_URI")) as conn: _d = {"probeset": fetch_probeset_metadata_by_name(conn, name)} + + dataset_name=request.args["dataset_name"] + group_name = retrieve_mrna_group_name(conn, _d["probeset"]["id_"]) + sample_list = retrieve_sample_list(group_name) + sample_data = get_mrna_sample_data(conn, _d["probeset"]["id_"], dataset_name) + return render_template( "edit_probeset.html", diff=_d.get("diff"), probeset=_d.get("probeset"), + probeset_id=_d["probeset"]["id_"], name=name, resource_id=request.args.get("resource-id"), version=get_setting("GN_VERSION"), - dataset_name=request.args["dataset_name"] + dataset_name=request.args["dataset_name"], + sample_list=sample_list, + sample_data=sample_data ) @@ -180,7 +195,7 @@ def update_phenotype(dataset_id: str, name: str): group_name = retrieve_phenotype_group_name(conn, dataset_id) sample_list = retrieve_sample_list(group_name) headers = ["Strain Name", "Value", "SE", "Count"] - base_csv = get_trait_csv_sample_data( + base_csv = get_pheno_csv_sample_data( conn=conn, trait_name=str(name), phenotype_id=str(phenotype_id), @@ -375,6 +390,114 @@ View the diffs here", "success") dataset_key="dataset_id", trait_key="name") def update_probeset(name: str): from utility.tools import get_setting + data_ = request.form.to_dict() + TMPDIR = current_app.config.get("TMPDIR") + author = session.session_info()["user"]["user_id"] + probeset_id=str(data_.get("id")) + trait_name = str(data_.get("probeset_name")) + dataset_name = str(data_.get("dataset_name")) + + if not (file_ := request.files.get("file")) and data_.get('edited') == "false": + flash("No sample-data has been uploaded", "warning") + else: + create_dirs_if_not_exists( + [ + SAMPLE_DATADIR := os.path.join(TMPDIR, "sample-data"), + DIFF_DATADIR := os.path.join(SAMPLE_DATADIR, "diffs"), + UPLOAD_DATADIR := os.path.join(SAMPLE_DATADIR, "updated"), + ] + ) + + current_time = str(datetime.datetime.now().isoformat()) + _file_name = ( + f"{author}.{request.args.get('resource-id')}." f"{current_time}" + ) + diff_data = {} + with database_connection(get_setting("SQL_URI")) as conn: + group_name = retrieve_mrna_group_name(conn, str(data_.get("id"))) + sample_list = retrieve_sample_list(group_name) + headers = ["Strain Name", "Value", "SE", "Count"] + + base_csv = get_mrna_csv_sample_data( + conn=conn, + probeset_id=probeset_id, + dataset_name=dataset_name, + sample_list=retrieve_sample_list( + retrieve_mrna_group_name(conn, probeset_id)) + ) + if not (file_) and data_.get('edited') == "true": + delta_csv = create_delta_csv(base_csv, data_, sample_list) + diff_data = remove_insignificant_edits( + diff_data=csv_diff( + base_csv=base_csv, + delta_csv=delta_csv, + tmp_dir=TMPDIR, + ), + epsilon=0.001, + ) + else: + diff_data = remove_insignificant_edits( + diff_data=csv_diff( + base_csv=base_csv, + delta_csv=(delta_csv := file_.read().decode()), + tmp_dir=TMPDIR, + ), + epsilon=0.001, + ) + + invalid_headers = extract_invalid_csv_headers( + allowed_headers=headers, csv_text=delta_csv + ) + if invalid_headers: + flash( + "You have invalid headers: " + f"""{', '.join(invalid_headers)}. Valid headers """ + f"""are: {', '.join(headers)}""", + "warning", + ) + return redirect( + f"/datasets/{dataset_id}/traits/{name}" + f"?resource-id={request.args.get('resource-id')}" + f"&dataset_name={request.args['dataset_name']}" + ) + # Edge case where the csv file has not been edited! + if not any(diff_data.values()): + flash( + "You have not modified the csv file you downloaded!", "warning" + ) + return redirect( + f"/datasets/{dataset_id}/traits/{name}" + f"?resource-id={request.args.get('resource-id')}" + f"&dataset_name={request.args['dataset_name']}" + ) + + with open( + os.path.join(UPLOAD_DATADIR, f"{_file_name}.csv"), "w" + ) as f_: + f_.write(base_csv) + with open( + os.path.join(UPLOAD_DATADIR, f"{_file_name}.delta.csv"), "w" + ) as f_: + f_.write(delta_csv) + + with open(os.path.join(DIFF_DATADIR, f"{_file_name}.json"), "w") as f: + diff_data.update( + { + "trait_name": str(trait_name), + "probeset_id": str(probeset_id), + "dataset_name": dataset_name, + "resource_id": request.args.get("resource-id"), + "author": author, + "timestamp": ( + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ), + } + ) + f.write(json.dumps(diff_data, cls=CustomJSONEncoder)) + url = url_for("metadata_edit.list_diffs") + flash(f"Sample-data has been successfully uploaded. \ +View the diffs here", "success") + with database_connection(get_setting("SQL_URI")) as conn: data_ = request.form.to_dict() probeset_ = { @@ -408,7 +531,14 @@ def update_probeset(name: str): or g.user_session.record.get("user_id") or "" ) - if update_probeset(conn, data["id"], probeset_): + + updated_probesets = "" + updated_probesets = _update_probeset( + conn, probeset_id, {"id_": data_["id"], **{ + key: value for key,value in probeset_.items() + if value is not None}}) + + if updated_probesets: diff_data.update( { "Probeset": diff_from_dict( @@ -453,13 +583,13 @@ def update_probeset(name: str): ) -@metadata_edit.route("//traits//csv") +@metadata_edit.route("/pheno//traits//csv") @login_required() -def get_sample_data_as_csv(dataset_id: str, phenotype_id: int): +def get_pheno_sample_data_as_csv(dataset_id: str, phenotype_id: int): from utility.tools import get_setting with database_connection(get_setting("SQL_URI")) as conn: return Response( - get_trait_csv_sample_data( + get_pheno_csv_sample_data( conn=conn, trait_name=str(dataset_id), phenotype_id=str(phenotype_id), @@ -473,6 +603,34 @@ filename=sample-data-{dataset_id}.csv" }, ) +@metadata_edit.route("/mrna//dataset//csv") +@login_required() +def get_mrna_sample_data_as_csv(probeset_id: int, dataset_name: str): + from utility.tools import get_setting + + with database_connection(get_setting("SQL_URI")) as conn: + csv_data = get_mrna_csv_sample_data( + conn=conn, + probeset_id=str(probeset_id), + dataset_name=str(dataset_name), + sample_list=retrieve_sample_list( + retrieve_mrna_group_name(conn, probeset_id)) + ) + return Response( + get_mrna_csv_sample_data( + conn=conn, + probeset_id=str(probeset_id), + dataset_name=str(dataset_name), + sample_list=retrieve_sample_list( + retrieve_mrna_group_name(conn, probeset_id)) + ), + mimetype="text/csv", + headers={ + "Content-disposition": f"attachment; \ +filename=sample-data-{probeset_id}.csv" + }, + ) + @metadata_edit.route("/diffs") @login_required(pagename="Sample Data Diffs") -- cgit v1.2.3