diff options
Diffstat (limited to 'gn2/wqflask/api/router.py')
-rw-r--r-- | gn2/wqflask/api/router.py | 1037 |
1 files changed, 1037 insertions, 0 deletions
diff --git a/gn2/wqflask/api/router.py b/gn2/wqflask/api/router.py new file mode 100644 index 00000000..bcd08e8d --- /dev/null +++ b/gn2/wqflask/api/router.py @@ -0,0 +1,1037 @@ +# GN2 API + +import os +import io +import csv +import json +import datetime +import requests + +from zipfile import ZipFile, ZIP_DEFLATED + + +import flask +from flask import current_app +from gn2.wqflask.database import database_connection +from flask import request +from flask import make_response +from flask import send_file + +from gn2.wqflask import app + +from gn2.wqflask.api import correlation, mapping, gen_menu + +from gn2.utility.tools import flat_files, get_setting + +from gn2.wqflask.database import database_connection + + +version = "pre1" + + +@app.route("/api/v_{}/".format(version)) +def hello_world(): + return flask.jsonify({"hello": "world"}) + + +@app.route("/api/v_{}/species".format(version)) +def get_species_list(): + species_list = [] + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species" + ) + for species in cursor.fetchall(): + species_dict = { + "Id": species[0], + "Name": species[1], + "FullName": species[2], + "TaxonomyId": species[3] + } + species_list.append(species_dict) + return flask.jsonify(species_list) + + +@app.route("/api/v_{}/species/<path:species_name>".format(version)) +@app.route("/api/v_{}/species/<path:species_name>.<path:file_format>".format(version)) +def get_species_info(species_name, file_format="json"): + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT SpeciesId, Name, FullName, TaxonomyId " + "FROM Species WHERE (Name=%s OR FullName=%s " + "OR SpeciesName=%s)", ((species_name,)*3)) + _species = cursor.fetchone() + species_dict = { + "Id": _species[0], + "Name": _species[1], + "FullName": _species[2], + "TaxonomyId": _species[3] + } + + return flask.jsonify(species_dict) + + +@app.route("/api/v_{}/groups".format(version)) +@app.route("/api/v_{}/groups/<path:species_name>".format(version)) +def get_groups_list(species_name=None): + _groups = () + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + if species_name: + cursor.execute( + "SELECT InbredSet.InbredSetId, " + "InbredSet.SpeciesId, InbredSet.InbredSetName, " + "InbredSet.Name, InbredSet.FullName, " + "InbredSet.public, IFNULL(InbredSet.MappingMethodId, " + "'None'), IFNULL(InbredSet.GeneticType, 'None') " + "FROM InbredSet, Species WHERE " + "InbredSet.SpeciesId = Species.Id AND " + "(Species.Name = %s OR Species.FullName=%s " + "OR Species.SpeciesName=%s)", ((species_name,) * 3) + ) + else: + cursor.execute( + "SELECT InbredSet.InbredSetId, " + "InbredSet.SpeciesId, InbredSet.InbredSetName, " + "InbredSet.Name, InbredSet.FullName, " + "InbredSet.public, IFNULL(InbredSet.MappingMethodId, " + "'None'), IFNULL(InbredSet.GeneticType, 'None') " + "FROM InbredSet" + ) + _groups = cursor.fetchall() + + if _groups: + groups_list = [] + for group in _groups: + group_dict = { + "Id": group[0], + "SpeciesId": group[1], + "DisplayName": group[2], + "Name": group[3], + "FullName": group[4], + "public": group[5], + "MappingMethodId": group[6], + "GeneticType": group[7] + } + groups_list.append(group_dict) + return flask.jsonify(groups_list) + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + + +@app.route("/api/v_{}/group/<path:group_name>".format(version)) +@app.route("/api/v_{}/group/<path:group_name>.<path:file_format>".format(version)) +@app.route("/api/v_{}/group/<path:species_name>/<path:group_name>".format(version)) +@app.route("/api/v_{}/group/<path:species_name>/<path:group_name>.<path:file_format>".format(version)) +def get_group_info(group_name, species_name=None, file_format="json"): + group = tuple() + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + if species_name: + cursor.execute( + "SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, " + "InbredSet.InbredSetName, InbredSet.Name, " + "InbredSet.FullName, InbredSet.public, " + "IFNULL(InbredSet.MappingMethodId, 'None'), " + "IFNULL(InbredSet.GeneticType, 'None') " + "FROM InbredSet, Species WHERE " + "InbredSet.SpeciesId = Species.Id " + "AND (InbredSet.InbredSetName = %s OR " + "InbredSet.Name = %s OR InbredSet.FullName = %s) " + "AND (Species.Name = %s OR " + "Species.FullName = %s OR Species.SpeciesName = %s)", + (*(group_name,)*3, *(species_name,)*3) + ) + else: + cursor.execute( + "SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, " + "InbredSet.InbredSetName, InbredSet.Name, " + "InbredSet.FullName, InbredSet.public, " + "IFNULL(InbredSet.MappingMethodId, 'None'), " + "IFNULL(InbredSet.GeneticType, 'None') " + "FROM InbredSet WHERE " + "(InbredSet.InbredSetName = %s OR " + "InbredSet.Name = %s OR " + "InbredSet.FullName = %s)", + ((group_name,)*3) + ) + group = cursor.fetchone() + + if group: + group_dict = { + "Id": group[0], + "SpeciesId": group[1], + "DisplayName": group[2], + "Name": group[3], + "FullName": group[4], + "public": group[5], + "MappingMethodId": group[6], + "GeneticType": group[7] + } + + return flask.jsonify(group_dict) + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + + +@app.route("/api/v_{}/datasets/<path:group_name>".format(version)) +@app.route("/api/v_{}/datasets/<path:species_name>/<path:group_name>".format(version)) +def get_datasets_for_group(group_name, species_name=None): + _datasets = () + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + if species_name: + cursor.execute( + "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, " + "ProbeSetFreeze.AvgID, ProbeSetFreeze.Name, " + "ProbeSetFreeze.Name2, " + "ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, " + "ProbeSetFreeze.CreateTime, ProbeSetFreeze.public, " + "ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale " + "FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Species " + "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "AND ProbeFreeze.InbredSetId = InbredSet.Id " + "AND (InbredSet.Name = %s OR " + "InbredSet.InbredSetName = %s OR " + "InbredSet.FullName = %s) AND " + "InbredSet.SpeciesId = Species.Id AND " + "(Species.SpeciesName = %s OR " + "Species.MenuName = %s OR Species.FullName = %s);", + (*(group_name,)*3, *(species_name)*3) + ) + else: + cursor.execute( + "SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, " + "ProbeSetFreeze.AvgID, ProbeSetFreeze.Name, " + "ProbeSetFreeze.Name2, ProbeSetFreeze.FullName, " + "ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, " + "ProbeSetFreeze.public, ProbeSetFreeze.confidentiality, " + "ProbeSetFreeze.DataScale FROM ProbeSetFreeze, " + "ProbeFreeze, InbredSet WHERE " + "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id " + "AND ProbeFreeze.InbredSetId = InbredSet.Id " + "AND (InbredSet.Name = %s OR " + "InbredSet.InbredSetName = %s OR " + "InbredSet.FullName = %s)", + ((group_name,) * 3) + ) + _datasets = cursor.fetchall() + + if _datasets: + datasets_list = [] + for dataset in _datasets: + dataset_dict = { + "Id": dataset[0], + "ProbeFreezeId": dataset[1], + "AvgID": dataset[2], + "Short_Abbreviation": dataset[3], + "Long_Abbreviation": dataset[4], + "FullName": dataset[5], + "ShortName": dataset[6], + "CreateTime": dataset[7], + "public": dataset[8], + "confidentiality": dataset[9], + "DataScale": dataset[10] + } + datasets_list.append(dataset_dict) + + return flask.jsonify(datasets_list) + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + + +@app.route("/api/v_{}/dataset/<path:dataset_name>".format(version)) +@app.route("/api/v_{}/dataset/<path:dataset_name>.<path:file_format>".format(version)) +@app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>".format(version)) +@app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>.<path:file_format>".format(version)) +def get_dataset_info(dataset_name, group_name=None, file_format="json"): + # ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets + + # ZS: I figure I might as well return a list if there are multiple + # matches, though I don"t know if this will actually happen in + # practice + datasets_list, dataset_dict = [], {} + probeset_query = """ + SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, + ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, ProbeFreeze.TissueId, + Tissue.Name, ProbeSetFreeze.public, ProbeSetFreeze.confidentiality + FROM ProbeSetFreeze, ProbeFreeze, Tissue + """ + + where_statement = """ + WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND + ProbeFreeze.TissueId = Tissue.Id AND + ProbeSetFreeze.public > 0 AND + ProbeSetFreeze.confidentiality < 1 AND + """ + if dataset_name.isdigit(): + where_statement += """ + ProbeSetFreeze.Id = "{}" + """.format(dataset_name) + else: + where_statement += """ + (ProbeSetFreeze.Name = "{0}" OR ProbeSetFreeze.Name2 = "{0}" OR + ProbeSetFreeze.FullName = "{0}" OR ProbeSetFreeze.ShortName = "{0}") + """.format(dataset_name) + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + cursor.execute(f"{probeset_query}{where_statement}") + + if dataset := cursor.fetchone(): + dataset_dict = { + "dataset_type": "mRNA expression", + "id": dataset[0], + "name": dataset[1], + "full_name": dataset[2], + "short_name": dataset[3], + "data_scale": dataset[4], + "tissue_id": dataset[5], + "tissue": dataset[6], + "public": dataset[7], + "confidential": dataset[8] + } + + datasets_list.append(dataset_dict) + + if group_name: + cursor.execute( + "SELECT PublishXRef.Id, " + "Phenotype.Post_publication_abbreviation, " + "Phenotype.Post_publication_description, " + "Phenotype.Pre_publication_abbreviation, " + "Phenotype.Pre_publication_description, " + "Publication.PubMed_ID, Publication.Title, " + "Publication.Year FROM PublishXRef, Phenotype, " + "Publication, InbredSet, PublishFreeze WHERE " + "PublishXRef.InbredSetId = InbredSet.Id " + "AND PublishXRef.PhenotypeId = Phenotype.Id " + "AND PublishXRef.PublicationId = Publication.Id " + "AND PublishFreeze.InbredSetId = InbredSet.Id " + "AND PublishFreeze.public > 0 AND " + "PublishFreeze.confidentiality < 1 " + "AND InbredSet.Name = %s AND PublishXRef.Id = %s", + (group_name, dataset_name,) + ) + + if dataset := cursor.fetchone(): + if dataset[5]: + dataset_dict = { + "dataset_type": "phenotype", + "id": dataset[0], + "name": dataset[1], + "description": dataset[2], + "pubmed_id": dataset[5], + "title": dataset[6], + "year": dataset[7] + } + elif dataset[4]: + dataset_dict = { + "dataset_type": "phenotype", + "id": dataset[0], + "name": dataset[3], + "description": dataset[4] + } + else: + dataset_dict = { + "dataset_type": "phenotype", + "id": dataset[0] + } + + datasets_list.append(dataset_dict) + + if len(datasets_list) > 1: + return flask.jsonify(datasets_list) + elif len(datasets_list) == 1: + return flask.jsonify(dataset_dict) + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + + +@app.route("/api/v_{}/traits/<path:dataset_name>".format(version), methods=("GET",)) +@app.route("/api/v_{}/traits/<path:dataset_name>.<path:file_format>".format(version), methods=("GET",)) +def fetch_traits(dataset_name, file_format="json"): + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids( + dataset_name, request.args) + if ("ids_only" in request.args) and (len(trait_ids) > 0): + if file_format == "json": + filename = dataset_name + "_trait_ids.json" + return flask.jsonify(trait_ids) + else: + filename = dataset_name + "_trait_ids.csv" + + si = io.StringIO() + csv_writer = csv.writer(si) + csv_writer.writerows([[trait_id] for trait_id in trait_ids]) + output = make_response(si.getvalue()) + output.headers["Content-Disposition"] = "attachment; filename=" + filename + output.headers["Content-type"] = "text/csv" + return output + elif ("names_only" in request.args) and (len(trait_ids) > 0): + if file_format == "json": + filename = dataset_name + "_trait_names.json" + return flask.jsonify(trait_names) + else: + filename = dataset_name + "_trait_names.csv" + + si = io.StringIO() + csv_writer = csv.writer(si) + csv_writer.writerows([[trait_name] for trait_name in trait_names]) + output = make_response(si.getvalue()) + output.headers["Content-Disposition"] = "attachment; filename=" + filename + output.headers["Content-type"] = "text/csv" + return output + else: + if len(trait_ids) > 0: + if data_type == "ProbeSet": + query = """ + SELECT + ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, ProbeSet.alias, + ProbeSetXRef.mean, ProbeSetXRef.se, ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.additive, ProbeSetXRef.h2 + FROM + ProbeSet, ProbeSetXRef, ProbeSetFreeze + WHERE + ProbeSetXRef.ProbeSetFreezeId = "{0}" AND + ProbeSetXRef.ProbeSetId = ProbeSet.Id AND + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetFreeze.public > 0 AND + ProbeSetFreeze.confidentiality < 1 + ORDER BY + ProbeSet.Id + """ + + field_list = ["Id", "Name", "Symbol", "Description", "Chr", "Mb", + "Aliases", "Mean", "SE", "Locus", "LRS", "P-Value", "Additive", "h2"] + elif data_type == "Geno": + query = """ + SELECT + Geno.Id, Geno.Name, Geno.Marker_Name, Geno.Chr, Geno.Mb, Geno.Sequence, Geno.Source + FROM + Geno, GenoXRef, GenoFreeze + WHERE + GenoXRef.GenoFreezeId = "{0}" AND + GenoXRef.GenoId = Geno.Id AND + GenoXRef.GenoFreezeId = GenoFreeze.Id AND + GenoFreeze.public > 0 AND + GenoFreeze.confidentiality < 1 + ORDER BY + Geno.Id + """ + + field_list = ["Id", "Name", "Marker_Name", + "Chr", "Mb", "Sequence", "Source"] + else: + query = """SELECT PublishXRef.Id, + Phenotype.`Original_description`, + Publication.`Authors`, + Publication.`Year`, + Publication.`PubMed_ID`, + PublishXRef.`mean`, + PublishXRef.`LRS`, + PublishXRef.`additive`, + PublishXRef.`Locus`, + Geno.`Chr`, Geno.`Mb` + FROM Species + INNER JOIN InbredSet ON InbredSet.`SpeciesId` = Species.`Id` + INNER JOIN PublishXRef ON PublishXRef.`InbredSetId` = InbredSet.`Id` + INNER JOIN PublishFreeze ON PublishFreeze.`InbredSetId` = InbredSet.`Id` + INNER JOIN Publication ON Publication.`Id` = PublishXRef.`PublicationId` + INNER JOIN Phenotype ON Phenotype.`Id` = PublishXRef.`PhenotypeId` + LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id + WHERE + PublishXRef.InbredSetId = {0} AND + PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND + PublishFreeze.public > 0 AND + PublishFreeze.confidentiality < 1 + ORDER BY + PublishXRef.Id""" + + field_list = ["Id", "Description", "Authors", "Year", "PubMedID", "Mean", + "LRS", "Additive", "Locus", "Chr", "Mb"] + + if 'limit_to' in request.args: + limit_number = request.args['limit_to'] + query += "LIMIT " + str(limit_number) + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + if file_format == "json": + filename = dataset_name + "_traits.json" + cursor.execute(query.format(dataset_id)) + result_list = [] + for result in cursor.fetchall(): + trait_dict = {} + for i, field in enumerate(field_list): + if result[i]: + trait_dict[field] = result[i] + result_list.append(trait_dict) + return flask.jsonify(result_list) + elif file_format == "csv": + filename = dataset_name + "_traits.csv" + + results_list = [] + header_list = [] + header_list += field_list + results_list.append(header_list) + cursor.execute(query.format(dataset_id)) + for result in cursor.fetchall(): + results_list.append(result) + + si = io.StringIO() + csv_writer = csv.writer(si) + csv_writer.writerows(results_list) + output = make_response(si.getvalue()) + output.headers["Content-Disposition"] = "attachment; filename=" + filename + output.headers["Content-type"] = "text/csv" + return output + else: + return return_error( + code=400, + source=request.url_rule.rule, + title="Invalid Output Format", + details="Current formats available are JSON and CSV, with CSV as default" + ) + else: + return return_error( + code=204, + source=request.url_rule.rule, + title="No Results", + details="") + + +@app.route("/api/v_{}/sample_data/<path:dataset_name>".format(version)) +@app.route("/api/v_{}/sample_data/<path:dataset_name>.<path:file_format>".format(version)) +def all_sample_data(dataset_name, file_format="csv"): + trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids( + dataset_name, request.args) + + if len(trait_ids) > 0: + sample_list = get_samplelist(dataset_name) + + if data_type == "ProbeSet": + query = """ + SELECT + Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error + FROM + (ProbeSetData, Strain, ProbeSetXRef, ProbeSetFreeze) + LEFT JOIN ProbeSetSE ON + (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) + WHERE + ProbeSetXRef.ProbeSetFreezeId = "{0}" AND + ProbeSetXRef.ProbeSetId = "{1}" AND + ProbeSetXRef.DataId = ProbeSetData.Id AND + ProbeSetData.StrainId = Strain.Id AND + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetFreeze.public > 0 AND + ProbeSetFreeze.confidentiality < 1 + ORDER BY + Strain.Name + """ + elif data_type == "Geno": + query = """ + SELECT + Strain.Name, Strain.Name2, GenoData.value, GenoData.Id, GenoSE.error + FROM + (GenoData, Strain, GenoXRef, GenoFreeze) + LEFT JOIN GenoSE ON + (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) + WHERE + GenoXRef.GenoFreezeId = "{0}" AND + GenoXRef.GenoId = "{1}" AND + GenoXRef.DataId = GenoData.Id AND + GenoData.StrainId = Strain.Id AND + GenoXRef.GenoFreezeId = GenoFreeze.Id AND + GenoFreeze.public > 0 AND + GenoFreeze.confidentiality < 1 + ORDER BY + Strain.Name + """ + else: + query = """ + SELECT + Strain.Name, Strain.Name2, PublishData.value, PublishData.Id, PublishSE.error, NStrain.count + FROM + (PublishData, Strain, PublishXRef, PublishFreeze) + LEFT JOIN PublishSE ON + (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId) + LEFT JOIN NStrain ON + (NStrain.DataId = PublishData.Id AND + NStrain.StrainId = PublishData.StrainId) + WHERE + PublishXRef.InbredSetId = "{0}" AND + PublishXRef.PhenotypeId = "{1}" AND + PublishData.Id = PublishXRef.DataId AND + PublishData.StrainId = Strain.Id AND + PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND + PublishFreeze.public > 0 AND + PublishFreeze.confidentiality < 1 + ORDER BY + Strain.Name + """ + + if file_format == "csv": + filename = dataset_name + "_sample_data.csv" + + results_list = [] + header_list = [] + header_list.append("id") + header_list += sample_list + results_list.append(header_list) + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + for i, trait_id in enumerate(trait_ids): + line_list = [] + line_list.append(str(trait_names[i])) + cursor.execute(query.format(dataset_id, trait_id)) + results = cursor.fetchall() + results_dict = {} + for item in results: + results_dict[item[0]] = item[2] + for sample in sample_list: + if sample in results_dict: + line_list.append(results_dict[sample]) + else: + line_list.append("x") + results_list.append(line_list) + + results_list = list(map(list, zip(*results_list))) + + si = io.StringIO() + csv_writer = csv.writer(si) + csv_writer.writerows(results_list) + output = make_response(si.getvalue()) + output.headers["Content-Disposition"] = "attachment; filename=" + filename + output.headers["Content-type"] = "text/csv" + return output + else: + return return_error(code=415, source=request.url_rule.rule, title="Unsupported file format", details="") + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + + +@app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>".format(version)) +@app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) +def trait_sample_data(dataset_name, trait_name, file_format="json"): + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Strain.Name, Strain.Name2, " + "ProbeSetData.value, ProbeSetData.Id, " + "ProbeSetSE.error FROM (ProbeSetData, " + "ProbeSetFreeze, Strain, ProbeSet, " + "ProbeSetXRef) LEFT JOIN ProbeSetSE ON " + "(ProbeSetSE.DataId = ProbeSetData.Id AND " + "ProbeSetSE.StrainId = ProbeSetData.StrainId) " + "WHERE ProbeSet.Name = %s AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id " + "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetFreeze.Name = %s AND " + "ProbeSetXRef.DataId = ProbeSetData.Id " + "AND ProbeSetData.StrainId = Strain.Id " + "ORDER BY Strain.Name", + (trait_name, dataset_name,) + ) + + sample_data = cursor.fetchall() + if len(sample_data) > 0: + sample_list = [] + for sample in sample_data: + sample_dict = { + "sample_name": sample[0], + "sample_name_2": sample[1], + "value": sample[2], + "data_id": sample[3], + } + if sample[4]: + sample_dict["se"] = sample[4] + sample_list.append(sample_dict) + + return flask.jsonify(sample_list) + else: + if not dataset_name.isdigit(): + group_id = get_group_id(dataset_name) + if group_id: + dataset_or_group = group_id + else: + dataset_or_group = dataset_name + else: + dataset_or_group = dataset_name + + cursor.execute( + "SELECT DISTINCT Strain.Name, Strain.Name2, " + "PublishData.value, PublishData.Id, PublishSE.error, " + "NStrain.count FROM (PublishData, Strain, " + "PublishXRef, PublishFreeze) LEFT JOIN " + "PublishSE ON (PublishSE.DataId = PublishData.Id " + "AND PublishSE.StrainId = PublishData.StrainId) " + "LEFT JOIN NStrain ON " + "(NStrain.DataId = PublishData.Id AND " + "NStrain.StrainId = PublishData.StrainId) " + "WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId " + "AND PublishData.Id = PublishXRef.DataId AND " + "PublishXRef.Id = %s AND (PublishFreeze.Id = %s " + "OR PublishFreeze.Name = %s OR " + "PublishFreeze.ShortName = %s OR " + "PublishXRef.InbredSetId = %s) AND " + "PublishData.StrainId = Strain.Id " + "ORDER BY Strain.Name", + (trait_name, *(dataset_or_group,)*4) + ) + if len(sample_data := cursor.fetchall()) > 0: + sample_list = [] + for sample in sample_data: + sample_dict = { + "sample_name": sample[0], + "sample_name_2": sample[1], + "value": sample[2], + "data_id": sample[3] + } + if sample[4]: + sample_dict["se"] = sample[4] + if sample[5]: + sample_dict["n_cases"] = sample[5] + sample_list.append(sample_dict) + + return flask.jsonify(sample_list) + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + + +@app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>".format(version)) +@app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) +@app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>".format(version)) +@app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version)) +def get_trait_info(dataset_name, trait_name, file_format="json"): + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, " + "ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, " + "ProbeSet.alias, ProbeSetXRef.mean, ProbeSetXRef.se, " + "ProbeSetXRef.Locus, ProbeSetXRef.LRS, " + "ProbeSetXRef.pValue, ProbeSetXRef.additive " + "FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze " + "WHERE ProbeSet.Name = %s AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + "AND ProbeSetFreeze.Name = %s", + (trait_name, dataset_name,) + ) + if trait_info := cursor.fetchone(): + trait_dict = { + "id": trait_info[0], + "name": trait_info[1], + "symbol": trait_info[2], + "description": trait_info[3], + "chr": trait_info[4], + "mb": trait_info[5], + "alias": trait_info[6], + "mean": trait_info[7], + "se": trait_info[8], + "locus": trait_info[9], + "lrs": trait_info[10], + "p_value": trait_info[11], + "additive": trait_info[12] + } + + return flask.jsonify(trait_dict) + else: + # ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish" + if "Publish" in dataset_name: + dataset_name = dataset_name.replace("Publish", "") + + group_id = get_group_id(dataset_name) + cursor.execute( + "SELECT PublishXRef.PhenotypeId, " + "PublishXRef.Locus, PublishXRef.LRS, " + "PublishXRef.additive FROM " + "PublishXRef WHERE " + "PublishXRef.Id = %s AND " + "PublishXRef.InbredSetId = %s", + (trait_name, group_id,) + ) + if trait_info := cursor.fetchone(): + trait_dict = { + "id": trait_info[0], + "locus": trait_info[1], + "lrs": trait_info[2], + "additive": trait_info[3] + } + + return flask.jsonify(trait_dict) + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + + +@app.route("/api/v_{}/correlation".format(version), methods=("GET",)) +def get_corr_results(): + results = correlation.do_correlation(request.args) + + if len(results) > 0: + # ZS: I think flask.jsonify expects a dict/list instead of JSON + return flask.jsonify(results) + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + + +@app.route("/api/v_{}/mapping".format(version), methods=("GET",)) +def get_mapping_results(): + results, format = mapping.do_mapping_for_api(request.args) + + if len(results) > 0: + if format == "csv": + filename = "mapping_" + datetime.datetime.utcnow().strftime("%b_%d_%Y_%I:%M%p") + ".csv" + + si = io.StringIO() + csv_writer = csv.writer(si) + csv_writer.writerows(results) + output = make_response(si.getvalue()) + output.headers["Content-Disposition"] = "attachment; filename=" + filename + output.headers["Content-type"] = "text/csv" + + return output + elif format == "json": + return flask.jsonify(results) + else: + return return_error(code=415, source=request.url_rule.rule, title="Unsupported Format", details="") + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + + +@app.route("/api/v_{}/genotypes/view/<string:group_name>".format(version)) +def view_genotype_files(group_name): + if os.path.isfile("{0}/{1}.json".format(flat_files("genotype"), group_name)): + with open("{0}/{1}.json".format(flat_files("genotype"), group_name)) as geno_json: + return flask.jsonify(json.load(geno_json)) + + +@app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>/<string:dataset_name>.zip".format(version)) +@app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>/<string:dataset_name>".format(version)) +@app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>.zip".format(version)) +@app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>".format(version)) +@app.route("/api/v_{}/genotypes/<string:group_name>.<string:file_format>".format(version)) +def get_genotypes(group_name, file_format="csv", dataset_name=None): + limit_num = None + if 'limit_to' in request.args: + if request.args['limit_to'].isdigit(): + limit_num = int(request.args['limit_to']) + + si = io.StringIO() + if file_format == "csv" or file_format == "geno": + filename = group_name + ".geno" + + if os.path.isfile("{0}/{1}.geno".format(flat_files("genotype"), group_name)): + output_lines = [] + with open("{0}/{1}.geno".format(flat_files("genotype"), group_name)) as genofile: + i = 0 + for line in genofile: + if line[0] == "#" or line[0] == "@": + output_lines.append([line.strip()]) + else: + if limit_num and i >= limit_num: + break + output_lines.append(line.split()) + i += 1 + + csv_writer = csv.writer( + si, delimiter="\t", escapechar="\\", quoting=csv.QUOTE_NONE) + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + elif file_format == "rqtl2": + memory_file = io.BytesIO() + if dataset_name: + filename = dataset_name + else: + filename = group_name + + if os.path.isfile("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name)): + yaml_file = json.load( + open("{0}/{1}.json".format(flat_files("genotype/rqtl2"), group_name))) + yaml_file["geno"] = filename + "_geno.csv" + yaml_file["gmap"] = filename + "_gmap.csv" + yaml_file["pheno"] = filename + "_pheno.csv" + config_file = [filename + ".json", json.dumps(yaml_file)] + #config_file = [filename + ".yaml", open("{0}/{1}.yaml".format(flat_files("genotype/rqtl2"), group_name))] + geno_file = [filename + "_geno.csv", + open("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name))] + gmap_file = [filename + "_gmap.csv", + open("{0}/{1}_gmap.csv".format(flat_files("genotype/rqtl2"), group_name))] + if dataset_name: + phenotypes = requests.get( + "http://gn2.genenetwork.org/api/v_pre1/sample_data/" + dataset_name) + else: + phenotypes = requests.get( + "http://gn2.genenetwork.org/api/v_pre1/sample_data/" + group_name + "Publish") + + with ZipFile(memory_file, 'w', compression=ZIP_DEFLATED) as zf: + zf.writestr(config_file[0], config_file[1]) + for this_file in [geno_file, gmap_file]: + zf.writestr(this_file[0], this_file[1].read()) + zf.writestr(filename + "_pheno.csv", phenotypes.content) + + memory_file.seek(0) + + return send_file(memory_file, attachment_filename=filename + ".zip", as_attachment=True) + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + else: + filename = group_name + ".bimbam" + + if os.path.isfile("{0}/{1}.geno".format(flat_files("genotype"), group_name)): + output_lines = [] + with open("{0}/{1}_geno.txt".format(flat_files("genotype/bimbam"), group_name)) as genofile: + i = 0 + for line in genofile: + if limit_num and i >= limit_num: + break + output_lines.append([line.strip() + for line in line.split(",")]) + i += 1 + + csv_writer = csv.writer(si, delimiter=",") + else: + return return_error(code=204, source=request.url_rule.rule, title="No Results", details="") + + csv_writer.writerows(output_lines) + output = make_response(si.getvalue()) + output.headers["Content-Disposition"] = "attachment; filename=" + filename + output.headers["Content-type"] = "text/csv" + + return output + + +@app.route("/api/v_{}/gen_dropdown".format(version), methods=("GET",)) +def gen_dropdown_menu(): + with database_connection(get_setting("SQL_URI")) as conn: + results = gen_menu.gen_dropdown_json(conn) + + if len(results) > 0: + return flask.jsonify(results) + else: + return return_error(code=500, source=request.url_rule.rule, title="Some error occurred", details="") + + +def return_error(code, source, title, details): + json_ob = {"errors": [ + { + "status": code, + "source": {"pointer": source}, + "title": title, + "detail": details + } + ]} + + return flask.jsonify(json_ob) + + +def get_dataset_trait_ids(dataset_name, start_vars): + + if 'limit_to' in start_vars: + limit_string = "LIMIT " + str(start_vars['limit_to']) + else: + limit_string = "" + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + if "Geno" in dataset_name: + data_type = "Geno" # ZS: Need to pass back the dataset type + cursor.execute( + "SELECT GenoXRef.GenoId, Geno.Name, " + "GenoXRef.GenoFreezeId FROM Geno, " + "GenoXRef, GenoFreeze WHERE " + "Geno.Id = GenoXRef.GenoId AND " + "GenoXRef.GenoFreezeId = GenoFreeze.Id " + f"AND GenoFreeze.Name = %s {limit_string}", + (dataset_name,)) + + results = cursor.fetchall() + + trait_ids = [result[0] for result in results] + trait_names = [result[1] for result in results] + dataset_id = results[0][2] + return trait_ids, trait_names, data_type, dataset_id + + elif "Publish" in dataset_name or get_group_id(dataset_name): + data_type = "Publish" + dataset_name = dataset_name.replace("Publish", "") + dataset_id = get_group_id(dataset_name) + cursor.execute( + "SELECT PublishXRef.PhenotypeId, " + "PublishXRef.Id, InbredSet.InbredSetCode " + "FROM PublishXRef, InbredSet WHERE " + "PublishXRef.InbredSetId = %s AND " + "InbredSet.Id = PublishXRef.InbredSetId " + f"{limit_string}", + (dataset_id,) + ) + results = cursor.fetchall() + + trait_ids = [result[0] for result in results] + trait_names = [str(result[2]) + "_" + str(result[1]) + for result in results] + + return trait_ids, trait_names, data_type, dataset_id + + else: + data_type = "ProbeSet" + cursor.execute( + "SELECT ProbeSetXRef.ProbeSetId, " + "ProbeSet.Name, ProbeSetXRef.ProbeSetFreezeId " + "FROM ProbeSet, ProbeSetXRef, " + "ProbeSetFreeze WHERE " + "ProbeSet.Id = ProbeSetXRef.ProbeSetId AND " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " + f"AND ProbeSetFreeze.Name = %s {limit_string}", + (dataset_name,) + ) + results = cursor.fetchall() + trait_ids = [result[0] for result in results] + trait_names = [result[1] for result in results] + dataset_id = results[0][2] + return trait_ids, trait_names, data_type, dataset_id + + +def get_samplelist(dataset_name): + group_id = get_group_id_from_dataset(dataset_name) + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT Strain.Name FROM Strain, StrainXRef " + "WHERE StrainXRef.StrainId = Strain.Id AND " + "StrainXRef.InbredSetId = %s", + (group_id,) + ) + # sample list + return [result[0] for result in cursor.fetchall()] + + +def get_group_id_from_dataset(dataset_name): + result = () + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + if "Publish" in dataset_name: + cursor.execute( + "SELECT InbredSet.Id FROM " + "InbredSet, PublishFreeze " + "WHERE PublishFreeze.InbredSetId = InbredSet.Id " + "AND PublishFreeze.Name = %s", + (dataset_name,) + ) + elif "Geno" in dataset_name: + cursor.execute( + "SELECT InbredSet.Id FROM " + "InbredSet, GenoFreeze WHERE " + "GenoFreeze.InbredSetId = InbredSet.Id " + "AND GenoFreeze.Name = %s", + (dataset_name,) + ) + else: + cursor.execute( + "SELECT InbredSet.Id FROM " + "InbredSet, ProbeSetFreeze, " + "ProbeFreeze WHERE " + "ProbeFreeze.InbredSetId = InbredSet.Id " + "AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId " + "AND ProbeSetFreeze.Name = %s", + (dataset_name,) + ) + if result := cursor.fetchone(): + return result[0] + return None + + +def get_group_id(group_name): + with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor: + cursor.execute( + "SELECT InbredSet.Id FROM InbredSet " + "WHERE InbredSet.Name = %s", + (group_name,) + ) + if group_id := cursor.fetchone(): + return group_id[0] + return None |