# GN2 API
import os
import io
import csv
import json
import datetime
import requests
from zipfile import ZipFile, ZIP_DEFLATED
import flask
from flask import current_app
from gn2.wqflask.database import database_connection
from flask import request
from flask import make_response
from flask import send_file
from gn2.wqflask import app
from gn2.wqflask.api import correlation, mapping, gen_menu
from gn2.utility.tools import flat_files, get_setting
from gn2.wqflask.database import database_connection
version = "pre1"
@app.route("/api/v_{}/".format(version))
def hello_world():
return flask.jsonify({"hello": "world"})
@app.route("/api/v_{}/species".format(version))
def get_species_list():
species_list = []
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
cursor.execute(
"SELECT SpeciesId, Name, FullName, TaxonomyId FROM Species"
)
for species in cursor.fetchall():
species_dict = {
"Id": species[0],
"Name": species[1],
"FullName": species[2],
"TaxonomyId": species[3]
}
species_list.append(species_dict)
return flask.jsonify(species_list)
@app.route("/api/v_{}/species/<path:species_name>".format(version))
@app.route("/api/v_{}/species/<path:species_name>.<path:file_format>".format(version))
def get_species_info(species_name, file_format="json"):
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
cursor.execute(
"SELECT SpeciesId, Name, FullName, TaxonomyId "
"FROM Species WHERE (Name=%s OR FullName=%s "
"OR SpeciesName=%s)", ((species_name,)*3))
_species = cursor.fetchone()
species_dict = {
"Id": _species[0],
"Name": _species[1],
"FullName": _species[2],
"TaxonomyId": _species[3]
}
return flask.jsonify(species_dict)
@app.route("/api/v_{}/groups".format(version))
@app.route("/api/v_{}/groups/<path:species_name>".format(version))
def get_groups_list(species_name=None):
_groups = ()
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
if species_name:
cursor.execute(
"SELECT InbredSet.InbredSetId, "
"InbredSet.SpeciesId, InbredSet.InbredSetName, "
"InbredSet.Name, InbredSet.FullName, "
"InbredSet.public, IFNULL(InbredSet.MappingMethodId, "
"'None'), IFNULL(InbredSet.GeneticType, 'None') "
"FROM InbredSet, Species WHERE "
"InbredSet.SpeciesId = Species.Id AND "
"(Species.Name = %s OR Species.FullName=%s "
"OR Species.SpeciesName=%s)", ((species_name,) * 3)
)
else:
cursor.execute(
"SELECT InbredSet.InbredSetId, "
"InbredSet.SpeciesId, InbredSet.InbredSetName, "
"InbredSet.Name, InbredSet.FullName, "
"InbredSet.public, IFNULL(InbredSet.MappingMethodId, "
"'None'), IFNULL(InbredSet.GeneticType, 'None') "
"FROM InbredSet"
)
_groups = cursor.fetchall()
if _groups:
groups_list = []
for group in _groups:
group_dict = {
"Id": group[0],
"SpeciesId": group[1],
"DisplayName": group[2],
"Name": group[3],
"FullName": group[4],
"public": group[5],
"MappingMethodId": group[6],
"GeneticType": group[7]
}
groups_list.append(group_dict)
return flask.jsonify(groups_list)
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/group/<path:group_name>".format(version))
@app.route("/api/v_{}/group/<path:group_name>.<path:file_format>".format(version))
@app.route("/api/v_{}/group/<path:species_name>/<path:group_name>".format(version))
@app.route("/api/v_{}/group/<path:species_name>/<path:group_name>.<path:file_format>".format(version))
def get_group_info(group_name, species_name=None, file_format="json"):
group = tuple()
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
if species_name:
cursor.execute(
"SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, "
"InbredSet.InbredSetName, InbredSet.Name, "
"InbredSet.FullName, InbredSet.public, "
"IFNULL(InbredSet.MappingMethodId, 'None'), "
"IFNULL(InbredSet.GeneticType, 'None') "
"FROM InbredSet, Species WHERE "
"InbredSet.SpeciesId = Species.Id "
"AND (InbredSet.InbredSetName = %s OR "
"InbredSet.Name = %s OR InbredSet.FullName = %s) "
"AND (Species.Name = %s OR "
"Species.FullName = %s OR Species.SpeciesName = %s)",
(*(group_name,)*3, *(species_name,)*3)
)
else:
cursor.execute(
"SELECT InbredSet.InbredSetId, InbredSet.SpeciesId, "
"InbredSet.InbredSetName, InbredSet.Name, "
"InbredSet.FullName, InbredSet.public, "
"IFNULL(InbredSet.MappingMethodId, 'None'), "
"IFNULL(InbredSet.GeneticType, 'None') "
"FROM InbredSet WHERE "
"(InbredSet.InbredSetName = %s OR "
"InbredSet.Name = %s OR "
"InbredSet.FullName = %s)",
((group_name,)*3)
)
group = cursor.fetchone()
if group:
group_dict = {
"Id": group[0],
"SpeciesId": group[1],
"DisplayName": group[2],
"Name": group[3],
"FullName": group[4],
"public": group[5],
"MappingMethodId": group[6],
"GeneticType": group[7]
}
return flask.jsonify(group_dict)
else:
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/datasets/<path:group_name>".format(version))
@app.route("/api/v_{}/datasets/<path:species_name>/<path:group_name>".format(version))
def get_datasets_for_group(group_name, species_name=None):
_datasets = ()
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
if species_name:
cursor.execute(
"SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, "
"ProbeSetFreeze.AvgID, ProbeSetFreeze.Name, "
"ProbeSetFreeze.Name2, "
"ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, "
"ProbeSetFreeze.CreateTime, ProbeSetFreeze.public, "
"ProbeSetFreeze.confidentiality, ProbeSetFreeze.DataScale "
"FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Species "
"WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id "
"AND ProbeFreeze.InbredSetId = InbredSet.Id "
"AND (InbredSet.Name = %s OR "
"InbredSet.InbredSetName = %s OR "
"InbredSet.FullName = %s) AND "
"InbredSet.SpeciesId = Species.Id AND "
"(Species.SpeciesName = %s OR "
"Species.MenuName = %s OR Species.FullName = %s);",
(*(group_name,)*3, *(species_name)*3)
)
else:
cursor.execute(
"SELECT ProbeSetFreeze.Id, ProbeSetFreeze.ProbeFreezeId, "
"ProbeSetFreeze.AvgID, ProbeSetFreeze.Name, "
"ProbeSetFreeze.Name2, ProbeSetFreeze.FullName, "
"ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, "
"ProbeSetFreeze.public, ProbeSetFreeze.confidentiality, "
"ProbeSetFreeze.DataScale FROM ProbeSetFreeze, "
"ProbeFreeze, InbredSet WHERE "
"ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id "
"AND ProbeFreeze.InbredSetId = InbredSet.Id "
"AND (InbredSet.Name = %s OR "
"InbredSet.InbredSetName = %s OR "
"InbredSet.FullName = %s)",
((group_name,) * 3)
)
_datasets = cursor.fetchall()
if _datasets:
datasets_list = []
for dataset in _datasets:
dataset_dict = {
"Id": dataset[0],
"ProbeFreezeId": dataset[1],
"AvgID": dataset[2],
"Short_Abbreviation": dataset[3],
"Long_Abbreviation": dataset[4],
"FullName": dataset[5],
"ShortName": dataset[6],
"CreateTime": dataset[7],
"public": dataset[8],
"confidentiality": dataset[9],
"DataScale": dataset[10]
}
datasets_list.append(dataset_dict)
return flask.jsonify(datasets_list)
else:
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/dataset/<path:dataset_name>".format(version))
@app.route("/api/v_{}/dataset/<path:dataset_name>.<path:file_format>".format(version))
@app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>".format(version))
@app.route("/api/v_{}/dataset/<path:group_name>/<path:dataset_name>.<path:file_format>".format(version))
def get_dataset_info(dataset_name, group_name=None, file_format="json"):
# ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets
# ZS: I figure I might as well return a list if there are multiple
# matches, though I don"t know if this will actually happen in
# practice
datasets_list, dataset_dict = [], {}
probeset_query = """
SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName,
ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, ProbeFreeze.TissueId,
Tissue.Name, ProbeSetFreeze.public, ProbeSetFreeze.confidentiality
FROM ProbeSetFreeze, ProbeFreeze, Tissue
"""
where_statement = """
WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
ProbeFreeze.TissueId = Tissue.Id AND
ProbeSetFreeze.public > 0 AND
ProbeSetFreeze.confidentiality < 1 AND
"""
if dataset_name.isdigit():
where_statement += """
ProbeSetFreeze.Id = "{}"
""".format(dataset_name)
else:
where_statement += """
(ProbeSetFreeze.Name = "{0}" OR ProbeSetFreeze.Name2 = "{0}" OR
ProbeSetFreeze.FullName = "{0}" OR ProbeSetFreeze.ShortName = "{0}")
""".format(dataset_name)
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
cursor.execute(f"{probeset_query}{where_statement}")
if dataset := cursor.fetchone():
dataset_dict = {
"dataset_type": "mRNA expression",
"id": dataset[0],
"name": dataset[1],
"full_name": dataset[2],
"short_name": dataset[3],
"data_scale": dataset[4],
"tissue_id": dataset[5],
"tissue": dataset[6],
"public": dataset[7],
"confidential": dataset[8]
}
datasets_list.append(dataset_dict)
if group_name:
cursor.execute(
"SELECT PublishXRef.Id, "
"Phenotype.Post_publication_abbreviation, "
"Phenotype.Post_publication_description, "
"Phenotype.Pre_publication_abbreviation, "
"Phenotype.Pre_publication_description, "
"Publication.PubMed_ID, Publication.Title, "
"Publication.Year FROM PublishXRef, Phenotype, "
"Publication, InbredSet, PublishFreeze WHERE "
"PublishXRef.InbredSetId = InbredSet.Id "
"AND PublishXRef.PhenotypeId = Phenotype.Id "
"AND PublishXRef.PublicationId = Publication.Id "
"AND PublishFreeze.InbredSetId = InbredSet.Id "
"AND PublishFreeze.public > 0 AND "
"PublishFreeze.confidentiality < 1 "
"AND InbredSet.Name = %s AND PublishXRef.Id = %s",
(group_name, dataset_name,)
)
if dataset := cursor.fetchone():
if dataset[5]:
dataset_dict = {
"dataset_type": "phenotype",
"id": dataset[0],
"name": dataset[1],
"description": dataset[2],
"pubmed_id": dataset[5],
"title": dataset[6],
"year": dataset[7]
}
elif dataset[4]:
dataset_dict = {
"dataset_type": "phenotype",
"id": dataset[0],
"name": dataset[3],
"description": dataset[4]
}
else:
dataset_dict = {
"dataset_type": "phenotype",
"id": dataset[0]
}
datasets_list.append(dataset_dict)
if len(datasets_list) > 1:
return flask.jsonify(datasets_list)
elif len(datasets_list) == 1:
return flask.jsonify(dataset_dict)
else:
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/traits/<path:dataset_name>".format(version), methods=("GET",))
@app.route("/api/v_{}/traits/<path:dataset_name>.<path:file_format>".format(version), methods=("GET",))
def fetch_traits(dataset_name, file_format="json"):
trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(
dataset_name, request.args)
if ("ids_only" in request.args) and (len(trait_ids) > 0):
if file_format == "json":
filename = dataset_name + "_trait_ids.json"
return flask.jsonify(trait_ids)
else:
filename = dataset_name + "_trait_ids.csv"
si = io.StringIO()
csv_writer = csv.writer(si)
csv_writer.writerows([[trait_id] for trait_id in trait_ids])
output = make_response(si.getvalue())
output.headers["Content-Disposition"] = "attachment; filename=" + filename
output.headers["Content-type"] = "text/csv"
return output
elif ("names_only" in request.args) and (len(trait_ids) > 0):
if file_format == "json":
filename = dataset_name + "_trait_names.json"
return flask.jsonify(trait_names)
else:
filename = dataset_name + "_trait_names.csv"
si = io.StringIO()
csv_writer = csv.writer(si)
csv_writer.writerows([[trait_name] for trait_name in trait_names])
output = make_response(si.getvalue())
output.headers["Content-Disposition"] = "attachment; filename=" + filename
output.headers["Content-type"] = "text/csv"
return output
else:
if len(trait_ids) > 0:
if data_type == "ProbeSet":
query = """
SELECT DISTINCT
ProbeSet.`Id`, ProbeSet.`Name`, ProbeSet.`Symbol`, ProbeSet.`description`,
ProbeSet.`Chr`, ProbeSet.`Mb`, ProbeSet.`alias`, ProbeSetXRef.`mean`,
ProbeSetXRef.`se`, ProbeSetXRef.`Locus`, ProbeSetXRef.`LRS`,
ProbeSetXRef.`pValue`, ProbeSetXRef.`additive`, ProbeSetXRef.`h2`,
Geno.`Chr`, Geno.`Mb`
FROM
Species
INNER JOIN InbredSet ON InbredSet.`SpeciesId`= Species.`Id`
INNER JOIN ProbeFreeze ON ProbeFreeze.`InbredSetId` = InbredSet.`Id`
INNER JOIN Tissue ON ProbeFreeze.`TissueId` = Tissue.`Id`
INNER JOIN ProbeSetFreeze ON ProbeSetFreeze.`ProbeFreezeId` = ProbeFreeze.`Id`
INNER JOIN ProbeSetXRef ON ProbeSetXRef.`ProbeSetFreezeId` = ProbeSetFreeze.`Id`
INNER JOIN ProbeSet ON ProbeSet.`Id` = ProbeSetXRef.`ProbeSetId`
LEFT JOIN Geno ON ProbeSetXRef.`Locus` = Geno.`Name` AND Geno.`SpeciesId` = Species.`Id`
WHERE
ProbeSetXRef.ProbeSetFreezeId = "{0}"
ORDER BY
ProbeSet.Id"""
field_list = ["Id", "Name", "Symbol", "Description", "Chr", "Mb", "Aliases", "Mean",
"SE", "Locus", "LRS", "P-Value", "Additive", "h2", "Peak Chr", "Peak Mb"]
elif data_type == "Geno":
query = """
SELECT
Geno.Id, Geno.Name, Geno.Marker_Name, Geno.Chr, Geno.Mb, Geno.Sequence, Geno.Source
FROM
Geno, GenoXRef, GenoFreeze
WHERE
GenoXRef.GenoFreezeId = "{0}" AND
GenoXRef.GenoId = Geno.Id AND
GenoXRef.GenoFreezeId = GenoFreeze.Id AND
GenoFreeze.public > 0 AND
GenoFreeze.confidentiality < 1
ORDER BY
Geno.Id
"""
field_list = ["Id", "Name", "Marker_Name",
"Chr", "Mb", "Sequence", "Source"]
else:
query = """SELECT PublishXRef.Id,
Phenotype.`Original_description`,
Publication.`Authors`,
Publication.`Year`,
Publication.`PubMed_ID`,
PublishXRef.`mean`,
PublishXRef.`LRS`,
PublishXRef.`additive`,
PublishXRef.`Locus`,
Geno.`Chr`, Geno.`Mb`
FROM Species
INNER JOIN InbredSet ON InbredSet.`SpeciesId` = Species.`Id`
INNER JOIN PublishXRef ON PublishXRef.`InbredSetId` = InbredSet.`Id`
INNER JOIN PublishFreeze ON PublishFreeze.`InbredSetId` = InbredSet.`Id`
INNER JOIN Publication ON Publication.`Id` = PublishXRef.`PublicationId`
INNER JOIN Phenotype ON Phenotype.`Id` = PublishXRef.`PhenotypeId`
LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id
WHERE
PublishXRef.InbredSetId = {0} AND
PublishFreeze.InbredSetId = PublishXRef.InbredSetId AND
PublishFreeze.public > 0 AND
PublishFreeze.confidentiality < 1
ORDER BY
PublishXRef.Id"""
field_list = ["Id", "Description", "Authors", "Year", "PubMedID", "Mean",
"LRS", "Additive", "Locus", "Chr", "Mb"]
if 'limit_to' in request.args:
limit_number = request.args['limit_to']
query += "LIMIT " + str(limit_number)
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
if file_format == "json":
filename = dataset_name + "_traits.json"
cursor.execute(query.format(dataset_id))
result_list = []
for result in cursor.fetchall():
trait_dict = {}
for i, field in enumerate(field_list):
if result[i]:
trait_dict[field] = result[i]
result_list.append(trait_dict)
return flask.jsonify(result_list)
elif file_format == "csv":
filename = dataset_name + "_traits.csv"
results_list = []
header_list = []
header_list += field_list
results_list.append(header_list)
cursor.execute(query.format(dataset_id))
for result in cursor.fetchall():
results_list.append(result)
si = io.StringIO()
csv_writer = csv.writer(si)
csv_writer.writerows(results_list)
output = make_response(si.getvalue())
output.headers["Content-Disposition"] = "attachment; filename=" + filename
output.headers["Content-type"] = "text/csv"
return output
else:
return return_error(
code=400,
source=request.url_rule.rule,
title="Invalid Output Format",
details="Current formats available are JSON and CSV, with CSV as default"
)
else:
return return_error(
code=204,
source=request.url_rule.rule,
title="No Results",
details="")
@app.route("/api/v_{}/sample_data/<path:dataset_name>".format(version))
@app.route("/api/v_{}/sample_data/<path:dataset_name>.<path:file_format>".format(version))
def all_sample_data(dataset_name, file_format="csv"):
trait_ids, trait_names, data_type, dataset_id = get_dataset_trait_ids(
dataset_name, request.args)
if len(trait_ids) > 0:
sample_list = get_samplelist(dataset_name)
if data_type == "ProbeSet":
query = """
SELECT
Strain.Name, Strain.Name2, ProbeSetData.value, ProbeSetData.Id, ProbeSetSE.error
FROM
(ProbeSetData, Strain, ProbeSetXRef, ProbeSetFreeze)
LEFT JOIN ProbeSetSE ON
(ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
WHERE
ProbeSetXRef.ProbeSetFreezeId = "{0}" AND
ProbeSetXRef.ProbeSetId = "{1}" AND
ProbeSetXRef.DataId = ProbeSetData.Id AND
ProbeSetData.StrainId = Strain.Id AND
ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
ProbeSetFreeze.public > 0 AND
ProbeSetFreeze.confidentiality < 1
ORDER BY
Strain.Name
"""
elif data_type == "Geno":
query = """
SELECT
Strain.Name, Strain.Name2, GenoData.value, GenoData.Id, GenoSE.error
FROM
(GenoData, Strain, GenoXRef, GenoFreeze)
LEFT JOIN GenoSE ON
(GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
WHERE
GenoXRef.GenoFreezeId = "{0}" AND
GenoXRef.GenoId = "{1}" AND
GenoXRef.DataId = GenoData.Id AND
GenoData.StrainId = Strain.Id AND
GenoXRef.GenoFreezeId = GenoFreeze.Id AND
GenoFreeze.public > 0 AND
GenoFreeze.confidentiality < 1
ORDER BY
Strain.Name
"""
else:
query = """
SELECT
Strain.Name, Strain.Name2, PublishData.value, PublishData.Id, PublishSE.error, NStrain.count
FROM
(PublishData, Strain, PublishXRef, PublishFreeze)
LEFT JOIN PublishSE ON
(PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
LEFT JOIN NStrain ON
(NStrain.DataId = PublishData.Id AND
NStrain.StrainId = PublishData.StrainId)
WHERE
PublishXRef.InbredSetId = "{0}" AND
PublishXRef.PhenotypeId = "{1}" AND
PublishData.Id = PublishXRef.DataId AND
PublishData.StrainId = Strain.Id AND
PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
PublishFreeze.public > 0 AND
PublishFreeze.confidentiality < 1
ORDER BY
Strain.Name
"""
if file_format == "csv":
filename = dataset_name + "_sample_data.csv"
results_list = []
header_list = []
header_list.append("id")
header_list += sample_list
results_list.append(header_list)
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
for i, trait_id in enumerate(trait_ids):
line_list = []
line_list.append(str(trait_names[i]))
cursor.execute(query.format(dataset_id, trait_id))
results = cursor.fetchall()
results_dict = {}
for item in results:
results_dict[item[0]] = item[2]
for sample in sample_list:
if sample in results_dict:
line_list.append(results_dict[sample])
else:
line_list.append("x")
results_list.append(line_list)
results_list = list(map(list, zip(*results_list)))
si = io.StringIO()
csv_writer = csv.writer(si)
csv_writer.writerows(results_list)
output = make_response(si.getvalue())
output.headers["Content-Disposition"] = "attachment; filename=" + filename
output.headers["Content-type"] = "text/csv"
return output
else:
return return_error(code=415, source=request.url_rule.rule, title="Unsupported file format", details="")
else:
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>".format(version))
@app.route("/api/v_{}/sample_data/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version))
def trait_sample_data(dataset_name, trait_name, file_format="json"):
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
cursor.execute(
"SELECT Strain.Name, Strain.Name2, "
"ProbeSetData.value, ProbeSetData.Id, "
"ProbeSetSE.error FROM (ProbeSetData, "
"ProbeSetFreeze, Strain, ProbeSet, "
"ProbeSetXRef) LEFT JOIN ProbeSetSE ON "
"(ProbeSetSE.DataId = ProbeSetData.Id AND "
"ProbeSetSE.StrainId = ProbeSetData.StrainId) "
"WHERE ProbeSet.Name = %s AND "
"ProbeSetXRef.ProbeSetId = ProbeSet.Id "
"AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
"AND ProbeSetFreeze.Name = %s AND "
"ProbeSetXRef.DataId = ProbeSetData.Id "
"AND ProbeSetData.StrainId = Strain.Id "
"ORDER BY Strain.Name",
(trait_name, dataset_name,)
)
sample_data = cursor.fetchall()
if len(sample_data) > 0:
sample_list = []
for sample in sample_data:
sample_dict = {
"sample_name": sample[0],
"sample_name_2": sample[1],
"value": sample[2],
"data_id": sample[3],
}
if sample[4]:
sample_dict["se"] = sample[4]
sample_list.append(sample_dict)
return flask.jsonify(sample_list)
else:
if not dataset_name.isdigit():
group_id = get_group_id(dataset_name)
if group_id:
dataset_or_group = group_id
else:
dataset_or_group = dataset_name
else:
dataset_or_group = dataset_name
cursor.execute(
"SELECT DISTINCT Strain.Name, Strain.Name2, "
"PublishData.value, PublishData.Id, PublishSE.error, "
"NStrain.count FROM (PublishData, Strain, "
"PublishXRef, PublishFreeze) LEFT JOIN "
"PublishSE ON (PublishSE.DataId = PublishData.Id "
"AND PublishSE.StrainId = PublishData.StrainId) "
"LEFT JOIN NStrain ON "
"(NStrain.DataId = PublishData.Id AND "
"NStrain.StrainId = PublishData.StrainId) "
"WHERE PublishXRef.InbredSetId = PublishFreeze.InbredSetId "
"AND PublishData.Id = PublishXRef.DataId AND "
"PublishXRef.Id = %s AND (PublishFreeze.Id = %s "
"OR PublishFreeze.Name = %s OR "
"PublishFreeze.ShortName = %s OR "
"PublishXRef.InbredSetId = %s) AND "
"PublishData.StrainId = Strain.Id "
"ORDER BY Strain.Name",
(trait_name, *(dataset_or_group,)*4)
)
if len(sample_data := cursor.fetchall()) > 0:
sample_list = []
for sample in sample_data:
sample_dict = {
"sample_name": sample[0],
"sample_name_2": sample[1],
"value": sample[2],
"data_id": sample[3]
}
if sample[4]:
sample_dict["se"] = sample[4]
if sample[5]:
sample_dict["n_cases"] = sample[5]
sample_list.append(sample_dict)
return flask.jsonify(sample_list)
else:
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>".format(version))
@app.route("/api/v_{}/trait/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version))
@app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>".format(version))
@app.route("/api/v_{}/trait_info/<path:dataset_name>/<path:trait_name>.<path:file_format>".format(version))
def get_trait_info(dataset_name, trait_name, file_format="json"):
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
cursor.execute(
"SELECT ProbeSet.Id, ProbeSet.Name, ProbeSet.Symbol, "
"ProbeSet.description, ProbeSet.Chr, ProbeSet.Mb, "
"ProbeSet.alias, ProbeSetXRef.mean, ProbeSetXRef.se, "
"ProbeSetXRef.Locus, ProbeSetXRef.LRS, "
"ProbeSetXRef.pValue, ProbeSetXRef.additive "
"FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze "
"WHERE ProbeSet.Name = %s AND "
"ProbeSetXRef.ProbeSetId = ProbeSet.Id AND "
"ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
"AND ProbeSetFreeze.Name = %s",
(trait_name, dataset_name,)
)
if trait_info := cursor.fetchone():
trait_dict = {
"id": trait_info[0],
"name": trait_info[1],
"symbol": trait_info[2],
"description": trait_info[3],
"chr": trait_info[4],
"mb": trait_info[5],
"alias": trait_info[6],
"mean": trait_info[7],
"se": trait_info[8],
"locus": trait_info[9],
"lrs": trait_info[10],
"p_value": trait_info[11],
"additive": trait_info[12]
}
return flask.jsonify(trait_dict)
else:
# ZS: Check if the user input the dataset_name as BXDPublish, etc (which is always going to be the group name + "Publish"
if "Publish" in dataset_name:
dataset_name = dataset_name.replace("Publish", "")
group_id = get_group_id(dataset_name)
cursor.execute(
"SELECT PublishXRef.PhenotypeId, "
"PublishXRef.Locus, PublishXRef.LRS, "
"PublishXRef.additive FROM "
"PublishXRef WHERE "
"PublishXRef.Id = %s AND "
"PublishXRef.InbredSetId = %s",
(trait_name, group_id,)
)
if trait_info := cursor.fetchone():
trait_dict = {
"id": trait_info[0],
"locus": trait_info[1],
"lrs": trait_info[2],
"additive": trait_info[3]
}
return flask.jsonify(trait_dict)
else:
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/correlation".format(version), methods=("GET",))
def get_corr_results():
results = correlation.do_correlation(request.args)
if len(results) > 0:
# ZS: I think flask.jsonify expects a dict/list instead of JSON
return flask.jsonify(results)
else:
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/mapping".format(version), methods=("GET",))
def get_mapping_results():
results, format = mapping.do_mapping_for_api(request.args)
if len(results) > 0:
if format == "csv":
filename = "mapping_" + datetime.datetime.utcnow().strftime("%b_%d_%Y_%I:%M%p") + ".csv"
si = io.StringIO()
csv_writer = csv.writer(si)
csv_writer.writerows(results)
output = make_response(si.getvalue())
output.headers["Content-Disposition"] = "attachment; filename=" + filename
output.headers["Content-type"] = "text/csv"
return output
elif format == "json":
return flask.jsonify(results)
else:
return return_error(code=415, source=request.url_rule.rule, title="Unsupported Format", details="")
else:
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
@app.route("/api/v_{}/genotypes/view/<string:group_name>".format(version))
def view_genotype_files(group_name):
if os.path.isfile("{0}/{1}.json".format(flat_files("genotype"), group_name)):
with open("{0}/{1}.json".format(flat_files("genotype"), group_name)) as geno_json:
return flask.jsonify(json.load(geno_json))
@app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>/<string:dataset_name>.zip".format(version))
@app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>/<string:dataset_name>".format(version))
@app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>.zip".format(version))
@app.route("/api/v_{}/genotypes/<string:file_format>/<string:group_name>".format(version))
@app.route("/api/v_{}/genotypes/<string:group_name>.<string:file_format>".format(version))
def get_genotypes(group_name, file_format="csv", dataset_name=None):
limit_num = None
if 'limit_to' in request.args:
if request.args['limit_to'].isdigit():
limit_num = int(request.args['limit_to'])
si = io.StringIO()
if file_format == "csv" or file_format == "geno":
filename = group_name + ".geno"
if os.path.isfile("{0}/{1}.geno".format(flat_files("genotype"), group_name)):
output_lines = []
with open("{0}/{1}.geno".format(flat_files("genotype"), group_name)) as genofile:
i = 0
for line in genofile:
if line[0] == "#" or line[0] == "@":
output_lines.append([line.strip()])
else:
if limit_num and i >= limit_num:
break
output_lines.append(line.split())
i += 1
csv_writer = csv.writer(
si, delimiter="\t", escapechar="\\", quoting=csv.QUOTE_NONE)
else:
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
elif file_format == "rqtl2":
memory_file = io.BytesIO()
if dataset_name:
filename = dataset_name
else:
filename = group_name
if os.path.isfile("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name)):
yaml_file = json.load(
open("{0}/{1}.json".format(flat_files("genotype/rqtl2"), group_name)))
yaml_file["geno"] = filename + "_geno.csv"
yaml_file["gmap"] = filename + "_gmap.csv"
yaml_file["pheno"] = filename + "_pheno.csv"
config_file = [filename + ".json", json.dumps(yaml_file)]
#config_file = [filename + ".yaml", open("{0}/{1}.yaml".format(flat_files("genotype/rqtl2"), group_name))]
geno_file = [filename + "_geno.csv",
open("{0}/{1}_geno.csv".format(flat_files("genotype/rqtl2"), group_name))]
gmap_file = [filename + "_gmap.csv",
open("{0}/{1}_gmap.csv".format(flat_files("genotype/rqtl2"), group_name))]
if dataset_name:
phenotypes = requests.get(
"http://gn2.genenetwork.org/api/v_pre1/sample_data/" + dataset_name)
else:
phenotypes = requests.get(
"http://gn2.genenetwork.org/api/v_pre1/sample_data/" + group_name + "Publish")
with ZipFile(memory_file, 'w', compression=ZIP_DEFLATED) as zf:
zf.writestr(config_file[0], config_file[1])
for this_file in [geno_file, gmap_file]:
zf.writestr(this_file[0], this_file[1].read())
zf.writestr(filename + "_pheno.csv", phenotypes.content)
memory_file.seek(0)
return send_file(memory_file, attachment_filename=filename + ".zip", as_attachment=True)
else:
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
else:
filename = group_name + ".bimbam"
if os.path.isfile("{0}/{1}.geno".format(flat_files("genotype"), group_name)):
output_lines = []
with open("{0}/{1}_geno.txt".format(flat_files("genotype/bimbam"), group_name)) as genofile:
i = 0
for line in genofile:
if limit_num and i >= limit_num:
break
output_lines.append([line.strip()
for line in line.split(",")])
i += 1
csv_writer = csv.writer(si, delimiter=",")
else:
return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
csv_writer.writerows(output_lines)
output = make_response(si.getvalue())
output.headers["Content-Disposition"] = "attachment; filename=" + filename
output.headers["Content-type"] = "text/csv"
return output
@app.route("/api/v_{}/gen_dropdown".format(version), methods=("GET",))
def gen_dropdown_menu():
with database_connection(get_setting("SQL_URI")) as conn:
results = gen_menu.gen_dropdown_json(conn)
if len(results) > 0:
return flask.jsonify(results)
else:
return return_error(code=500, source=request.url_rule.rule, title="Some error occurred", details="")
def return_error(code, source, title, details):
json_ob = {"errors": [
{
"status": code,
"source": {"pointer": source},
"title": title,
"detail": details
}
]}
return flask.jsonify(json_ob)
def get_dataset_trait_ids(dataset_name, start_vars):
if 'limit_to' in start_vars:
limit_string = "LIMIT " + str(start_vars['limit_to'])
else:
limit_string = ""
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
if "Geno" in dataset_name:
data_type = "Geno" # ZS: Need to pass back the dataset type
cursor.execute(
"SELECT GenoXRef.GenoId, Geno.Name, "
"GenoXRef.GenoFreezeId FROM Geno, "
"GenoXRef, GenoFreeze WHERE "
"Geno.Id = GenoXRef.GenoId AND "
"GenoXRef.GenoFreezeId = GenoFreeze.Id "
f"AND GenoFreeze.Name = %s {limit_string}",
(dataset_name,))
results = cursor.fetchall()
trait_ids = [result[0] for result in results]
trait_names = [result[1] for result in results]
dataset_id = results[0][2]
return trait_ids, trait_names, data_type, dataset_id
elif "Publish" in dataset_name or get_group_id(dataset_name):
data_type = "Publish"
dataset_name = dataset_name.replace("Publish", "")
dataset_id = get_group_id(dataset_name)
cursor.execute(
"SELECT PublishXRef.PhenotypeId, "
"PublishXRef.Id, InbredSet.InbredSetCode "
"FROM PublishXRef, InbredSet WHERE "
"PublishXRef.InbredSetId = %s AND "
"InbredSet.Id = PublishXRef.InbredSetId "
f"{limit_string}",
(dataset_id,)
)
results = cursor.fetchall()
trait_ids = [result[0] for result in results]
trait_names = [str(result[2]) + "_" + str(result[1])
for result in results]
return trait_ids, trait_names, data_type, dataset_id
else:
data_type = "ProbeSet"
cursor.execute(
"SELECT ProbeSetXRef.ProbeSetId, "
"ProbeSet.Name, ProbeSetXRef.ProbeSetFreezeId "
"FROM ProbeSet, ProbeSetXRef, "
"ProbeSetFreeze WHERE "
"ProbeSet.Id = ProbeSetXRef.ProbeSetId AND "
"ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
f"AND ProbeSetFreeze.Name = %s {limit_string}",
(dataset_name,)
)
results = cursor.fetchall()
trait_ids = [result[0] for result in results]
trait_names = [result[1] for result in results]
dataset_id = results[0][2]
return trait_ids, trait_names, data_type, dataset_id
def get_samplelist(dataset_name):
group_id = get_group_id_from_dataset(dataset_name)
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
cursor.execute(
"SELECT Strain.Name FROM Strain, StrainXRef "
"WHERE StrainXRef.StrainId = Strain.Id AND "
"StrainXRef.InbredSetId = %s",
(group_id,)
)
# sample list
return [result[0] for result in cursor.fetchall()]
def get_group_id_from_dataset(dataset_name):
result = ()
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
if "Publish" in dataset_name:
cursor.execute(
"SELECT InbredSet.Id FROM "
"InbredSet, PublishFreeze "
"WHERE PublishFreeze.InbredSetId = InbredSet.Id "
"AND PublishFreeze.Name = %s",
(dataset_name,)
)
elif "Geno" in dataset_name:
cursor.execute(
"SELECT InbredSet.Id FROM "
"InbredSet, GenoFreeze WHERE "
"GenoFreeze.InbredSetId = InbredSet.Id "
"AND GenoFreeze.Name = %s",
(dataset_name,)
)
else:
cursor.execute(
"SELECT InbredSet.Id FROM "
"InbredSet, ProbeSetFreeze, "
"ProbeFreeze WHERE "
"ProbeFreeze.InbredSetId = InbredSet.Id "
"AND ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId "
"AND ProbeSetFreeze.Name = %s",
(dataset_name,)
)
if result := cursor.fetchone():
return result[0]
return None
def get_group_id(group_name):
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
cursor.execute(
"SELECT InbredSet.Id FROM InbredSet "
"WHERE InbredSet.Name = %s",
(group_name,)
)
if group_id := cursor.fetchone():
return group_id[0]
return None