diff options
-rw-r--r-- | wqflask/wqflask/db_info.py | 127 | ||||
-rw-r--r-- | wqflask/wqflask/templates/info_page.html | 92 |
2 files changed, 219 insertions, 0 deletions
# SELECT used to load one InfoFiles row together with its joined metadata.
# The column order must stay in sync with _INFO_FIELDS below, because rows are
# mapped to dictionary keys purely by position.  The statement ends with
# "WHERE " so the caller can append a single "<column> = %s" condition.
_INFO_QUERY_BASE = (
    "SELECT InfoPageName, GN_AccesionId, Species.MenuName, Species.TaxonomyId, Tissue.Name, InbredSet.Name, "
    "GeneChip.GeneChipName, GeneChip.GeoPlatform, AvgMethod.Name, Datasets.DatasetName, Datasets.GeoSeries, "
    "Datasets.PublicationTitle, DatasetStatus.DatasetStatusName, Datasets.Summary, Datasets.AboutCases, "
    "Datasets.AboutTissue, Datasets.AboutDataProcessing, Datasets.Acknowledgment, Datasets.ExperimentDesign, "
    "Datasets.Contributors, Datasets.Citation, Datasets.Notes, Investigators.FirstName, Investigators.LastName, "
    "Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country, "
    "Investigators.Phone, Investigators.Email, Investigators.Url, Organizations.OrganizationName, "
    "InvestigatorId, DatasetId, DatasetStatusId, Datasets.AboutPlatform, InfoFileTitle, Specifics "
    "FROM InfoFiles "
    "LEFT JOIN Species USING (SpeciesId) "
    "LEFT JOIN Tissue USING (TissueId) "
    "LEFT JOIN InbredSet USING (InbredSetId) "
    "LEFT JOIN GeneChip USING (GeneChipId) "
    "LEFT JOIN AvgMethod USING (AvgMethodId) "
    "LEFT JOIN Datasets USING (DatasetId) "
    "LEFT JOIN Investigators USING (InvestigatorId) "
    "LEFT JOIN Organizations USING (OrganizationId) "
    "LEFT JOIN DatasetStatus USING (DatasetStatusId) WHERE "
)

# Dictionary keys for the columns selected by _INFO_QUERY_BASE, in column order.
# NOTE(review): 'dataset_status_is' looks like a typo for 'dataset_status_id';
# it is kept unchanged because templates or callers may already rely on it.
_INFO_FIELDS = (
    'info_page_name',
    'accession_id',
    'menu_name',
    'taxonomy_id',
    'tissue_name',
    'group_name',
    'gene_chip_name',
    'geo_platform',
    'avg_method_name',
    'dataset_name',
    'geo_series',
    'publication_title',
    'dataset_status_name',
    'dataset_summary',
    'about_cases',
    'about_tissue',
    'about_data_processing',
    'acknowledgement',
    'experiment_design',
    'contributors',
    'citation',
    'notes',
    'investigator_firstname',
    'investigator_lastname',
    'investigator_address',
    'investigator_city',
    'investigator_state',
    'investigator_zipcode',
    'investigator_country',
    'investigator_phone',
    'investigator_email',
    'investigator_url',
    'organization_name',
    'investigator_id',
    'dataset_id',
    'dataset_status_is',
    'about_platform',
    'info_file_title',
    'specifics',
)


class InfoPage(object):
    """Data backing the data set info page.

    On construction the object loads one ``InfoFiles`` row (looked up by GN
    accession id and/or info page name) into ``self.info`` and fills
    ``self.filelist`` with the files listed for that accession id on the
    datafiles download site.

    Attributes:
        info: dict produced by ``process_query_results``, or None when no
            matching row was found.
        gn_accession_id: accession id from ``start_vars`` or, failing that,
            from the loaded row.
        info_page_name: page name from ``start_vars`` or, failing that, from
            the loaded row.
        filelist: list of ``[filename, filedate, filesize]`` entries.
    """

    # Download directory listing; the GN accession id is substituted in.
    DATAFILES_URL = "http://datafiles.genenetwork.org/download/GN%s"
    # Seconds to wait for the listing so a slow remote host cannot stall the page.
    DATAFILES_TIMEOUT = 10

    def __init__(self, start_vars):
        """Load the info record and file list described by ``start_vars``.

        Args:
            start_vars: mapping that may contain 'gn_accession_id' and/or
                'info_page_name' (assumed to support ``.get`` like a dict or
                a Flask request-args mapping -- confirm against the caller).

        Raises:
            ValueError: if neither lookup key is present.
        """
        self.info = None
        self.filelist = []
        # Either key may be absent; get_info() copes with one of them missing
        # and backfills it from the database row.
        self.gn_accession_id = start_vars.get('gn_accession_id')
        self.info_page_name = start_vars.get('info_page_name')

        self.get_info()
        self.get_datasets_list()

    def get_info(self, create=False):
        """Populate ``self.info`` from the InfoFiles table.

        The row is looked up by accession id first; when that yields nothing
        (or no accession id is set) the info page name is tried.

        Args:
            create: when true and no row exists for ``info_page_name``, insert
                an empty InfoFiles row with that name and load it.

        Raises:
            ValueError: if neither ``gn_accession_id`` nor ``info_page_name``
                is set.
        """
        if not (self.gn_accession_id or self.info_page_name):
            raise ValueError('No correct parameter found')

        results = None
        if self.gn_accession_id:
            results = self._fetch_info_row("GN_AccesionId", self.gn_accession_id)
        if not results and self.info_page_name:
            results = self._fetch_info_row("InfoPageName", self.info_page_name)

        if not results and self.info_page_name and create:
            g.db.execute(
                "INSERT INTO InfoFiles SET InfoFiles.InfoPageName = %s",
                (self.info_page_name,))
            # Re-read without create=True so a failed insert cannot recurse forever.
            return self.get_info()

        if results:
            self.info = process_query_results(results)

        if self.info:
            # Backfill whichever identifier the caller did not supply.
            if not self.gn_accession_id:
                self.gn_accession_id = self.info['accession_id']
            if not self.info_page_name:
                self.info_page_name = self.info['info_page_name']

    def _fetch_info_row(self, column, value):
        """Return the first InfoFiles row where ``column`` equals ``value``.

        ``column`` must be a trusted, hard-coded column name; ``value`` is
        passed as a bound parameter so it is never spliced into the SQL text.
        NOTE(review): assumes g.db accepts DB-API 'format' style (%s)
        positional parameters, as the MySQL drivers do -- confirm.
        """
        query = _INFO_QUERY_BASE + column + " = %s"
        return g.db.execute(query, (value,)).fetchone()

    def get_datasets_list(self):
        """Fill ``self.filelist`` from the remote download directory listing.

        This is best effort: any failure to fetch the listing is logged and
        leaves ``self.filelist`` empty instead of breaking the page.
        """
        self.filelist = []
        if not self.gn_accession_id:
            # Nothing to look up; avoids requesting the bogus ".../GNNone" URL.
            return

        url = self.DATAFILES_URL % self.gn_accession_id
        try:
            response = urllib2.urlopen(url, timeout=self.DATAFILES_TIMEOUT)
            try:
                data = response.read()
            finally:
                response.close()
        except Exception as exc:
            logger.warning("Could not fetch file list from %s: %s", url, exc)
            return

        self.filelist = self._parse_file_listing(data)

    @staticmethod
    def _parse_file_listing(html):
        """Extract ``[filename, filedate, filesize]`` entries from listing HTML.

        Rows that do not have the expected link and size cells are skipped
        rather than aborting the whole listing.
        """
        entries = []
        rows = re.findall(r"<tr>.+?</tr>", html, re.DOTALL)
        # The first <tr> is skipped (presumably the table header row).
        for row in rows[1:]:
            cells = re.findall(r"<td.+?>.+?</td>", row, re.DOTALL)
            if len(cells) < 3:
                continue
            link = re.search(r"<a href=\"(.+?)\"", cells[1], re.DOTALL)
            size = re.search(r">(.+?)<", cells[2])
            if link is None or size is None:
                continue
            filename = link.group(1).strip().split("/")[-1]
            filedate = "N/A"  # ZS: Since we can't get it for now
            entries.append([filename, filedate, size.group(1).strip()])
        return entries


def process_query_results(results):
    """Map one row of the info query onto a dict keyed by field name.

    Args:
        results: indexable row holding at least ``len(_INFO_FIELDS)`` values
            in the column order of ``_INFO_QUERY_BASE``.

    Returns:
        dict with one entry per name in ``_INFO_FIELDS``.

    Raises:
        IndexError: if ``results`` has fewer values than expected.
    """
    return dict((name, results[index]) for index, name in enumerate(_INFO_FIELDS))
valign="middle"></a>--></td></tr> + <tr><td><b>GN Accession:</b> GN{{ gn_accession_id }}</td></tr> + <tr><td><b>GEO Series:</b> <a href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={{ info.geo_series }}" target="_blank">{{ info.geo_series }}</a></td></tr> + <tr><td><b>Title:</b> {{ info.publication_title }}</td></tr> + <tr><td><b>Organism:</b> <a href="http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id={{ info.taxonomy_id }}" target="_blank">{{ info.menu_name }}</a></td></tr> + <tr><td><b>Group:</b> {{ info.group_name }}</td></tr> + <tr><td><b>Tissue:</b> {{ info.tissue_name }}</td></tr> + <tr><td><b>Dataset Status:</b> {{ info.dataset_status_name }}</td></tr> + <tr><td><b>Platforms:</b> <a href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={{ info.geo_platform }}" target="_blank">{{ info.gene_chip_name }}</a></td></tr> + <tr><td><b>Normalization:</b> {{ info.avg_method_name }}</td></tr> +</table> +</td> +<td valign="top" width="50%"> +<table border="0" width="100%"> + <tr> + <td><b>Contact Information</b></td> + </tr> + <tr> + <td> + {{ info.investigator_firstname }} {{ info.investigator_lastname }}<br> + {{ info.organization_name }} <br> + {{ info.investigator_address }}<br> + {{ info.investigator_city }}, {{ info.investigator_state }} {{ info.investigator_zipcode }} {{ info.investigator_country }}<br> + Tel. 
{{ info.investigator_phone }}<br> + {{ info.investigator_email }}<br> + <a href="{{ info.investigator_url }}" target="_blank">Website</a> + </td> + </tr> + +<tr> + <td><b>Download datasets and supplementary data files</b></td> +</tr> +<tr> + <td> + <ul style="line-height: 160%"> + {% for file in filelist %} + <li><a href="http://datafiles.genenetwork.org/download/GN{{ gn_accession_id }}/{{ file[0] }}">{{ file[0] }} ({{ file[2] }})</a></li> + {% endfor %} + </ul> + </td> +</tr> + +<tr><td> +</td></tr> + +</table> +</td> +</tr> +</table> +<HR> +<p> +<table name="info_table" width="100%" border="0" cellpadding="5" cellspacing="0"> +<tr><td><span style="font-size:115%;font-weight:bold;">Specifics of this Data Set:</span></td></tr> + <tr><td> {{ info.specifics|safe }}<br><br></td></tr> +<tr><td><span style="font-size:115%;font-weight:bold;">Summary:</span></td></tr> + <tr><td> {{ info.dataset_summary|safe }}<br><br></td></tr> +<tr><td><span style="font-size:115%; font-weight:bold;">About the cases used to generate this set of data:</span></td></tr> + <tr><td> {{ info.about_cases|safe }}<br><br></td></tr> +<tr><td><span style="font-size:115%; font-weight:bold;">About the tissue used to generate this set of data:</span></td></tr> + <tr><td> {{ info.about_tissue|safe }}<br><br></td></tr> + <tr><td><span style="font-size:115%; font-weight:bold;">About the array platform:</span></td></tr> + <tr><td> {{ info.about_platform|safe }}<br><br></td></tr> +<tr><td><span style="font-size:115%; font-weight:bold;">About data values and data processing:</span></td></tr> + <tr><td> {{ info.about_data_processing|safe }}<br><br></td></tr> +<tr><td><span style="font-size:115%; font-weight:bold;">Notes:</span></td></tr> + <tr><td> {{ info.notes|safe }}<br><br></td></tr> +<tr><td><span style="font-size:115%; font-weight:bold;">Experiment Type:</span></td></tr> + <tr><td> {{ info.experiment_design|safe }}<br><br></td></tr> +<tr><td><span style="font-size:115%; 
font-weight:bold;">Contributor:</span></td></tr> + <tr><td> {{ info.contributors|safe }}<br><br></td></tr> +<tr><td><span style="font-size:115%; font-weight:bold;">Citation:</span></td></tr> + <tr><td> {{ info.citation|safe }}<br><br></td></tr> +<tr><td><span style="font-size:115%; font-weight:bold;">Data source acknowledgment:</span></td></tr> + <tr><td> {{ info.acknowledgement|safe }}<br><br></td></tr> +<tr><td><span style="font-size:115%; font-weight:bold;">Study Id:</span></td></tr> + <tr><td> {{ info.dataset_id }}<br><br></td></tr> +</table> +</p> + +{% endblock %} \ No newline at end of file |