diff options
author | zsloan | 2020-09-11 16:14:02 -0500 |
---|---|---|
committer | zsloan | 2020-09-11 16:14:02 -0500 |
commit | 9f458b92aa28ac2d00b9bf822c516d0545a79bc9 (patch) | |
tree | 101ddd7906f0d65e08eb08de0b5920a410c322b2 /wqflask | |
parent | 03240c4d22e894b977f7a1a2204a32408245c470 (diff) | |
download | genenetwork2-9f458b92aa28ac2d00b9bf822c516d0545a79bc9.tar.gz |
Wrote code and template for reproducing GN1's Info Page (page that
displays info about a dataset + download links for its data)
* wqflask/wqflask/db_info.py - Python that pulls the info out of the DB
and parses the download links from the ipfs response
* wqflask/wqflask/templates/info_page.html - template for displaying
db info; need to make a lot of changes to its format/aesthetics
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/wqflask/db_info.py | 127 | ||||
-rw-r--r-- | wqflask/wqflask/templates/info_page.html | 92 |
2 files changed, 219 insertions, 0 deletions
diff --git a/wqflask/wqflask/db_info.py b/wqflask/wqflask/db_info.py new file mode 100644 index 00000000..f04e38bf --- /dev/null +++ b/wqflask/wqflask/db_info.py @@ -0,0 +1,127 @@ +import httplib, urllib2
+import re
+
+from flask import Flask, g
+
+from utility.logger import getLogger
+logger = getLogger(__name__ )
+
+class InfoPage(object):
+ def __init__(self, start_vars):
+ self.info = None
+ self.gn_accession_id = None
+ if 'gn_accession_id' in start_vars:
+ self.gn_accession_id = start_vars['gn_accession_id']
+ self.info_page_name = start_vars['info_page_name']
+
+ self.get_info()
+ self.get_datasets_list()
+
+ def get_info(self, create=False):
+ query_base = ("SELECT InfoPageName, GN_AccesionId, Species.MenuName, Species.TaxonomyId, Tissue.Name, InbredSet.Name, " +
+ "GeneChip.GeneChipName, GeneChip.GeoPlatform, AvgMethod.Name, Datasets.DatasetName, Datasets.GeoSeries, " +
+ "Datasets.PublicationTitle, DatasetStatus.DatasetStatusName, Datasets.Summary, Datasets.AboutCases, " +
+ "Datasets.AboutTissue, Datasets.AboutDataProcessing, Datasets.Acknowledgment, Datasets.ExperimentDesign, " +
+ "Datasets.Contributors, Datasets.Citation, Datasets.Notes, Investigators.FirstName, Investigators.LastName, " +
+ "Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country, " +
+ "Investigators.Phone, Investigators.Email, Investigators.Url, Organizations.OrganizationName, " +
+ "InvestigatorId, DatasetId, DatasetStatusId, Datasets.AboutPlatform, InfoFileTitle, Specifics " +
+ "FROM InfoFiles " +
+ "LEFT JOIN Species USING (SpeciesId) " +
+ "LEFT JOIN Tissue USING (TissueId) " +
+ "LEFT JOIN InbredSet USING (InbredSetId) " +
+ "LEFT JOIN GeneChip USING (GeneChipId) " +
+ "LEFT JOIN AvgMethod USING (AvgMethodId) " +
+ "LEFT JOIN Datasets USING (DatasetId) " +
+ "LEFT JOIN Investigators USING (InvestigatorId) " +
+ "LEFT JOIN Organizations USING (OrganizationId) " +
+ "LEFT JOIN DatasetStatus USING (DatasetStatusId) WHERE ")
+
+ if self.gn_accession_id:
+ final_query = query_base + "GN_AccesionId = {}".format(self.gn_accession_id)
+ results = g.db.execute(final_query).fetchone()
+ if self.info_page_name and not results:
+ final_query = query_base + "InfoPageName={}".format(self.info_page_name)
+ elif self.info_page_name:
+ final_query = query_base + "InfoPageName={}".format(self.info_page_name)
+ results = g.db.execute(final_query).fetchone()
+ else:
+ raise 'No correct parameter found'
+
+ if results:
+ self.info = process_query_results(results)
+
+ if (not results or len(results) < 1) and self.info_page_name and create:
+ insert_sql = "INSERT INTO InfoFiles SET InfoFiles.InfoPageName={}".format(self.info_page_name)
+ return self.get_info()
+
+ if not self.gn_accession_id and self.info:
+ self.gn_accession_id = self.info['accession_id']
+ if not self.info_page_name and self.info:
+ self.info_page_name = self.info['info_page_name']
+
+ def get_datasets_list(self):
+ self.filelist = []
+ try:
+ response = urllib2.urlopen("http://datafiles.genenetwork.org/download/GN%s" % self.gn_accession_id)
+ data = response.read()
+
+ matches = re.findall(r"<tr>.+?</tr>", data, re.DOTALL)
+ for i, match in enumerate(matches):
+ if i == 0:
+ continue
+ cells = re.findall(r"<td.+?>.+?</td>", match, re.DOTALL)
+ full_filename = re.search(r"<a href=\"(.+?)\"", cells[1], re.DOTALL).group(1).strip()
+ filename = full_filename.split("/")[-1]
+ filesize = re.search(r">(.+?)<", cells[2]).group(1).strip()
+ filedate = "N/A" #ZS: Since we can't get it for now
+
+ self.filelist.append([filename, filedate, filesize])
+ except Exception, e:
+ pass
+
+def process_query_results(results):
+ info_ob = {
+ 'info_page_name': results[0],
+ 'accession_id': results[1],
+ 'menu_name': results[2],
+ 'taxonomy_id': results[3],
+ 'tissue_name': results[4],
+ 'group_name': results[5],
+ 'gene_chip_name': results[6],
+ 'geo_platform': results[7],
+ 'avg_method_name': results[8],
+ 'dataset_name': results[9],
+ 'geo_series': results[10],
+ 'publication_title': results[11],
+ 'dataset_status_name': results[12],
+ 'dataset_summary': results[13],
+ 'about_cases': results[14],
+ 'about_tissue': results[15],
+ 'about_data_processing': results[16],
+ 'acknowledgement': results[17],
+ 'experiment_design': results[18],
+ 'contributors': results[19],
+ 'citation': results[20],
+ 'notes': results[21],
+ 'investigator_firstname': results[22],
+ 'investigator_lastname': results[23],
+ 'investigator_address': results[24],
+ 'investigator_city': results[25],
+ 'investigator_state': results[26],
+ 'investigator_zipcode': results[27],
+ 'investigator_country': results[28],
+ 'investigator_phone': results[29],
+ 'investigator_email': results[30],
+ 'investigator_url': results[31],
+ 'organization_name': results[32],
+ 'investigator_id': results[33],
+ 'dataset_id': results[34],
+ 'dataset_status_is': results[35],
+ 'about_platform': results[36],
+ 'info_file_title': results[37],
+ 'specifics': results[38]
+ }
+
+ return info_ob
+
\ No newline at end of file diff --git a/wqflask/wqflask/templates/info_page.html b/wqflask/wqflask/templates/info_page.html new file mode 100644 index 00000000..d8b7d74c --- /dev/null +++ b/wqflask/wqflask/templates/info_page.html @@ -0,0 +1,92 @@ +{% extends "base.html" %}
+{% block title %}Policies{% endblock %}
+{% block content %}
+
+<h1 id="parent-fieldname-title">Data Set Group: {{ info.dataset_name }}
+<!--<a href="/infoshare/manager/member-studies-edit.html?DatasetId=%s"><img src="/images/modify.gif" alt="modify this page" border="0" valign="middle"></a>-->
+<span style="color:red;">{{ info.info_page_name }}</span>
+</h1>
+<table border="0" width="100%">
+<tr>
+<td valign="top" width="50%">
+<table name="info_table" cellSpacing=0 cellPadding=5 width=100% border=0>
+ <tr><td><b>Data Set:</b> {{ info.info_file_title }} <!--<a href="/infoshare/manager/member-infofile-edit.html?GN_AccesionId=%s"><img src="/images/modify.gif" alt="modify this page" border="0" valign="middle"></a>--></td></tr>
+ <tr><td><b>GN Accession:</b> GN{{ gn_accession_id }}</td></tr>
+ <tr><td><b>GEO Series:</b> <a href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={{ info.geo_series }}" target="_blank">{{ info.geo_series }}</a></td></tr>
+ <tr><td><b>Title:</b> {{ info.publication_title }}</td></tr>
+ <tr><td><b>Organism:</b> <a href="http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id={{ info.taxonomy_id }}" target="_blank">{{ info.menu_name }}</a></td></tr>
+ <tr><td><b>Group:</b> {{ info.group_name }}</td></tr>
+ <tr><td><b>Tissue:</b> {{ info.tissue_name }}</td></tr>
+ <tr><td><b>Dataset Status:</b> {{ info.dataset_status_name }}</td></tr>
+ <tr><td><b>Platforms:</b> <a href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={{ info.geo_platform }}" target="_blank">{{ info.gene_chip_name }}</a></td></tr>
+ <tr><td><b>Normalization:</b> {{ info.avg_method_name }}</td></tr>
+</table>
+</td>
+<td valign="top" width="50%">
+<table border="0" width="100%">
+ <tr>
+ <td><b>Contact Information</b></td>
+ </tr>
+ <tr>
+ <td>
+ {{ info.investigator_first_name }} {{ info.inveestigator_last_name }}<br>
+ {{ info.organization_name }} <br>
+ {{ info.investigator_address }}<br>
+ {{ info.investigator_city }}, {{ info.investigator_state }} {{ info.investigator_zipcode }} {{ info.investigator_country }}<br>
+ Tel. {{ info.investigator_phone }}<br>
+ {{ info.investigator_email }}<br>
+ <a href="{{ info.investigator_url }}" target="_blank">Website</a>
+ </td>
+ </tr>
+
+<tr>
+ <td><b>Download datasets and supplementary data files</b></td>
+</tr>
+<tr>
+ <td>
+ <ul style="line-height: 160%">
+ {% for file in filelist %}
+ <li><a href="http://datafiles.genenetwork.org/download/GN{{ gn_accession_id }}/{{ file[0] }}">{{ file[0] }} ({{ file[2] }})</a></li>
+ {% endfor %}
+ </ul>
+ </td>
+</tr>
+
+<tr><td>
+</td></tr>
+
+</table>
+</td>
+</tr>
+</table>
+<HR>
+<p>
+<table name="info_table" width="100%" border="0" cellpadding="5" cellspacing="0">
+<tr><td><span style="font-size:115%%;font-weight:bold;">Specifics of this Data Set:</span></td></tr>
+ <tr><td> {{ info.specifics|safe }}<br><br></td></tr>
+<tr><td><span style="font-size:115%%;font-weight:bold;">Summary:</span></td></tr>
+ <tr><td> {{ info.dataset_summary|safe }}<br><br></td></tr>
+<tr><td><span style="font-size:115%; font-weight:bold;">About the cases used to generate this set of data:</span></td></tr>
+ <tr><td> {{ info.about_cases|safe }}<br><br></td></tr>
+<tr><td><span style="font-size:115%; font-weight:bold;">About the tissue used to generate this set of data:</span></td></tr>
+ <tr><td> {{ info.about_tissue|safe }}<br><br></td></tr>
+ <tr><td><span style="font-size:115%; font-weight:bold;">About the array platform:</span></td></tr>
+ <tr><td> {{ info.about_platform|safe }}<br><br></td></tr>
+<tr><td><span style="font-size:115%; font-weight:bold;">About data values and data processing:</span></td></tr>
+ <tr><td> {{ info.about_data_processing|safe }}<br><br></td></tr>
+<tr><td><span style="font-size:115%; font-weight:bold;">Notes:</span></td></tr>
+ <tr><td> {{ info.notes|safe }}<br><br></td></tr>
+<tr><td><span style="font-size:115%; font-weight:bold;">Experiment Type:</span></td></tr>
+ <tr><td> {{ info.experiment_design|safe }}<br><br></td></tr>
+<tr><td><span style="font-size:115%; font-weight:bold;">Contributor:</span></td></tr>
+ <tr><td> {{ info.contributors|safe }}<br><br></td></tr>
+<tr><td><span style="font-size:115%; font-weight:bold;">Citation:</span></td></tr>
+ <tr><td> {{ info.citation|safe }}<br><br></td></tr>
+<tr><td><span style="font-size:115%; font-weight:bold;">Data source acknowledgment:</span></td></tr>
+ <tr><td> {{ info.acknowledgement|safe }}<br><br></td></tr>
+<tr><td><span style="font-size:115%; font-weight:bold;">Study Id:</span></td></tr>
+ <tr><td> {{ info.dataset_id }}<br><br></td></tr>
+</table>
+</p>
+
+{% endblock %}
\ No newline at end of file |