about summary refs log tree commit diff
diff options
context:
space:
mode:
author zsloan 2020-09-11 16:14:02 -0500
committer zsloan 2020-09-11 16:14:02 -0500
commit 9f458b92aa28ac2d00b9bf822c516d0545a79bc9 (patch)
tree 101ddd7906f0d65e08eb08de0b5920a410c322b2
parent 03240c4d22e894b977f7a1a2204a32408245c470 (diff)
download genenetwork2-9f458b92aa28ac2d00b9bf822c516d0545a79bc9.tar.gz
Wrote code and template for reproducing GN1's Info Page (page that
displays info about a dataset + download links for its data)

* wqflask/wqflask/db_info.py - Python that pulls the info out of the DB
and parses the download links from the ipfs response
* wqflask/wqflask/templates/info_page.html - template for displaying
db info; need to make a lot of changes to its format/aesthetics
-rw-r--r-- wqflask/wqflask/db_info.py 127
-rw-r--r-- wqflask/wqflask/templates/info_page.html 92
2 files changed, 219 insertions, 0 deletions
diff --git a/wqflask/wqflask/db_info.py b/wqflask/wqflask/db_info.py
new file mode 100644
index 00000000..f04e38bf
--- /dev/null
+++ b/wqflask/wqflask/db_info.py
@@ -0,0 +1,127 @@
+import httplib, urllib2

+import re

+

+from flask import Flask, g

+

+from utility.logger import getLogger

+logger = getLogger(__name__ )

+

class InfoPage(object):
    """Gather what the dataset info page displays: DB metadata plus download links.

    After construction:
      * ``info`` is the dict built by ``process_query_results`` from the
        matching InfoFiles row, or None when no row matched.
      * ``gn_accession_id`` / ``info_page_name`` hold the identifiers taken
        from ``start_vars``; whichever one was missing is back-filled from
        ``info`` when a row was found.
      * ``filelist`` is a list of ``[filename, filedate, filesize]`` entries
        scraped from the datafiles download listing (empty on any failure).
    """

    # Column order matters: process_query_results reads the row by position.
    _QUERY_BASE = (
        "SELECT InfoPageName, GN_AccesionId, Species.MenuName, Species.TaxonomyId, Tissue.Name, InbredSet.Name, "
        "GeneChip.GeneChipName, GeneChip.GeoPlatform, AvgMethod.Name, Datasets.DatasetName, Datasets.GeoSeries, "
        "Datasets.PublicationTitle, DatasetStatus.DatasetStatusName, Datasets.Summary, Datasets.AboutCases, "
        "Datasets.AboutTissue, Datasets.AboutDataProcessing, Datasets.Acknowledgment, Datasets.ExperimentDesign, "
        "Datasets.Contributors, Datasets.Citation, Datasets.Notes, Investigators.FirstName, Investigators.LastName, "
        "Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country, "
        "Investigators.Phone, Investigators.Email, Investigators.Url, Organizations.OrganizationName, "
        "InvestigatorId, DatasetId, DatasetStatusId, Datasets.AboutPlatform, InfoFileTitle, Specifics "
        "FROM InfoFiles "
        "LEFT JOIN Species USING (SpeciesId) "
        "LEFT JOIN Tissue USING (TissueId) "
        "LEFT JOIN InbredSet USING (InbredSetId) "
        "LEFT JOIN GeneChip USING (GeneChipId) "
        "LEFT JOIN AvgMethod USING (AvgMethodId) "
        "LEFT JOIN Datasets USING (DatasetId) "
        "LEFT JOIN Investigators USING (InvestigatorId) "
        "LEFT JOIN Organizations USING (OrganizationId) "
        "LEFT JOIN DatasetStatus USING (DatasetStatusId) WHERE "
    )

    def __init__(self, start_vars):
        """Read the identifiers from ``start_vars`` and load everything eagerly.

        Args:
            start_vars: mapping that may contain 'gn_accession_id' and/or
                'info_page_name'. At least one must be present and truthy.

        Raises:
            ValueError: neither identifier was supplied (from ``get_info``).
        """
        self.info = None
        # Either key may be absent; get_info copes with one of them missing,
        # so a missing 'info_page_name' must not raise KeyError here.
        self.gn_accession_id = (
            start_vars['gn_accession_id'] if 'gn_accession_id' in start_vars else None)
        self.info_page_name = (
            start_vars['info_page_name'] if 'info_page_name' in start_vars else None)

        self.get_info()
        self.get_datasets_list()

    def _fetch_info_row(self, condition, value):
        """Run the info query with one ``column = %s`` condition; return the first row or None."""
        # The value is passed as a bound parameter instead of being formatted
        # into the SQL text, which both quotes strings correctly and blocks
        # SQL injection.
        # NOTE(review): assumes g.db.execute forwards a parameter tuple to a
        # DB-API driver that uses the ``%s`` paramstyle (e.g. SQLAlchemy on
        # top of MySQLdb) -- confirm against how g.db is created.
        return g.db.execute(self._QUERY_BASE + condition, (value,)).fetchone()

    def get_info(self, create=False):
        """Load the InfoFiles row for this page into ``self.info``.

        The row is looked up by ``gn_accession_id`` when that is set; if that
        finds nothing (or no accession id is set) it is looked up by
        ``info_page_name``.

        Args:
            create: when true and no row matched, insert a bare InfoFiles row
                for ``info_page_name`` and run the lookup once more.

        Raises:
            ValueError: neither ``gn_accession_id`` nor ``info_page_name`` is set.
        """
        if not self.gn_accession_id and not self.info_page_name:
            raise ValueError('No correct parameter found')

        results = None
        if self.gn_accession_id:
            results = self._fetch_info_row("GN_AccesionId = %s", self.gn_accession_id)
        if not results and self.info_page_name:
            # Either no accession id was given or it matched nothing.
            results = self._fetch_info_row("InfoPageName = %s", self.info_page_name)

        if results:
            self.info = process_query_results(results)
        elif self.info_page_name and create:
            # Same bound-parameter assumption as in _fetch_info_row.
            g.db.execute("INSERT INTO InfoFiles SET InfoFiles.InfoPageName = %s",
                         (self.info_page_name,))
            # Re-run with create=False so a failed insert cannot recurse forever.
            return self.get_info()

        if self.info:
            # Back-fill whichever identifier the caller did not supply.
            if not self.gn_accession_id:
                self.gn_accession_id = self.info['accession_id']
            if not self.info_page_name:
                self.info_page_name = self.info['info_page_name']

    @staticmethod
    def _parse_file_rows(html):
        """Extract ``[filename, filedate, filesize]`` entries from the listing's HTML table."""
        entries = []
        rows = re.findall(r"<tr>.+?</tr>", html, re.DOTALL)
        # The first <tr> is skipped (presumably the table's header row).
        for row in rows[1:]:
            cells = re.findall(r"<td.+?>.+?</td>", row, re.DOTALL)
            if len(cells) < 3:
                continue
            link = re.search(r"<a href=\"(.+?)\"", cells[1], re.DOTALL)
            size = re.search(r">(.+?)<", cells[2])
            if not link or not size:
                # Skip rows that do not look like a file entry instead of
                # abandoning the whole listing.
                continue
            filename = link.group(1).strip().split("/")[-1]
            filedate = "N/A"  # ZS: Since we can't get it for now
            entries.append([filename, filedate, size.group(1).strip()])
        return entries

    def get_datasets_list(self):
        """Populate ``self.filelist`` from the datafiles download listing.

        This is best-effort: when no accession id is known, or the listing
        cannot be fetched, ``filelist`` is left empty and the page can still
        be rendered.
        """
        self.filelist = []
        if not self.gn_accession_id:
            # Without an accession id there is no listing URL to ask for.
            return

        url = "http://datafiles.genenetwork.org/download/GN%s" % self.gn_accession_id
        try:
            response = urllib2.urlopen(url)
            try:
                data = response.read()
            finally:
                response.close()
        except (IOError, httplib.HTTPException) as e:
            # urllib2.URLError/HTTPError and socket errors derive from IOError.
            logger.warning("Could not fetch download list from {}: {}".format(url, e))
            return

        self.filelist = self._parse_file_rows(data)

+

def process_query_results(results):
    """Label one row of the info-page query by column meaning.

    Args:
        results: an indexable row holding at least 39 values, in the column
            order selected by the query in ``InfoPage.get_info``.

    Returns:
        A dict mapping descriptive keys to the row's values. The
        DatasetStatusId column is available under ``'dataset_status_id'`` and,
        for backward compatibility, under the original misspelled key
        ``'dataset_status_is'``.

    Raises:
        IndexError: when ``results`` is a tuple/list with fewer than 39 entries.
    """
    # Position in this tuple == column position in the SELECT list.
    keys = (
        'info_page_name',
        'accession_id',
        'menu_name',
        'taxonomy_id',
        'tissue_name',
        'group_name',
        'gene_chip_name',
        'geo_platform',
        'avg_method_name',
        'dataset_name',
        'geo_series',
        'publication_title',
        'dataset_status_name',
        'dataset_summary',
        'about_cases',
        'about_tissue',
        'about_data_processing',
        'acknowledgement',
        'experiment_design',
        'contributors',
        'citation',
        'notes',
        'investigator_firstname',
        'investigator_lastname',
        'investigator_address',
        'investigator_city',
        'investigator_state',
        'investigator_zipcode',
        'investigator_country',
        'investigator_phone',
        'investigator_email',
        'investigator_url',
        'organization_name',
        'investigator_id',
        'dataset_id',
        'dataset_status_is',
        'about_platform',
        'info_file_title',
        'specifics',
    )

    info_ob = {}
    for position, key in enumerate(keys):
        # Index explicitly (rather than zip) so a short row fails loudly.
        info_ob[key] = results[position]

    # Correctly spelled alias for the typo'd key above.
    info_ob['dataset_status_id'] = info_ob['dataset_status_is']

    return info_ob

+        
\ No newline at end of file
diff --git a/wqflask/wqflask/templates/info_page.html b/wqflask/wqflask/templates/info_page.html
new file mode 100644
index 00000000..d8b7d74c
--- /dev/null
+++ b/wqflask/wqflask/templates/info_page.html
@@ -0,0 +1,92 @@
{% extends "base.html" %}
{#
  Dataset info page: metadata about one data set plus links to its
  downloadable files.

  Variables read from the render context:
    info             - dict of dataset metadata (keys as used below)
    gn_accession_id  - accession number, shown as "GN<id>" and used in download URLs
    filelist         - list of entries; file[0] is the file name, file[2] its size
  NOTE(review): presumably these are the attributes of db_info.InfoPage --
  confirm against the view that renders this template.
#}
{% block title %}Dataset Info{% endblock %}
{% block content %}

<h1 id="parent-fieldname-title">Data Set Group: {{ info.dataset_name }}
<!--<a href="/infoshare/manager/member-studies-edit.html?DatasetId=%s"><img src="/images/modify.gif" alt="modify this page" border="0" valign="middle"></a>-->
<span style="color:red;">{{ info.info_page_name }}</span>
</h1>
<table border="0" width="100%">
<tr>
<td valign="top" width="50%">
<table name="info_table" cellSpacing=0 cellPadding=5 width=100% border=0>
    <tr><td><b>Data Set:</b>  {{ info.info_file_title }} <!--<a href="/infoshare/manager/member-infofile-edit.html?GN_AccesionId=%s"><img src="/images/modify.gif" alt="modify this page" border="0" valign="middle"></a>--></td></tr>
    <tr><td><b>GN Accession:</b> GN{{ gn_accession_id }}</td></tr>
    <tr><td><b>GEO Series:</b> <a href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={{ info.geo_series }}" target="_blank">{{ info.geo_series }}</a></td></tr>
    <tr><td><b>Title:</b> {{ info.publication_title }}</td></tr>
    <tr><td><b>Organism:</b> <a href="http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id={{ info.taxonomy_id }}" target="_blank">{{ info.menu_name }}</a></td></tr>
    <tr><td><b>Group:</b> {{ info.group_name }}</td></tr>
    <tr><td><b>Tissue:</b> {{ info.tissue_name }}</td></tr>
    <tr><td><b>Dataset Status:</b> {{ info.dataset_status_name }}</td></tr>
    <tr><td><b>Platforms:</b> <a href="http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={{ info.geo_platform }}" target="_blank">{{ info.gene_chip_name }}</a></td></tr>
    <tr><td><b>Normalization:</b> {{ info.avg_method_name }}</td></tr>
</table>
</td>
<td valign="top" width="50%">
<table border="0" width="100%">
    <tr>
        <td><b>Contact Information</b></td>
    </tr>
    <tr>
        <td>
            {# Key names must match the dict built by process_query_results (no underscore before "name"). #}
            {{ info.investigator_firstname }} {{ info.investigator_lastname }}<br>
            {{ info.organization_name }} <br>
            {{ info.investigator_address }}<br>
            {{ info.investigator_city }}, {{ info.investigator_state }} {{ info.investigator_zipcode }} {{ info.investigator_country }}<br>
            Tel. {{ info.investigator_phone }}<br>
            {{ info.investigator_email }}<br>
            <a href="{{ info.investigator_url }}" target="_blank">Website</a>
        </td>
    </tr>

<tr>
    <td><b>Download datasets and supplementary data files</b></td>
</tr>
<tr>
    <td>
        <ul style="line-height: 160%">
            {% for file in filelist %}
            <li><a href="http://datafiles.genenetwork.org/download/GN{{ gn_accession_id }}/{{ file[0] }}">{{ file[0] }} ({{ file[2] }})</a></li>
            {% endfor %}
        </ul>
    </td>
</tr>

<tr><td>
</td></tr>

</table>
</td>
</tr>
</table>
<HR>
<p>
{#
  The long-form fields below are rendered with |safe, i.e. without HTML escaping.
  NOTE(review): presumably they hold curated HTML stored in the database --
  confirm that untrusted users cannot write to these columns.
#}
<table name="info_table" width="100%" border="0" cellpadding="5" cellspacing="0">
<tr><td><span style="font-size:115%; font-weight:bold;">Specifics of this Data Set:</span></td></tr>
    <tr><td> {{ info.specifics|safe }}<br><br></td></tr>
<tr><td><span style="font-size:115%; font-weight:bold;">Summary:</span></td></tr>
    <tr><td> {{ info.dataset_summary|safe }}<br><br></td></tr>
<tr><td><span style="font-size:115%; font-weight:bold;">About the cases used to generate this set of data:</span></td></tr>
    <tr><td> {{ info.about_cases|safe }}<br><br></td></tr>
<tr><td><span style="font-size:115%; font-weight:bold;">About the tissue used to generate this set of data:</span></td></tr>
    <tr><td> {{ info.about_tissue|safe }}<br><br></td></tr>
    <tr><td><span style="font-size:115%; font-weight:bold;">About the array platform:</span></td></tr>
    <tr><td> {{ info.about_platform|safe }}<br><br></td></tr>
<tr><td><span style="font-size:115%; font-weight:bold;">About data values and data processing:</span></td></tr>
    <tr><td> {{ info.about_data_processing|safe }}<br><br></td></tr>
<tr><td><span style="font-size:115%; font-weight:bold;">Notes:</span></td></tr>
    <tr><td> {{ info.notes|safe }}<br><br></td></tr>
<tr><td><span style="font-size:115%; font-weight:bold;">Experiment Type:</span></td></tr>
    <tr><td> {{ info.experiment_design|safe }}<br><br></td></tr>
<tr><td><span style="font-size:115%; font-weight:bold;">Contributor:</span></td></tr>
    <tr><td> {{ info.contributors|safe }}<br><br></td></tr>
<tr><td><span style="font-size:115%; font-weight:bold;">Citation:</span></td></tr>
    <tr><td> {{ info.citation|safe }}<br><br></td></tr>
<tr><td><span style="font-size:115%; font-weight:bold;">Data source acknowledgment:</span></td></tr>
    <tr><td> {{ info.acknowledgement|safe }}<br><br></td></tr>
<tr><td><span style="font-size:115%; font-weight:bold;">Study Id:</span></td></tr>
    <tr><td> {{ info.dataset_id }}<br><br></td></tr>
</table>
</p>

{% endblock %}
\ No newline at end of file