From 9f458b92aa28ac2d00b9bf822c516d0545a79bc9 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 11 Sep 2020 16:14:02 -0500
Subject: Wrote code and template for reproducing GN1's Info Page (page that
displays info about a dataset + download links for its data)
* wqflask/wqflask/db_info.py - Python that pulls the info out of the DB
and parses the download links from the ipfs response
* wqflask/wqflask/templates/info_page.html - template for displaying
db info; need to make a lot of changes to its format/aesthetics
---
wqflask/wqflask/db_info.py | 127 +++++++++++++++++++++++++++++++
wqflask/wqflask/templates/info_page.html | 92 ++++++++++++++++++++++
2 files changed, 219 insertions(+)
create mode 100644 wqflask/wqflask/db_info.py
create mode 100644 wqflask/wqflask/templates/info_page.html
diff --git a/wqflask/wqflask/db_info.py b/wqflask/wqflask/db_info.py
new file mode 100644
index 00000000..f04e38bf
--- /dev/null
+++ b/wqflask/wqflask/db_info.py
@@ -0,0 +1,127 @@
+import httplib, urllib2
+import re
+
+from flask import Flask, g
+
+from utility.logger import getLogger
+logger = getLogger(__name__ )
+
+class InfoPage(object):
+    """Metadata and downloadable files for a single dataset info page.
+
+    The page is identified by ``gn_accession_id`` and/or ``info_page_name``
+    taken from ``start_vars``.  Constructing an instance immediately queries
+    the database (``get_info``) and the datafiles server
+    (``get_datasets_list``).
+
+    Attributes:
+        info: dict built by ``process_query_results``, or None when no
+            matching InfoFiles row was found.
+        gn_accession_id: accession id from ``start_vars``, or back-filled
+            from the database row.
+        info_page_name: page name from ``start_vars``, or back-filled from
+            the database row.
+        filelist: list of ``[filename, filedate, filesize]`` entries.
+    """
+
+    def __init__(self, start_vars):
+        """Read the identifiers from ``start_vars`` and load the page data.
+
+        Args:
+            start_vars: mapping of request parameters.  At least one of
+                'gn_accession_id' and 'info_page_name' must be present and
+                non-empty.
+
+        Raises:
+            ValueError: if neither identifier is supplied.
+        """
+        self.info = None
+        self.gn_accession_id = None
+        self.info_page_name = None
+        if 'gn_accession_id' in start_vars:
+            self.gn_accession_id = start_vars['gn_accession_id']
+        # A page requested by accession id alone must not fail with KeyError.
+        if 'info_page_name' in start_vars:
+            self.info_page_name = start_vars['info_page_name']
+
+        self.get_info()
+        self.get_datasets_list()
+
+    def get_info(self, create=False):
+        """Load the InfoFiles row for this page into ``self.info``.
+
+        The row is looked up by accession id first; if that finds nothing
+        (or no accession id was given) it is looked up by info page name.
+        Whichever identifier was missing is back-filled from the row.
+
+        Args:
+            create: when true and no row exists for ``info_page_name``, insert
+                an empty InfoFiles row with that name and look it up again.
+
+        Raises:
+            ValueError: if neither ``gn_accession_id`` nor ``info_page_name``
+                is set.
+        """
+        if not self.gn_accession_id and not self.info_page_name:
+            # Raising a bare string is itself a TypeError; use a real exception.
+            raise ValueError('No correct parameter found')
+
+        query_base = ("SELECT InfoPageName, GN_AccesionId, Species.MenuName, Species.TaxonomyId, Tissue.Name, InbredSet.Name, " +
+                      "GeneChip.GeneChipName, GeneChip.GeoPlatform, AvgMethod.Name, Datasets.DatasetName, Datasets.GeoSeries, " +
+                      "Datasets.PublicationTitle, DatasetStatus.DatasetStatusName, Datasets.Summary, Datasets.AboutCases, " +
+                      "Datasets.AboutTissue, Datasets.AboutDataProcessing, Datasets.Acknowledgment, Datasets.ExperimentDesign, " +
+                      "Datasets.Contributors, Datasets.Citation, Datasets.Notes, Investigators.FirstName, Investigators.LastName, " +
+                      "Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country, " +
+                      "Investigators.Phone, Investigators.Email, Investigators.Url, Organizations.OrganizationName, " +
+                      "InvestigatorId, DatasetId, DatasetStatusId, Datasets.AboutPlatform, InfoFileTitle, Specifics " +
+                      "FROM InfoFiles " +
+                      "LEFT JOIN Species USING (SpeciesId) " +
+                      "LEFT JOIN Tissue USING (TissueId) " +
+                      "LEFT JOIN InbredSet USING (InbredSetId) " +
+                      "LEFT JOIN GeneChip USING (GeneChipId) " +
+                      "LEFT JOIN AvgMethod USING (AvgMethodId) " +
+                      "LEFT JOIN Datasets USING (DatasetId) " +
+                      "LEFT JOIN Investigators USING (InvestigatorId) " +
+                      "LEFT JOIN Organizations USING (OrganizationId) " +
+                      "LEFT JOIN DatasetStatus USING (DatasetStatusId) WHERE ")
+
+        # Both identifiers come from the request, so they are passed as bound
+        # parameters instead of being formatted into the SQL text.
+        # NOTE(review): assumes g.db.execute forwards parameters to a DB-API
+        # driver that uses the "%s" placeholder style -- confirm.
+        results = None
+        if self.gn_accession_id:
+            results = g.db.execute(query_base + "GN_AccesionId = %s",
+                                   (self.gn_accession_id,)).fetchone()
+        if not results and self.info_page_name:
+            # Fall back to the page name when the accession id finds nothing.
+            results = g.db.execute(query_base + "InfoPageName = %s",
+                                   (self.info_page_name,)).fetchone()
+
+        if results:
+            self.info = process_query_results(results)
+
+        if not results and self.info_page_name and create:
+            g.db.execute("INSERT INTO InfoFiles SET InfoFiles.InfoPageName = %s",
+                         (self.info_page_name,))
+            # Re-run the lookup without `create`, so this cannot recurse again.
+            return self.get_info()
+
+        if not self.gn_accession_id and self.info:
+            self.gn_accession_id = self.info['accession_id']
+        if not self.info_page_name and self.info:
+            self.info_page_name = self.info['info_page_name']
+
+    def get_datasets_list(self):
+        """Fill ``self.filelist`` with the files offered for this accession id.
+
+        Fetches the download listing for ``GN<gn_accession_id>`` from
+        datafiles.genenetwork.org and stores one
+        ``[filename, filedate, filesize]`` entry per table row.  This is
+        best-effort: any failure is logged and the entries collected so far
+        (possibly none) are kept.
+        """
+        self.filelist = []
+        if not self.gn_accession_id:
+            # Without an accession id there is no listing to request.
+            return
+
+        try:
+            response = urllib2.urlopen("http://datafiles.genenetwork.org/download/GN%s" % self.gn_accession_id)
+            try:
+                data = response.read()
+            finally:
+                response.close()
+
+            # NOTE(review): the three patterns below and the cells[1] index
+            # were reconstructed from a copy of this patch whose HTML-like
+            # markup had been stripped -- verify against the original commit.
+            rows = re.findall(r"<tr>.+?</tr>", data, re.DOTALL)
+            # The first row is skipped (presumably the table header).
+            for row in rows[1:]:
+                cells = re.findall(r"<td.+?>.+?</td>", row, re.DOTALL)
+                full_filename = re.search(r"<a href=\"(.+?)\"", cells[1]).group(1).strip()
+                filename = full_filename.split("/")[-1]
+                filesize = re.search(r">(.+?)<", cells[2]).group(1).strip()
+                filedate = "N/A" #ZS: Since we can't get it for now
+
+                self.filelist.append([filename, filedate, filesize])
+        except Exception as e:
+            # The file list is optional page content, so the page is still
+            # rendered, but the failure is logged rather than discarded.
+            logger.warning("Could not retrieve the file list for GN%s: %s",
+                           self.gn_accession_id, e)
+
+# Keys of the info dict, in the same order as the columns selected by the
+# query in InfoPage.get_info.
+_INFO_FIELDS = (
+    'info_page_name',
+    'accession_id',
+    'menu_name',
+    'taxonomy_id',
+    'tissue_name',
+    'group_name',
+    'gene_chip_name',
+    'geo_platform',
+    'avg_method_name',
+    'dataset_name',
+    'geo_series',
+    'publication_title',
+    'dataset_status_name',
+    'dataset_summary',
+    'about_cases',
+    'about_tissue',
+    'about_data_processing',
+    'acknowledgement',
+    'experiment_design',
+    'contributors',
+    'citation',
+    'notes',
+    'investigator_firstname',
+    'investigator_lastname',
+    'investigator_address',
+    'investigator_city',
+    'investigator_state',
+    'investigator_zipcode',
+    'investigator_country',
+    'investigator_phone',
+    'investigator_email',
+    'investigator_url',
+    'organization_name',
+    'investigator_id',
+    'dataset_id',
+    'dataset_status_is',
+    'about_platform',
+    'info_file_title',
+    'specifics',
+)
+
+
+def process_query_results(results):
+    """Map a positional InfoFiles result row onto a dict keyed by field name.
+
+    Args:
+        results: indexable row with at least ``len(_INFO_FIELDS)`` values, in
+            the column order listed in ``_INFO_FIELDS``.
+
+    Returns:
+        dict with one entry per name in ``_INFO_FIELDS``, plus
+        'dataset_status_id' as a correctly spelled alias of the legacy
+        'dataset_status_is' key.
+
+    Raises:
+        IndexError: if ``results`` has fewer values than expected.
+    """
+    info_ob = dict((field, results[position])
+                   for position, field in enumerate(_INFO_FIELDS))
+    # 'dataset_status_is' is a misspelling kept so existing readers keep
+    # working; the correctly spelled key carries the same value.
+    info_ob['dataset_status_id'] = info_ob['dataset_status_is']
+
+    return info_ob
+
\ No newline at end of file
diff --git a/wqflask/wqflask/templates/info_page.html b/wqflask/wqflask/templates/info_page.html
new file mode 100644
index 00000000..d8b7d74c
--- /dev/null
+++ b/wqflask/wqflask/templates/info_page.html
@@ -0,0 +1,92 @@
+{% extends "base.html" %}
+{% block title %}Policies{% endblock %}
+{% block content %}
+
+Data Set Group: {{ info.dataset_name }}
+
+{{ info.info_page_name }}
+
+
+
+
+
+ Data Set: {{ info.info_file_title }} |
+ GN Accession: GN{{ gn_accession_id }} |
+ GEO Series: {{ info.geo_series }} |
+ Title: {{ info.publication_title }} |
+ Organism: {{ info.menu_name }} |
+ Group: {{ info.group_name }} |
+ Tissue: {{ info.tissue_name }} |
+ Dataset Status: {{ info.dataset_status_name }} |
+ Platforms: {{ info.gene_chip_name }} |
+ Normalization: {{ info.avg_method_name }} |
+
+ |
+
+
+
+ Contact Information |
+
+
+
+ {{ info.investigator_firstname }} {{ info.investigator_lastname }}
+ {{ info.organization_name }}
+ {{ info.investigator_address }}
+ {{ info.investigator_city }}, {{ info.investigator_state }} {{ info.investigator_zipcode }} {{ info.investigator_country }}
+ Tel. {{ info.investigator_phone }}
+ {{ info.investigator_email }}
+ Website
+ |
+
+
+
+ Download datasets and supplementary data files |
+
+
+
+
+ |
+
+
+
+ |
+
+
+ |
+
+
+
+
+
+Specifics of this Data Set: |
+ {{ info.specifics|safe }}
|
+Summary: |
+ {{ info.dataset_summary|safe }}
|
+About the cases used to generate this set of data: |
+ {{ info.about_cases|safe }}
|
+About the tissue used to generate this set of data: |
+ {{ info.about_tissue|safe }}
|
+ About the array platform: |
+ {{ info.about_platform|safe }}
|
+About data values and data processing: |
+ {{ info.about_data_processing|safe }}
|
+Notes: |
+ {{ info.notes|safe }}
|
+Experiment Type: |
+ {{ info.experiment_design|safe }}
|
+Contributor: |
+ {{ info.contributors|safe }}
|
+Citation: |
+ {{ info.citation|safe }}
|
+Data source acknowledgment: |
+ {{ info.acknowledgement|safe }}
|
+Study Id: |
+ {{ info.dataset_id }}
|
+
+
+
+{% endblock %}
\ No newline at end of file
--
cgit v1.2.3