1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
import urllib.request
import urllib.error
import urllib.parse
import re
from MySQLdb.cursors import DictCursor
from gn2.wqflask.database import database_connection
from gn2.utility.tools import get_setting
class InfoPage:
def __init__(self, start_vars):
self.info = None
self.gn_accession_id = None
if 'gn_accession_id' in start_vars:
self.gn_accession_id = start_vars['gn_accession_id']
self.info_page_name = start_vars['info_page_name']
self.get_info()
self.get_datasets_list()
def get_info(self, create=False):
query_base = (
"SELECT InfoPageName AS info_page_name, "
"GN_AccesionId AS accession_id, "
"Species.MenuName AS menu_name, "
"Species.TaxonomyId AS taxonomy_id, "
"Tissue.Name AS tissue_name, "
"InbredSet.Name AS group_name, "
"GeneChip.GeneChipName AS gene_chip_name, "
"GeneChip.GeoPlatform AS geo_platform, "
"AvgMethod.Name AS avg_method_name, "
"Datasets.DatasetName AS dataset_name, "
"Datasets.GeoSeries AS geo_series, "
"Datasets.PublicationTitle AS publication_title, "
"DatasetStatus.DatasetStatusName AS dataset_status_name, "
"Datasets.Summary AS dataset_summary, "
"Datasets.AboutCases AS about_cases, "
"Datasets.AboutTissue AS about_tissue, "
"Datasets.AboutDataProcessing AS about_data_processing, "
"Datasets.Acknowledgment AS acknowledgement, "
"Datasets.ExperimentDesign AS experiment_design, "
"Datasets.Contributors AS contributors, "
"Datasets.Citation AS citation, "
"Datasets.Notes AS notes, "
"Investigators.FirstName AS investigator_firstname, "
"Investigators.LastName AS investigator_lastname, "
"Investigators.Address AS investigator_address, "
"Investigators.City AS investigator_city, "
"Investigators.State AS investigator_state, "
"Investigators.ZipCode AS investigator_zipcode, "
"Investigators.Country AS investigator_country, "
"Investigators.Phone AS investigator_phone, "
"Investigators.Email AS investigator_email, "
"Investigators.Url AS investigator_url, "
"Organizations.OrganizationName AS organization_name, "
"InvestigatorId AS investigator_id, "
"DatasetId AS dataset_id, "
"DatasetStatusId AS dataset_status_id, "
"Datasets.AboutPlatform AS about_platform, "
"InfoFileTitle AS info_file_title, "
"Specifics AS specifics"
"FROM InfoFiles "
"LEFT JOIN Species USING (SpeciesId) "
"LEFT JOIN Tissue USING (TissueId) "
"LEFT JOIN InbredSet USING (InbredSetId) "
"LEFT JOIN GeneChip USING (GeneChipId) "
"LEFT JOIN AvgMethod USING (AvgMethodId) "
"LEFT JOIN Datasets USING (DatasetId) "
"LEFT JOIN Investigators USING (InvestigatorId) "
"LEFT JOIN Organizations USING (OrganizationId) "
"LEFT JOIN DatasetStatus USING (DatasetStatusId) WHERE "
)
if not all([self.gn_accession_id, self.info_page_name]):
raise ValueError('No correct parameter found')
results = {}
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor(DictCursor) as cursor:
if self.gn_accession_id:
cursor.execute(f"{query_base}GN_AccesionId = %s",
(self.gn_accession_id,))
elif self.info_page_name:
cursor.execute(f"{query_base}InfoPageName = %s",
(self.info_page_name,))
if (results := cursor.fetchone()):
self.info = results
if ((not results or len(results) < 1)
and self.info_page_name and create):
return self.get_info()
if not self.gn_accession_id and self.info:
self.gn_accession_id = self.info['accession_id']
if not self.info_page_name and self.info:
self.info_page_name = self.info['info_page_name']
def get_datasets_list(self):
self.filelist = []
try:
response = urllib.request.urlopen(
"https://files.genenetwork.org/current/GN%s" % self.gn_accession_id)
data = response.read()
matches = re.findall(r"<tr>.+?</tr>", data, re.DOTALL)
for i, match in enumerate(matches):
if i == 0:
continue
cells = re.findall(r"<td.+?>.+?</td>", match, re.DOTALL)
full_filename = re.search(
r"<a href=\"(.+?)\"", cells[1], re.DOTALL).group(1).strip()
filename = full_filename.split("/")[-1]
filesize = re.search(r">(.+?)<", cells[2]).group(1).strip()
filedate = "N/A" # ZS: Since we can't get it for now
self.filelist.append([filename, filedate, filesize])
except Exception as e:
pass
|