about summary refs log tree commit diff
path: root/gn2/wqflask/export_traits.py
diff options
context:
space:
mode:
Diffstat (limited to 'gn2/wqflask/export_traits.py')
-rw-r--r--gn2/wqflask/export_traits.py191
1 files changed, 191 insertions, 0 deletions
diff --git a/gn2/wqflask/export_traits.py b/gn2/wqflask/export_traits.py
new file mode 100644
index 00000000..2d2a40cc
--- /dev/null
+++ b/gn2/wqflask/export_traits.py
@@ -0,0 +1,191 @@
+import csv
+import datetime
+import io
+import itertools
+import re
+import xlsxwriter
+
+from pprint import pformat as pf
+from zipfile import ZipFile, ZIP_DEFLATED
+
+import simplejson as json
+
+from gn3.computations.gemma import generate_hash_of_string
+
+from gn2.base.trait import create_trait, retrieve_trait_info
+
+
+def export_traits(targs, export_type):
+    if export_type == "collection":
+        return export_collection(targs)
+    else:
+        return export_traitlist(targs)
+
+def export_collection(targs):
+    table_data = json.loads(targs['export_data'])
+    table_rows = table_data['rows']
+
+    buff = io.StringIO()
+    writer = csv.writer(buff)
+
+    now = datetime.datetime.now()
+    time_str = now.strftime('%H:%M (UTC) %m/%d/%y')
+
+    metadata_rows = [
+        ["# Collection Name: " + targs['collection_name_export']],
+        ["# User E-mail: " + targs['user_email_export']],
+        ["# Time/Date: " + time_str]
+    ]
+
+    for row in metadata_rows:
+        writer.writerow(row)
+
+    for trait in table_rows:
+        writer.writerow([trait])
+
+    csv_data = buff.getvalue()
+    buff.close()
+
+    if 'collection_name_export' in targs:
+        file_name = re.sub('\s+', '_', targs['collection_name_export']) # replace whitespace with underscore
+    else:
+        file_name = generate_hash_of_string("".join(table_rows))
+
+    return [file_name, csv_data]
+
+def export_traitlist(targs):
+    table_data = json.loads(targs['export_data'])
+    table_rows = table_data['rows']
+
+    now = datetime.datetime.now()
+    time_str = now.strftime('%H:%M_%d%B%Y')
+    if 'file_name' in targs:
+        zip_file_name = targs['file_name'] + "_export_" + time_str
+    else:
+        zip_file_name = "export_" + time_str
+
+    metadata = []
+
+    if 'database_name' in targs:
+        if targs['database_name'] != "None":
+            metadata.append(["Data Set: " + targs['database_name']])
+    if 'accession_id' in targs:
+        if targs['accession_id'] != "None":
+            metadata.append(
+                ["Metadata Link: http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + targs['accession_id']])
+    metadata.append(
+        ["Export Date: " + datetime.datetime.now().strftime("%B %d, %Y")])
+    metadata.append(
+        ["Export Time: " + datetime.datetime.now().strftime("%H:%M GMT")])
+    if 'search_string' in targs:
+        if targs['search_string'] != "None":
+            metadata.append(["Search Query: " + targs['search_string']])
+    if 'filter_term' in targs:
+        if targs['filter_term'] != "None":
+            metadata.append(["Search Filter Terms: " + targs['filter_term']])
+    metadata.append(["Exported Row Number: " + str(len(table_rows))])
+    metadata.append(["Funding for The GeneNetwork: NIGMS (R01 GM123489, 2017-2026), NIDA (P30 DA044223, 2017-2022), NIA (R01AG043930, 2013-2018), NIAAA (U01 AA016662, U01 AA013499, U24 AA013513, U01 AA014425, 2006-2017), NIDA/NIMH/NIAAA (P20-DA 21131, 2001-2012), NCI MMHCC (U01CA105417), NCRR/BIRN (U24 RR021760)"])
+    metadata.append([])
+
+    trait_list = []
+    for trait in table_rows:
+        trait_name, dataset_name, _hash = trait.split(":")
+        trait_ob = create_trait(name=trait_name, dataset_name=dataset_name)
+        trait_ob = retrieve_trait_info(
+            trait_ob, trait_ob.dataset, get_qtl_info=True)
+        trait_list.append(trait_ob)
+
+    table_headers = ['Index', 'URL', 'Species', 'Group', 'Dataset', 'Record ID', 'Symbol', 'Description', 'ProbeTarget', 'PubMed_ID', 'Chr', 'Mb', 'Alias', 'Gene_ID', 'Homologene_ID', 'UniGene_ID',
+                     'Strand_Probe', 'Probe_set_specificity', 'Probe_set_BLAT_score', 'Probe_set_BLAT_Mb_start', 'Probe_set_BLAT_Mb_end', 'QTL_Chr', 'QTL_Mb', 'Locus_at_Peak', 'Max_LRS', 'P_value_of_MAX', 'Mean_Expression']
+
+    traits_by_group = sort_traits_by_group(trait_list)
+
+    file_list = []
+    for group in traits_by_group:
+        group_traits = traits_by_group[group]
+        samplelist = group_traits[0].dataset.group.all_samples_ordered()
+        if not samplelist:
+            continue
+
+        buff = io.StringIO()
+        writer = csv.writer(buff)
+        csv_rows = []
+
+        sample_headers = []
+        for sample in samplelist:
+            sample_headers.append(sample)
+            sample_headers.append(sample + "_SE")
+
+        full_headers = table_headers + sample_headers
+
+        for metadata_row in metadata:
+            writer.writerow(metadata_row)
+
+        csv_rows.append(full_headers)
+
+        for i, trait in enumerate(group_traits):
+            if getattr(trait, "symbol", None):
+                trait_symbol = getattr(trait, "symbol")
+            elif getattr(trait, "abbreviation", None):
+                trait_symbol = getattr(trait, "abbreviation")
+            else:
+                trait_symbol = "N/A"
+            row_contents = [
+                i + 1,
+                "https://genenetwork.org/show_trait?trait_id=" + \
+                str(trait.name) + "&dataset=" + str(trait.dataset.name),
+                trait.dataset.group.species,
+                trait.dataset.group.name,
+                trait.dataset.name,
+                trait.name,
+                trait_symbol,
+                getattr(trait, "description_display", "N/A"),
+                getattr(trait, "probe_target_description", "N/A"),
+                getattr(trait, "pubmed_id", "N/A"),
+                getattr(trait, "chr", "N/A"),
+                getattr(trait, "mb", "N/A"),
+                trait.alias_fmt,
+                getattr(trait, "geneid", "N/A"),
+                getattr(trait, "homologeneid", "N/A"),
+                getattr(trait, "unigeneid", "N/A"),
+                getattr(trait, "strand_probe", "N/A"),
+                getattr(trait, "probe_set_specificity", "N/A"),
+                getattr(trait, "probe_set_blat_score", "N/A"),
+                getattr(trait, "probe_set_blat_mb_start", "N/A"),
+                getattr(trait, "probe_set_blat_mb_end", "N/A"),
+                getattr(trait, "locus_chr", "N/A"),
+                getattr(trait, "locus_mb", "N/A"),
+                getattr(trait, "locus", "N/A"),
+                getattr(trait, "lrs", "N/A"),
+                getattr(trait, "pvalue", "N/A"),
+                getattr(trait, "mean", "N/A")
+            ]
+
+            for sample in samplelist:
+                if sample in trait.data:
+                    row_contents += [trait.data[sample].value,
+                                     trait.data[sample].variance]
+                else:
+                    row_contents += ["x", "x"]
+
+            csv_rows.append(row_contents)
+
+        writer.writerows(csv_rows)
+        csv_data = buff.getvalue()
+        buff.close()
+
+        file_name = group + "_traits.csv"
+        file_list.append([file_name, csv_data])
+
+    return file_list
+
+
+def sort_traits_by_group(trait_list=[]):
+    traits_by_group = {}
+    for trait in trait_list:
+        if trait.dataset.group.name not in list(traits_by_group.keys()):
+            traits_by_group[trait.dataset.group.name] = []
+
+        traits_by_group[trait.dataset.group.name].append(trait)
+
+    return traits_by_group