Diffstat (limited to 'gn2/wqflask/export_traits.py')
-rw-r--r-- | gn2/wqflask/export_traits.py | 191
1 file changed, 191 insertions, 0 deletions
diff --git a/gn2/wqflask/export_traits.py b/gn2/wqflask/export_traits.py
new file mode 100644
index 00000000..2d2a40cc
--- /dev/null
+++ b/gn2/wqflask/export_traits.py
@@ -0,0 +1,191 @@
+import csv
+import datetime
+import io
+import itertools
+import re
+import xlsxwriter
+
+from pprint import pformat as pf
+from zipfile import ZipFile, ZIP_DEFLATED
+
+import simplejson as json
+
+from gn3.computations.gemma import generate_hash_of_string
+
+from gn2.base.trait import create_trait, retrieve_trait_info
+
+
+def export_traits(targs, export_type):
+    if export_type == "collection":
+        return export_collection(targs)
+    else:
+        return export_traitlist(targs)
+
+def export_collection(targs):
+    table_data = json.loads(targs['export_data'])
+    table_rows = table_data['rows']
+
+    buff = io.StringIO()
+    writer = csv.writer(buff)
+
+    now = datetime.datetime.now()
+    time_str = now.strftime('%H:%M (UTC) %m/%d/%y')
+
+    metadata_rows = [
+        ["# Collection Name: " + targs['collection_name_export']],
+        ["# User E-mail: " + targs['user_email_export']],
+        ["# Time/Date: " + time_str]
+    ]
+
+    for row in metadata_rows:
+        writer.writerow(row)
+
+    for trait in table_rows:
+        writer.writerow([trait])
+
+    csv_data = buff.getvalue()
+    buff.close()
+
+    if 'collection_name_export' in targs:
+        file_name = re.sub('\s+', '_', targs['collection_name_export'])  # replace whitespace with underscore
+    else:
+        file_name = generate_hash_of_string("".join(table_rows))
+
+    return [file_name, csv_data]
+
+def export_traitlist(targs):
+    table_data = json.loads(targs['export_data'])
+    table_rows = table_data['rows']
+
+    now = datetime.datetime.now()
+    time_str = now.strftime('%H:%M_%d%B%Y')
+    if 'file_name' in targs:
+        zip_file_name = targs['file_name'] + "_export_" + time_str
+    else:
+        zip_file_name = "export_" + time_str
+
+    metadata = []
+
+    if 'database_name' in targs:
+        if targs['database_name'] != "None":
+            metadata.append(["Data Set: " + targs['database_name']])
+    if 'accession_id' in targs:
+        if targs['accession_id'] != "None":
+            metadata.append(
+                ["Metadata Link: http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + targs['accession_id']])
+    metadata.append(
+        ["Export Date: " + datetime.datetime.now().strftime("%B %d, %Y")])
+    metadata.append(
+        ["Export Time: " + datetime.datetime.now().strftime("%H:%M GMT")])
+    if 'search_string' in targs:
+        if targs['search_string'] != "None":
+            metadata.append(["Search Query: " + targs['search_string']])
+    if 'filter_term' in targs:
+        if targs['filter_term'] != "None":
+            metadata.append(["Search Filter Terms: " + targs['filter_term']])
+    metadata.append(["Exported Row Number: " + str(len(table_rows))])
+    metadata.append(["Funding for The GeneNetwork: NIGMS (R01 GM123489, 2017-2026), NIDA (P30 DA044223, 2017-2022), NIA (R01AG043930, 2013-2018), NIAAA (U01 AA016662, U01 AA013499, U24 AA013513, U01 AA014425, 2006-2017), NIDA/NIMH/NIAAA (P20-DA 21131, 2001-2012), NCI MMHCC (U01CA105417), NCRR/BIRN (U24 RR021760)"])
+    metadata.append([])
+
+    trait_list = []
+    for trait in table_rows:
+        trait_name, dataset_name, _hash = trait.split(":")
+        trait_ob = create_trait(name=trait_name, dataset_name=dataset_name)
+        trait_ob = retrieve_trait_info(
+            trait_ob, trait_ob.dataset, get_qtl_info=True)
+        trait_list.append(trait_ob)
+
+    table_headers = ['Index', 'URL', 'Species', 'Group', 'Dataset', 'Record ID', 'Symbol', 'Description', 'ProbeTarget', 'PubMed_ID', 'Chr', 'Mb', 'Alias', 'Gene_ID', 'Homologene_ID', 'UniGene_ID',
+                     'Strand_Probe', 'Probe_set_specificity', 'Probe_set_BLAT_score', 'Probe_set_BLAT_Mb_start', 'Probe_set_BLAT_Mb_end', 'QTL_Chr', 'QTL_Mb', 'Locus_at_Peak', 'Max_LRS', 'P_value_of_MAX', 'Mean_Expression']
+
+    traits_by_group = sort_traits_by_group(trait_list)
+
+    file_list = []
+    for group in traits_by_group:
+        group_traits = traits_by_group[group]
+        samplelist = group_traits[0].dataset.group.all_samples_ordered()
+        if not samplelist:
+            continue
+
+        buff = io.StringIO()
+        writer = csv.writer(buff)
+        csv_rows = []
+
+        sample_headers = []
+        for sample in samplelist:
+            sample_headers.append(sample)
+            sample_headers.append(sample + "_SE")
+
+        full_headers = table_headers + sample_headers
+
+        for metadata_row in metadata:
+            writer.writerow(metadata_row)
+
+        csv_rows.append(full_headers)
+
+        for i, trait in enumerate(group_traits):
+            if getattr(trait, "symbol", None):
+                trait_symbol = getattr(trait, "symbol")
+            elif getattr(trait, "abbreviation", None):
+                trait_symbol = getattr(trait, "abbreviation")
+            else:
+                trait_symbol = "N/A"
+            row_contents = [
+                i + 1,
+                "https://genenetwork.org/show_trait?trait_id=" + \
+                str(trait.name) + "&dataset=" + str(trait.dataset.name),
+                trait.dataset.group.species,
+                trait.dataset.group.name,
+                trait.dataset.name,
+                trait.name,
+                trait_symbol,
+                getattr(trait, "description_display", "N/A"),
+                getattr(trait, "probe_target_description", "N/A"),
+                getattr(trait, "pubmed_id", "N/A"),
+                getattr(trait, "chr", "N/A"),
+                getattr(trait, "mb", "N/A"),
+                trait.alias_fmt,
+                getattr(trait, "geneid", "N/A"),
+                getattr(trait, "homologeneid", "N/A"),
+                getattr(trait, "unigeneid", "N/A"),
+                getattr(trait, "strand_probe", "N/A"),
+                getattr(trait, "probe_set_specificity", "N/A"),
+                getattr(trait, "probe_set_blat_score", "N/A"),
+                getattr(trait, "probe_set_blat_mb_start", "N/A"),
+                getattr(trait, "probe_set_blat_mb_end", "N/A"),
+                getattr(trait, "locus_chr", "N/A"),
+                getattr(trait, "locus_mb", "N/A"),
+                getattr(trait, "locus", "N/A"),
+                getattr(trait, "lrs", "N/A"),
+                getattr(trait, "pvalue", "N/A"),
+                getattr(trait, "mean", "N/A")
+            ]
+
+            for sample in samplelist:
+                if sample in trait.data:
+                    row_contents += [trait.data[sample].value,
+                                     trait.data[sample].variance]
+                else:
+                    row_contents += ["x", "x"]
+
+            csv_rows.append(row_contents)
+
+        writer.writerows(csv_rows)
+        csv_data = buff.getvalue()
+        buff.close()
+
+        file_name = group + "_traits.csv"
+        file_list.append([file_name, csv_data])
+
+    return file_list
+
+
+def sort_traits_by_group(trait_list=[]):
+    traits_by_group = {}
+    for trait in trait_list:
+        if trait.dataset.group.name not in list(traits_by_group.keys()):
+            traits_by_group[trait.dataset.group.name] = []
+
+        traits_by_group[trait.dataset.group.name].append(trait)
+
+    return traits_by_group
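
Note: the module imports ZipFile/ZIP_DEFLATED and builds a zip_file_name, but export_traitlist() only returns the per-group [file_name, csv_data] pairs, so packaging them into an archive is left to the caller. The sketch below (not part of this commit) shows one way a caller might do that; build_export_archive() is a hypothetical helper, and it assumes targs already carries the 'export_data' JSON described above. Any export_type other than "collection" routes to export_traitlist().

import io
from zipfile import ZipFile, ZIP_DEFLATED

from gn2.wqflask.export_traits import export_traits


def build_export_archive(targs):
    # Hypothetical caller-side helper: collect the per-group CSVs and pack
    # them into a single in-memory zip archive.
    file_list = export_traits(targs, export_type="traitlist")

    memory_file = io.BytesIO()
    with ZipFile(memory_file, mode="w", compression=ZIP_DEFLATED) as zf:
        for file_name, csv_data in file_list:
            zf.writestr(file_name, csv_data)
    memory_file.seek(0)
    return memory_file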