import csv
import datetime
import io
import itertools
import re

import xlsxwriter
from pprint import pformat as pf
from zipfile import ZipFile, ZIP_DEFLATED

import simplejson as json

from gn3.computations.gemma import generate_hash_of_string

from gn2.base.trait import create_trait, retrieve_trait_info


def export_traits(targs, export_type):
    """Dispatch an export request to the collection or trait-list exporter."""
    if export_type == "collection":
        return export_collection(targs)
    else:
        return export_traitlist(targs)


def export_collection(targs):
    """Export a collection as a single CSV.

    Returns [file_name, csv_data], where csv_data holds a few comment rows
    of metadata followed by one row per trait identifier.
    """
    table_data = json.loads(targs['export_data'])
    table_rows = table_data['rows']

    buff = io.StringIO()
    writer = csv.writer(buff)

    now = datetime.datetime.now()
    time_str = now.strftime('%H:%M (UTC) %m/%d/%y')

    metadata_rows = [
        ["# Collection Name: " + targs['collection_name_export']],
        ["# User E-mail: " + targs['user_email_export']],
        ["# Time/Date: " + time_str]
    ]
    for row in metadata_rows:
        writer.writerow(row)
    for trait in table_rows:
        writer.writerow([trait])

    csv_data = buff.getvalue()
    buff.close()

    if 'collection_name_export' in targs:
        # Replace whitespace with underscores so the name is filesystem-safe
        file_name = re.sub(r'\s+', '_', targs['collection_name_export'])
    else:
        file_name = generate_hash_of_string("".join(table_rows))

    return [file_name, csv_data]


def export_traitlist(targs):
    """Export a list of traits as one CSV per group.

    Returns a list of [file_name, csv_data] pairs, one per group, so the
    caller can bundle them (for example into a ZIP archive).
    """
    table_data = json.loads(targs['export_data'])
    table_rows = table_data['rows']

    now = datetime.datetime.now()
    time_str = now.strftime('%H:%M_%d%B%Y')
    if 'file_name' in targs:
        zip_file_name = targs['file_name'] + "_export_" + time_str
    else:
        zip_file_name = "export_" + time_str

    # Metadata rows written at the top of every per-group CSV
    metadata = []
    if 'database_name' in targs:
        if targs['database_name'] != "None":
            metadata.append(["Data Set: " + targs['database_name']])
    if 'accession_id' in targs:
        if targs['accession_id'] != "None":
            metadata.append(
                ["Metadata Link: http://genenetwork.org/webqtl/main.py?FormID=sharinginfo&GN_AccessionId=" + targs['accession_id']])
    metadata.append(
        ["Export Date: " + datetime.datetime.now().strftime("%B %d, %Y")])
    metadata.append(
        ["Export Time: " + datetime.datetime.now().strftime("%H:%M GMT")])
    if 'search_string' in targs:
        if targs['search_string'] != "None":
            metadata.append(["Search Query: " + targs['search_string']])
    if 'filter_term' in targs:
        if targs['filter_term'] != "None":
            metadata.append(["Search Filter Terms: " + targs['filter_term']])
    metadata.append(["Exported Row Number: " + str(len(table_rows))])
    metadata.append(["Funding for The GeneNetwork: NIGMS (R01 GM123489, 2017-2026), NIDA (P30 DA044223, 2017-2022), NIA (R01AG043930, 2013-2018), NIAAA (U01 AA016662, U01 AA013499, U24 AA013513, U01 AA014425, 2006-2017), NIDA/NIMH/NIAAA (P20-DA 21131, 2001-2012), NCI MMHCC (U01CA105417), NCRR/BIRN (U24 RR021760)"])
    metadata.append([])

    # Each row is "trait_name:dataset_name:hash"; fetch full trait info,
    # including QTL details, for every trait in the export
    trait_list = []
    for trait in table_rows:
        trait_name, dataset_name, _hash = trait.split(":")
        trait_ob = create_trait(name=trait_name, dataset_name=dataset_name)
        trait_ob = retrieve_trait_info(
            trait_ob, trait_ob.dataset, get_qtl_info=True)
        trait_list.append(trait_ob)

    table_headers = ['Index', 'URL', 'Species', 'Group', 'Dataset',
                     'Record ID', 'Symbol', 'Description', 'ProbeTarget',
                     'PubMed_ID', 'Chr', 'Mb', 'Alias', 'Gene_ID',
                     'Homologene_ID', 'UniGene_ID', 'Strand_Probe',
                     'Probe_set_specificity', 'Probe_set_BLAT_score',
                     'Probe_set_BLAT_Mb_start', 'Probe_set_BLAT_Mb_end',
                     'QTL_Chr', 'QTL_Mb', 'Locus_at_Peak', 'Max_LRS',
                     'P_value_of_MAX', 'Mean_Expression']

    traits_by_group = sort_traits_by_group(trait_list)

    file_list = []
    for group in traits_by_group:
        group_traits = traits_by_group[group]
        samplelist = group_traits[0].dataset.group.all_samples_ordered()
        if not samplelist:
            continue

        buff = io.StringIO()
        writer = csv.writer(buff)
        csv_rows = []

        # One value column and one standard-error column per sample
        sample_headers = []
        for sample in samplelist:
            sample_headers.append(sample)
            sample_headers.append(sample + "_SE")
        full_headers = table_headers + sample_headers

        for metadata_row in metadata:
            writer.writerow(metadata_row)

        csv_rows.append(full_headers)
        for i, trait in enumerate(group_traits):
            if getattr(trait, "symbol", None):
                trait_symbol = getattr(trait, "symbol")
            elif getattr(trait, "abbreviation", None):
                trait_symbol = getattr(trait, "abbreviation")
            else:
                trait_symbol = "N/A"
            row_contents = [
                i + 1,
                "https://genenetwork.org/show_trait?trait_id=" +
                str(trait.name) + "&dataset=" + str(trait.dataset.name),
                trait.dataset.group.species,
                trait.dataset.group.name,
                trait.dataset.name,
                trait.name,
                trait_symbol,
                getattr(trait, "description_display", "N/A"),
                getattr(trait, "probe_target_description", "N/A"),
                getattr(trait, "pubmed_id", "N/A"),
                getattr(trait, "chr", "N/A"),
                getattr(trait, "mb", "N/A"),
                trait.alias_fmt,
                getattr(trait, "geneid", "N/A"),
                getattr(trait, "homologeneid", "N/A"),
                getattr(trait, "unigeneid", "N/A"),
                getattr(trait, "strand_probe", "N/A"),
                getattr(trait, "probe_set_specificity", "N/A"),
                getattr(trait, "probe_set_blat_score", "N/A"),
                getattr(trait, "probe_set_blat_mb_start", "N/A"),
                getattr(trait, "probe_set_blat_mb_end", "N/A"),
                getattr(trait, "locus_chr", "N/A"),
                getattr(trait, "locus_mb", "N/A"),
                getattr(trait, "locus", "N/A"),
                getattr(trait, "lrs", "N/A"),
                getattr(trait, "pvalue", "N/A"),
                getattr(trait, "mean", "N/A")
            ]
            # Append sample value/variance pairs; "x" marks missing samples
            for sample in samplelist:
                if sample in trait.data:
                    row_contents += [trait.data[sample].value,
                                     trait.data[sample].variance]
                else:
                    row_contents += ["x", "x"]
            csv_rows.append(row_contents)

        writer.writerows(csv_rows)
        csv_data = buff.getvalue()
        buff.close()

        file_name = group + "_traits.csv"
        file_list.append([file_name, csv_data])

    return file_list


def sort_traits_by_group(trait_list=[]):
    """Group trait objects by the name of their dataset's group."""
    traits_by_group = {}
    for trait in trait_list:
        if trait.dataset.group.name not in traits_by_group:
            traits_by_group[trait.dataset.group.name] = []
        traits_by_group[trait.dataset.group.name].append(trait)
    return traits_by_group
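

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of this module's behaviour):
# a caller could bundle the per-group CSVs returned by export_traitlist()
# into a single ZIP download.  The `targs` dict and the archive handling are
# assumptions for the example; only export_traits(), io, ZipFile and
# ZIP_DEFLATED come from this module's imports.
#
#   files = export_traits(targs, "traitlist")   # [[file_name, csv_data], ...]
#   memory_file = io.BytesIO()
#   with ZipFile(memory_file, "w", compression=ZIP_DEFLATED) as zipf:
#       for file_name, csv_data in files:
#           zipf.writestr(file_name, csv_data)
#   memory_file.seek(0)                         # ready to send as a download
# ---------------------------------------------------------------------------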