From d87ecb28847fc6c3d8b93673f1f0371b77ee22ee Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Mon, 20 May 2019 05:47:50 -0500 Subject: add EBI GWAS --- process_gwas.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ ratspub.py | 32 +++++++++++++++++++++++++++++++- server.py | 33 ++++++++------------------------- templates/index.html | 4 ++-- templates/progress.html | 6 +++--- 5 files changed, 92 insertions(+), 31 deletions(-) create mode 100644 process_gwas.py diff --git a/process_gwas.py b/process_gwas.py new file mode 100644 index 0000000..eba59c0 --- /dev/null +++ b/process_gwas.py @@ -0,0 +1,48 @@ +import re + +with open("./addiction_gwas.tsv", "r") as f: + for line in f: + try: + (pmid, trait0, gene0, gene1, snp, pval, trait1)=line.strip().split("\t") + except: + next + key1="unassigned" + key2="unassigned" + trait=trait0+"; "+trait1 + genes=gene0+";"+gene1 + if re.search('cocaine', trait, flags=re.I): + key1="addiction" + key2="cocaine" + elif re.search('smoking|congestive|nicotine', trait, flags=re.I): + key1="addiction" + key2="nicotine" + elif re.search('opioid|morphin|heroin|methadone', trait, flags=re.I): + key1="addiction" + key2="opioid" + elif re.search('amphetam', trait, flags=re.I): + key1="addiction" + key2="amphetamine" + elif re.search('canabis', trait, flags=re.I): + key1="addiction" + key2="canabis" + elif re.search('food', trait, flags=re.I): + key1="addiction" + key2="food" + elif re.search('alcohol', trait, flags=re.I): + key1="addiction" + key2="alcohol" + elif re.search('addiction|abuse', trait, flags=re.I): + key1="addiction" + key2="addiction" + else: + key1="behavior" + key2="psychiatric" + genes=genes.replace(" - ", ";") + genes=genes.replace(",", ";") + printed=dict() + for gene in genes.split(";"): + gene=gene.replace(" ","") + if gene !="NR" and gene not in printed: + text="SNP:"+snp+", P value: "+pval+", Disease/trait: "+trait0+", Mapped trait: "+trait1+"" + print (gene+"\t"+"GWAS"+"\t"+key2+"_GWAS\t"+pmid+"\t"+text) + 
printed[gene]=1 diff --git a/ratspub.py b/ratspub.py index 0cc5d8a..e9a911b 100755 --- a/ratspub.py +++ b/ratspub.py @@ -65,13 +65,43 @@ def generate_edges(data, filename): json0+="{ data: { id: '" + edgeID + "', source: '" + source + "', target: '" + target + "', sentCnt: " + str(edgeCnts[edgeID]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n" return(json0) +def searchArchived(sets, query): + if sets=='topGene': + dataFile="topGene_addiction_sentences.tab" + nodes= "{ data: { id: '" + query + "', nodecolor: '" + "#2471A3" + "', fontweight:700, url:'/progress?query="+query+"' } },\n" + elif sets=='gwas': + dataFile="gwas_addiction.tab" + nodes=str() + with open(dataFile, "r") as sents: + catCnt={} + for sent in sents: + (symb, cat0, cat1, pmid, sent)=sent.split("\t") + if (symb.upper() == query.upper()) : + if cat1 in catCnt.keys(): + catCnt[cat1]+=1 + else: + catCnt[cat1]=1 + nodes= "{ data: { id: '" + query + "', nodecolor: '" + "#2471A3" + "', fontweight:700, url:'/progress?query="+query+"' } },\n" + edges=str() + for key in catCnt.keys(): + if sets=='gwas': + nc=nodecolor["gwas"] + elif key in drug_d.keys(): + nc=nodecolor["drug"] + else: + nc=nodecolor["addiction"] + nodes += "{ data: { id: '" + key + "', nodecolor: '" + nc + "', nodetype: 'top150', url:'/shownode?node="+key+"' } },\n" + edgeID=dataFile+"|"+query+"|"+key + edges+="{ data: { id: '" + edgeID+ "', source: '" + query + "', target: '" + key + "', sentCnt: " + str(catCnt[key]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n" + return(nodes+edges) + # brain region has too many short acronyms to just use the undic function, so search PubMed using the following brain_query_term="cortex|accumbens|striatum|amygadala|hippocampus|tegmental|mesolimbic|infralimbic|prelimbic|habenula" function=undic(function_d) addiction=undic(addiction_d) drug=undic(drug_d) -nodecolor={'function':"#A9CCE3", 'addiction': "#D7BDE2", 'drug': "#F9E79F", 'brain':"#A3E4D7"} +nodecolor={'function':"#A9CCE3", 'addiction': 
"#D7BDE2", 'drug': "#F9E79F", 'brain':"#A3E4D7", 'gwas':"#AEB6BF"} #https://htmlcolorcodes.com/ n0=generate_nodes(function_d, 'function') n1=generate_nodes(addiction_d, 'addiction') diff --git a/server.py b/server.py index 62a959e..7bdd3f2 100755 --- a/server.py +++ b/server.py @@ -78,7 +78,9 @@ def search(): sent3=gene_category(gene, brain_d, brain_query_term, "brain") progress+=percent e3=generate_edges(sent3, tf_name) - geneEdges=e0+e1+e2+e3 + # gwas + e4=searchArchived('gwas', gene) + geneEdges=e0+e1+e2+e3+e4 if len(geneEdges) >1: edges+=geneEdges nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/gene_gene?gene="+gene+"'} },\n" @@ -119,36 +121,17 @@ def sentences(): for sent in all_sents.split("\n"): if len(sent.strip())!=0: (gene,nouse,cat, pmid, text)=sent.split("\t") - if (gene == gene0 and cat == cat0) : + if (gene.upper() == gene0.upper() and cat.upper() == cat0.upper()) : out+= "
  • "+ text + " PMID:"+pmid+"
    " return render_template('sentences.html', sentences="
      "+out+"

    ") ## show the cytoscape graph for one gene from the top gene list @app.route("/showTopGene") def showTopGene(): - topGene=request.args.get('topGene') - topGeneSentFile="topGene_addiction_sentences.tab" - with open(topGeneSentFile, "r") as sents: - catCnt={} - for sent in sents: - (symb, cat0, cat1, pmid, sent)=sent.split("\t") - if (symb == topGene) : - if cat1 in catCnt.keys(): - catCnt[cat1]+=1 - else: - catCnt[cat1]=1 - nodes= "{ data: { id: '" + topGene + "', nodecolor: '" + "#2471A3" + "', fontweight:700, url:'/progress?query="+topGene+"' } },\n" - edges=str() - for key in catCnt.keys(): - if ( key in drug_d.keys()): - nc=nodecolor["drug"] - else: - nc=nodecolor["addiction"] - nodes += "{ data: { id: '" + key + "', nodecolor: '" + nc + "', nodetype: 'top150', url:'/shownode?node="+key+"' } },\n" - edgeID=topGeneSentFile+"|"+topGene+"|"+key - edges+="{ data: { id: '" + edgeID+ "', source: '" + topGene + "', target: '" + key + "', sentCnt: " + str(catCnt[key]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n" - message2="

  • "+topGene + " is one of the top addiction genes.
  • An archived search is shown. Click on the blue circle to update the results and include keywords for brain region and gene function. The update may take a long time to finish. " - return render_template("cytoscape.html", elements=nodes+edges, message="Top addiction genes", message2=message2) + query=request.args.get('topGene') + nodesEdges=searchArchived('topGene',query) + message2="
  • "+query + " is one of the top addiction genes.
  • An archived search is shown. Click on the blue circle to update the results and include keywords for brain region and gene function. The update may take a long time to finish. " + return render_template("cytoscape.html", elements=nodesEdges, message="Top addiction genes", message2=message2) @app.route("/shownode") def shownode(): diff --git a/templates/index.html b/templates/index.html index f5b6aea..bbdebfa 100644 --- a/templates/index.html +++ b/templates/index.html @@ -8,9 +8,9 @@

    - RatsPub searches PubMed to find sentences that contain the query terms (i.e., gene symbols) and drug addiction-related keywords. These gene-keyword relationships are presented as an interactive graph that can efficiently answer the question "What do we know about these genes and addiction?" + RatsPub searches PubMed to find sentences that contain the query terms (i.e., gene symbols) and drug addiction-related keywords. These gene-keyword relationships are presented as an interactive graph that can efficiently answer the question "What do we know about these genes and addiction?" To better answer this question, data from the EBI GWAS Catalog are also included in the search. -

    In addition, clicking gene names in the graph will launch a new search for sentences containing the target gene and 200 addiction-related genes. These results help to answer the question "Are there genes that can link my gene of interest to addiction?" +

    The graph has many interactive elements. For example, clicking gene names will launch a new search for sentences containing the target gene and 200 addiction-related genes. These results help to answer the question "Are there genes that can link my gene of interest to addiction?"

    diff --git a/templates/progress.html b/templates/progress.html index 081f1d9..e881a90 100644 --- a/templates/progress.html +++ b/templates/progress.html @@ -6,9 +6,9 @@