diff options
author | Hao Chen | 2019-05-20 05:47:50 -0500 |
---|---|---|
committer | Hao Chen | 2019-05-20 05:47:50 -0500 |
commit | d87ecb28847fc6c3d8b93673f1f0371b77ee22ee (patch) | |
tree | 8d1404e617fcffc7c4264cf6db3459e790829c46 | |
parent | d883f2e9e6151d0bc5c1e25924253221312b4959 (diff) | |
download | genecup-d87ecb28847fc6c3d8b93673f1f0371b77ee22ee.tar.gz |
add EBI GWAS
-rw-r--r-- | process_gwas.py | 48 | ||||
-rwxr-xr-x | ratspub.py | 32 | ||||
-rwxr-xr-x | server.py | 33 | ||||
-rw-r--r-- | templates/index.html | 4 | ||||
-rw-r--r-- | templates/progress.html | 6 |
5 files changed, 92 insertions, 31 deletions
diff --git a/process_gwas.py b/process_gwas.py new file mode 100644 index 0000000..eba59c0 --- /dev/null +++ b/process_gwas.py @@ -0,0 +1,48 @@ +import re + +with open("./addiction_gwas.tsv", "r") as f: + for line in f: + try: + (pmid, trait0, gene0, gene1, snp, pval, trait1)=line.strip().split("\t") + except: + next + key1="unassigned" + key2="unassigned" + trait=trait0+"; "+trait1 + genes=gene0+";"+gene1 + if re.search('cocaine', trait, flags=re.I): + key1="addiction" + key2="cocaine" + elif re.search('smoking|congestive|nicotine', trait, flags=re.I): + key1="addiction" + key2="nicotine" + elif re.search('opioid|morphin|heroin|methadone', trait, flags=re.I): + key1="addiction" + key2="opioid" + elif re.search('amphetam', trait, flags=re.I): + key1="addiction" + key2="amphetamine" + elif re.search('canabis', trait, flags=re.I): + key1="addiction" + key2="canabis" + elif re.search('food', trait, flags=re.I): + key1="addiction" + key2="food" + elif re.search('alcohol', trait, flags=re.I): + key1="addiction" + key2="alcohol" + elif re.search('addiction|abuse', trait, flags=re.I): + key1="addiction" + key2="addiction" + else: + key1="behavior" + key2="psychiatric" + genes=genes.replace(" - ", ";") + genes=genes.replace(",", ";") + printed=dict() + for gene in genes.split(";"): + gene=gene.replace(" ","") + if gene !="NR" and gene not in printed: + text="SNP:<b>"+snp+"</b>, P value: <b>"+pval+"</b>, Disease/trait:<b> "+trait0+"</b>, Mapped trait:<b> "+trait1+"</b>" + print (gene+"\t"+"GWAS"+"\t"+key2+"_GWAS\t"+pmid+"\t"+text) + printed[gene]=1 @@ -65,13 +65,43 @@ def generate_edges(data, filename): json0+="{ data: { id: '" + edgeID + "', source: '" + source + "', target: '" + target + "', sentCnt: " + str(edgeCnts[edgeID]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n" return(json0) +def searchArchived(sets, query): + if sets=='topGene': + dataFile="topGene_addiction_sentences.tab" + nodes= "{ data: { id: '" + query + "', nodecolor: '" + "#2471A3" + "', fontweight:700, url:'/progress?query="+query+"' } },\n" + elif sets=='gwas': + dataFile="gwas_addiction.tab" + nodes=str() + with open(dataFile, "r") as sents: + catCnt={} + for sent in sents: + (symb, cat0, cat1, pmid, sent)=sent.split("\t") + if (symb.upper() == query.upper()) : + if cat1 in catCnt.keys(): + catCnt[cat1]+=1 + else: + catCnt[cat1]=1 + nodes= "{ data: { id: '" + query + "', nodecolor: '" + "#2471A3" + "', fontweight:700, url:'/progress?query="+query+"' } },\n" + edges=str() + for key in catCnt.keys(): + if sets=='gwas': + nc=nodecolor["gwas"] + elif key in drug_d.keys(): + nc=nodecolor["drug"] + else: + nc=nodecolor["addiction"] + nodes += "{ data: { id: '" + key + "', nodecolor: '" + nc + "', nodetype: 'top150', url:'/shownode?node="+key+"' } },\n" + edgeID=dataFile+"|"+query+"|"+key + edges+="{ data: { id: '" + edgeID+ "', source: '" + query + "', target: '" + key + "', sentCnt: " + str(catCnt[key]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n" + return(nodes+edges) + # brain region has too many short acronyms to just use the undic function, so search PubMed using the following brain_query_term="cortex|accumbens|striatum|amygadala|hippocampus|tegmental|mesolimbic|infralimbic|prelimbic|habenula" function=undic(function_d) addiction=undic(addiction_d) drug=undic(drug_d) -nodecolor={'function':"#A9CCE3", 'addiction': "#D7BDE2", 'drug': "#F9E79F", 'brain':"#A3E4D7"} +nodecolor={'function':"#A9CCE3", 'addiction': "#D7BDE2", 'drug': "#F9E79F", 'brain':"#A3E4D7", 'gwas':"#AEB6BF"} #https://htmlcolorcodes.com/ n0=generate_nodes(function_d, 'function') n1=generate_nodes(addiction_d, 'addiction') @@ -78,7 +78,9 @@ def search(): sent3=gene_category(gene, brain_d, brain_query_term, "brain") progress+=percent e3=generate_edges(sent3, tf_name) - geneEdges=e0+e1+e2+e3 + # gwas + e4=searchArchived('gwas', gene) + geneEdges=e0+e1+e2+e3+e4 if len(geneEdges) >1: edges+=geneEdges nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/gene_gene?gene="+gene+"'} },\n" @@ -119,36 +121,17 @@ def sentences(): for sent in all_sents.split("\n"): if len(sent.strip())!=0: (gene,nouse,cat, pmid, text)=sent.split("\t") - if (gene == gene0 and cat == cat0) : + if (gene.upper() == gene0.upper() and cat.upper() == cat0.upper()) : out+= "<li> "+ text + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid +"\" target=_new>PMID:"+pmid+"<br></a>" return render_template('sentences.html', sentences="<ol>"+out+"</ol><p>") ## show the cytoscape graph for one gene from the top gene list @app.route("/showTopGene") def showTopGene(): - topGene=request.args.get('topGene') - topGeneSentFile="topGene_addiction_sentences.tab" - with open(topGeneSentFile, "r") as sents: - catCnt={} - for sent in sents: - (symb, cat0, cat1, pmid, sent)=sent.split("\t") - if (symb == topGene) : - if cat1 in catCnt.keys(): - catCnt[cat1]+=1 - else: - catCnt[cat1]=1 - nodes= "{ data: { id: '" + topGene + "', nodecolor: '" + "#2471A3" + "', fontweight:700, url:'/progress?query="+topGene+"' } },\n" - edges=str() - for key in catCnt.keys(): - if ( key in drug_d.keys()): - nc=nodecolor["drug"] - else: - nc=nodecolor["addiction"] - nodes += "{ data: { id: '" + key + "', nodecolor: '" + nc + "', nodetype: 'top150', url:'/shownode?node="+key+"' } },\n" - edgeID=topGeneSentFile+"|"+topGene+"|"+key - edges+="{ data: { id: '" + edgeID+ "', source: '" + topGene + "', target: '" + key + "', sentCnt: " + str(catCnt[key]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n" - message2="<li><strong>"+topGene + "</strong> is one of the top addiction genes. <li> An archived search is shown. Click on the blue circle to update the results and include keywords for brain region and gene function. <strong> The update may take a long time to finish.</strong> " - return render_template("cytoscape.html", elements=nodes+edges, message="Top addiction genes", message2=message2) + query=request.args.get('topGene') + nodesEdges=searchArchived('topGene',query) + message2="<li><strong>"+query + "</strong> is one of the top addiction genes. <li> An archived search is shown. Click on the blue circle to update the results and include keywords for brain region and gene function. <strong> The update may take a long time to finish.</strong> " + return render_template("cytoscape.html", elements=nodesEdges, message="Top addiction genes", message2=message2) @app.route("/shownode") def shownode(): diff --git a/templates/index.html b/templates/index.html index f5b6aea..bbdebfa 100644 --- a/templates/index.html +++ b/templates/index.html @@ -8,9 +8,9 @@ <table> <tr><td > <p> - RatsPub searches PubMed to find <i>sentences</i> that contain the query terms (i.e., gene symbols) and <a href="https://github.com/chen42/RatsPub/blob/master/ratspub_keywords.py">drug addiction-related keywords</a>. These gene-keyword relationships are presented as an interactive graph that can efficiently answer the question <b>"What do we know about these genes and addiction?" </b> + RatsPub searches PubMed to find <i>sentences</i> that contain the query terms (i.e., gene symbols) and <a href="https://github.com/chen42/RatsPub/blob/master/ratspub_keywords.py">drug addiction-related keywords</a>. These gene-keyword relationships are presented as an interactive graph that can efficiently answer the question <b>"What do we know about these genes and addiction?". </b> To better answer this question, data from <a href="https://www.ebi.ac.uk/gwas/">EBI GWAS catalog </a>are also included in the search. -<p> In addition, clicking gene names in the graph will launch a new search for sentences containing the target gene and 200 addiction-related genes. These results help to answer the question <b>"Are there genes that can link my gene of interest to addiction?" </b> +<p> The graph has many interactive elements. For example, clicking gene names will launch a new search for sentences containing the target gene and 200 addiction-related genes. These results help to answer the question <b>"Are there genes that can link my gene of interest to addiction?" </b> </td> <td > <div class='img'><img src="/static/ratspub.png" class="img-fluid"> diff --git a/templates/progress.html b/templates/progress.html index 081f1d9..e881a90 100644 --- a/templates/progress.html +++ b/templates/progress.html @@ -6,9 +6,9 @@ <script> -var r = Math.floor(Math.random() * Math.floor(125)); -var g = Math.floor(Math.random() * Math.floor(125)); -var b = Math.floor(Math.random() * Math.floor(125)); +var r = Math.floor(Math.random() * Math.floor(100)); +var g = Math.floor(Math.random() * Math.floor(100)); +var b = Math.floor(Math.random() * Math.floor(100)); document.body.style.backgroundColor = 'rgba('+r+','+g+','+b+',0.1)' var source = new EventSource("/search"); |