#!/bin/env python3
"""Flask web front end for RatsPub.

Provides user sign-up / sign-in backed by a SQLite table, runs gene/keyword
literature searches through helpers star-imported from ``ratspub`` while
streaming progress as server-sent events, and renders the results as tables,
cytoscape graphs and sentence lists.

NOTE(review): several string literals in this module look like HTML whose
markup was stripped (bullet characters, empty ``replace("", "")`` calls, a
no-op ``r'\\1'`` substitution).  Their visible text is kept as is; confirm
them against the templates before deploying.
"""
from flask import Flask, render_template, request, session, Response, redirect, url_for, flash
from flask_sqlalchemy import SQLAlchemy
import json
import shutil
from flask import jsonify
from datetime import datetime
import bcrypt
import tempfile
import random
import shlex
import string
from ratspub import *
import time
import os
import re
import pytz

app = Flask(__name__)
# The key can be supplied through the environment; the historical literal is
# kept as a fallback so existing deployments and sessions keep working.
# NOTE(review): a key committed to source control should be rotated.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', '#DtfrL98G5t1dC*4')
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///userspub.sqlite'
db = SQLAlchemy(app)
datadir = "/export/ratspub/"

# Search categories used when no checkbox is ticked on the search form.
DEFAULT_SEARCH_TYPES = ['GWAS', 'function', 'addiction', 'drug', 'brain', 'stress', 'psychiatric']

# Characters removed from the stored password hash to build the
# password-change URL token (see profile()).
_RESERVED_URL_CHARS = (";", "/", "?", ":", "@", "=", "&", ".")


# the sqlite database
class users(db.Model):
    """Registered user; ``password`` holds the bcrypt hash."""
    __tablename__ = 'user'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(80), nullable=False)
    email = db.Column(db.String(80), unique=True, nullable=False)
    password = db.Column(db.String(128), nullable=False)
    date_created = db.Column(db.DateTime, default=datetime.utcnow)


def _check_credentials(email, password):
    """Return the user matching *email* if *password* verifies, else None."""
    found_user = users.query.filter_by(email=email).first()
    if found_user and bcrypt.checkpw(password.encode('utf8'), found_user.password):
        return found_user
    return None


def _start_session(user):
    """Record *user* as the logged-in user in the session cookie."""
    session['email'] = user.email
    session['name'] = user.name
    session['id'] = user.id


def _password_url_token(stored_password):
    """Return str(stored_password) with URL-reserved characters removed."""
    token = str(stored_password)
    for reserved_char in _RESERVED_URL_CHARS:
        token = token.replace(reserved_char, "")
    return token


def _is_plain_folder_name(name):
    """Return True if *name* is a single path component (no traversal)."""
    return bool(name) and name not in (".", "..") and os.path.basename(name) == name


def _sorted_alphanumeric(data):
    """Sort strings so that embedded digit runs compare numerically."""
    def convert(text):
        return int(text) if text.isdigit() else text.lower()

    def alphanum_key(key):
        return [convert(c) for c in re.split('([0-9]+)', key)]

    return sorted(data, key=alphanum_key)


def _load_table_data():
    """Load the node/edge JSON written by the most recent search.

    Returns:
        (jnodes, jedges, nodata_temp): nodata_temp is 1 and jedges is ''
        when json_edges.txt contains no ':' (i.e. no edge objects), else 0
        with the parsed edges.
    """
    with open("json_nodes.txt") as nodes_file:
        jnodes = json.load(nodes_file)
    with open("json_edges.txt") as edges_file:
        raw_edges = edges_file.read()
    if ':' in raw_edges:
        return jnodes, json.loads(raw_edges), 0
    return jnodes, '', 1


def _summarize_genes(genes):
    """Return (display string, count) for at most the first three genes.

    More than three genes are shortened to three followed by ",...", and the
    count is derived from the commas in the display string, as before.
    """
    added = ""
    if len(genes) > 3:
        genes = genes[0:3]
        added = ",..."
    gene_name = str(genes)[1:-1].replace("'", "") + added
    return gene_name, gene_name.count(',') + 1


@app.route("/")
def root():
    """Render the landing page."""
    return render_template('index.html')


@app.route("/login", methods=["POST", "GET"])
def login():
    """Log a user in from the posted email/password form."""
    if request.method == "POST":
        user = _check_credentials(request.form['email'], request.form['password'])
        if user is None:
            flash("Invalid username or password!", "loginout")
            return render_template('signup.html')
        _start_session(user)
        # Only announce success after an actual login; a plain GET used to
        # flash this message without authenticating anyone.
        flash("Login Succesful!", "loginout")
    return render_template('index.html')


@app.route("/signup", methods=["POST", "GET"])
def signup():
    """Register a new user, or log in an existing one whose password matches."""
    if request.method != "POST":
        if 'email' in session:
            flash("Already Logged In!")
            return render_template('index.html')
        return render_template('signup.html')

    name = request.form['name']
    email = request.form['email']
    password = request.form['password']
    found_user = users.query.filter_by(email=email).first()
    if found_user and not bcrypt.checkpw(password.encode('utf8'), found_user.password):
        flash("Already registered, but wrong password!", "loginout")
        return render_template('signup.html')

    session['email'] = email
    session['name'] = name
    if found_user:
        # Known user with the right password: refresh the display name.
        session['email'] = found_user.email
        session['id'] = found_user.id
        found_user.name = name
        db.session.commit()
    else:
        hashed = bcrypt.hashpw(password.encode('utf8'), bcrypt.gensalt())
        db.session.add(users(name=name, email=email, password=hashed))
        db.session.commit()
        newuser = users.query.filter_by(email=session['email']).first()
        session['id'] = newuser.id
    flash("Login Succesful!", "loginout")
    return render_template('index.html')


@app.route("/signin", methods=["POST", "GET"])
def signin():
    """Show the sign-in form (GET) or authenticate the posted credentials."""
    if request.method == "POST":
        user = _check_credentials(request.form['email'], request.form['password'])
        if user is None:
            flash("Invalid username or password!", "loginout")
            return render_template('signup.html')
        _start_session(user)
        flash("Login Succesful!", "loginout")
        return render_template('index.html')
    return render_template('signin.html')


# change password
@app.route("/<nm_passwd>", methods=["POST", "GET"])
def profile(nm_passwd):
    """Password-change page addressed as ``/<name>_<token>``.

    The token is the stored password hash with URL-reserved characters
    removed.  The token is verified before anything else, so a POST can no
    longer change a password without presenting a valid token.
    """
    not_found = "This url does not exist"
    # rpartition: everything before the last "_" is the user name, so names
    # that themselves contain "_" are handled.
    user_name, separator, user_passwd = str(nm_passwd).rpartition("_")
    if not separator:
        return not_found
    found_user = users.query.filter_by(name=user_name).first()
    if found_user is None:
        return not_found
    # The stored hash stringifies as b'...'; rebuild the same shape.
    if _password_url_token(found_user.password) != "b\'" + user_passwd + "\'":
        return not_found

    if request.method == "POST":
        password = request.form['password']
        _start_session(found_user)
        found_user.password = bcrypt.hashpw(password.encode('utf8'), bcrypt.gensalt())
        db.session.commit()
        flash("Your password is changed!", "loginout")
        return render_template('index.html')
    return render_template("/passwd_change.html", name=user_name)


@app.route("/logout")
def logout():
    """Clear the session and return to the landing page."""
    if 'email' in session:
        # Prefer the display name; fall back to the email when it is empty.
        user1 = session.get('name') or session['email']
        flash("You have been logged out, {user1}".format(user1=user1), "loginout")
    else:
        flash("You have been logged out", "loginout")
    session.pop('email', None)
    session.clear()
    return render_template('index.html')


@app.route("/about")
def about():
    """Render the about page."""
    return render_template('about.html')


@app.route('/progress')
def progress():
    """Validate the query, stash it in the session and show the progress page."""
    # get the type from checkbox
    search_type = request.args.getlist('type')
    if search_type == []:
        search_type = list(DEFAULT_SEARCH_TYPES)
    session['search_type'] = search_type

    # only 1-100 terms are allowed
    genes = request.args.get('query') or ""
    genes = genes.replace(",", " ").replace(";", " ")
    genes = re.sub(r'\bLOC\d*?\b', "", genes, flags=re.I)
    genes = genes.split()
    if len(genes) > 100:
        message = "Up to 100 terms can be searched at a time"
        return render_template('index.html', message=message)
    if len(genes) == 0:
        message = "Please enter a search term "
        return render_template('index.html', message=message)

    tf_path = tempfile.gettempdir()
    session['path'] = tf_path + "/tmp" + ''.join(random.choice(string.ascii_letters) for x in range(6))
    # put the query in session cookie
    session['query'] = genes
    return render_template('progress.html', url_in="search", url_out="cytoscape")


@app.route("/search")
def search():
    """Run the search for the genes in the session, streaming progress.

    Returns a ``text/event-stream`` response.  Result files are written next
    to ``session['path']`` (suffixes ``_snt``, ``_cy``, ``_0link``) and as
    ``json_edges.txt`` / ``json_nodes.txt`` in the working directory; logged
    in users also get the two JSON files in their archive folder.
    """
    genes = session.get('query')
    if not genes:
        return render_template('index.html', message="Please enter a search term ")

    # Folder name: first three genes, "_m" marks that more were searched.
    # NOTE(review): gene terms and the email end up in file-system paths
    # unsanitised; consider validating them where they are first accepted.
    genes_for_folder_name = "_".join(str(gene) for gene in genes[:3])
    marker = "_m" if len(genes) > 3 else ""

    # generate a unique session ID depending on timestamp to track the results
    timestamp = datetime.utcnow().replace(microsecond=0)
    timestamp = timestamp.replace(tzinfo=pytz.utc)
    timestamp = timestamp.astimezone(pytz.timezone("America/Chicago"))
    session['timestamp'] = timestamp
    # strftime instead of stripping "_06_00" from str(timestamp): the old
    # replacement also removed a ":06:00" time and left "_05_00" during
    # daylight saving time.
    timeextension = timestamp.strftime("%Y_%m_%d_%H_%M_%S")

    user_login = 0
    search_dir = None
    # create a folder for the search
    if 'email' in session:
        user_login = 1
        session['user_folder'] = "./user/" + str(session['email'])
        search_dir = session['user_folder'] + "/" + timeextension + "_0_" + genes_for_folder_name + marker
        os.makedirs(search_dir, exist_ok=True)
        session['path'] = search_dir + "/" + timeextension

    # Everything the generator needs is captured here: the session is not
    # available once the response starts streaming.
    base_path = session['path']
    percent = round(100 / (len(genes) * 6), 1)  # 6 categories
    snt_file = base_path + "_snt"
    search_type = session['search_type']

    # consider the types got from checkbox
    node_sources = (
        ("function", n0, nj0),
        ("addiction", n1, nj1),
        ("drug", n2, nj2),
        ("brain", n3, nj3),
        ("stress", n4, nj4),
        ("psychiatric", n5, nj5),
        ("GWAS", n6, nj6),
    )
    temp_nodes = "".join(cys for kind, cys, _ in node_sources if kind in search_type)
    json_nodes = "{\"data\":[" + "".join(js for kind, _, js in node_sources if kind in search_type)
    json_nodes = json_nodes[:-2] + "]}"

    # Order in which a gene's edges are concatenated.
    edge_order = ("addiction", "drug", "function", "brain", "stress", "psychiatric", "GWAS")

    def generate(genes, tf_name):
        """Yield server-sent progress events while searching each gene."""
        sentence_parts = []
        edge_parts = []
        json_edge_parts = []
        nodes = temp_nodes
        done = 0
        nodesToHide = str()
        last_index = len(genes) - 1

        for index, gene in enumerate(genes):
            gene = gene.replace("-", " ")
            # report progress immediately
            done += percent
            yield "data:" + str(done) + "\n\n"

            categories = (
                # addiction terms must present with at least one drug
                ("addiction", addiction_d, undic(addiction_d) + ") AND (" + undic(drug_d)),
                ("drug", drug_d, undic(drug_d)),
                ("function", function_d, undic(function_d)),
                # brain has its own query terms that does not include the many short acronyms
                ("brain", brain_d, brain_query_term),
                ("stress", stress_d, undic(stress_d)),
                ("psychiatric", psychiatric_d, undic(psychiatric_d)),
            )
            found = {}
            for position, (category, keywords, query_term) in enumerate(categories):
                sent = gene_category(gene, keywords, query_term, category)
                if position > 0:
                    done += percent
                    yield "data:" + str(done) + "\n\n"
                found[category] = (generate_edges(sent, tf_name), generate_edges_json(sent, tf_name))
                sentence_parts.append(sent)
            # GWAS
            found["GWAS"] = (searchArchived('GWAS', gene, 'cys'), searchArchived('GWAS', gene, 'json'))

            # consider the types got from checkbox
            geneEdges = ""
            for category in edge_order:
                if category in search_type:
                    cys_edges, js_edges = found[category]
                    geneEdges += cys_edges
                    json_edge_parts.append(js_edges)

            ## there is a bug here. zero link notes are not excluded anymore
            if len(geneEdges) > 1:
                edge_parts.append(geneEdges)
                nodes += "{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/startGeneGene?forTopGene=" + gene + "'} },\n"
            else:
                nodesToHide += gene + " "

            if index != last_index:
                yield "data:" + str(done) + "\n\n"

        # save data before the last yield
        with open(tf_name, "w") as sntdata:
            sntdata.write("".join(sentence_parts))
        with open(base_path + "_cy", "w") as cysdata:
            cysdata.write(nodes + "".join(edge_parts))
        with open(base_path + "_0link", "w") as zeroLinkNode:
            zeroLinkNode.write(nodesToHide)

        # edges in json format
        json_edges = "{\"data\":[" + "".join(json_edge_parts)
        json_edges = json_edges[:-2] + "]}"

        # NOTE(review): these two files in the working directory are shared
        # by every visitor, so concurrent searches overwrite each other.
        with open("json_edges.txt", 'w') as edgesjson:
            edgesjson.write(json_edges)
        with open("json_nodes.txt", 'w') as nodesjson:
            nodesjson.write(json_nodes)
        # also keep both files in the user's archive folder
        if user_login == 1:
            with open(search_dir + "/json_edges.txt", "w") as temp_file_edges:
                temp_file_edges.write(json_edges)
            with open(search_dir + "/json_nodes.txt", "w") as temp_file_nodes:
                temp_file_nodes.write(json_nodes)

        yield "data:100\n\n"

    return Response(generate(genes, snt_file), mimetype='text/event-stream')


@app.route("/tableview")
def tableview():
    """Render the latest search result as a table."""
    jnodes, jedges, nodata_temp = _load_table_data()
    gene_name, num_gene = _summarize_genes(session['query'])
    message3 = (
        " Notes:\n"
        "  • Click on the abstract count to read sentences linking the keyword and the gene.\n"
        "  • Click on a gene to search its relations with top 200 addiction genes.\n"
        "  • Click on a keyword to see the terms included in the search.\n"
        "  • View the results in a graph."
    )
    return render_template('tableview.html', nodata_temp=nodata_temp, num_gene=num_gene, session_path=session['path'], jedges=jedges, jnodes=jnodes, gene_name=gene_name, message3=message3)


@app.route("/tableview0")
def tableview0():
    """Render the keywords with zero abstract counts for the latest search."""
    jnodes, jedges, nodata_temp = _load_table_data()
    gene_name, num_gene = _summarize_genes(session['query'])
    message4 = (
        " Notes:\n"
        "  • These are the keywords that have zero abstract counts.\n"
        "  • View all the results in a graph."
    )
    return render_template('tableview0.html', nodata_temp=nodata_temp, num_gene=num_gene, session_path=session['path'], jedges=jedges, jnodes=jnodes, gene_name=gene_name, message4=message4)


@app.route("/userarchive")
def userarchive():
    """List the logged-in user's archived searches."""
    # Check the login first: reading session['email'] used to raise KeyError
    # for anonymous visitors.
    if 'email' not in session or not os.path.exists("./user/" + str(session['email'])):
        flash("Search history doesn't exist!")
        return render_template('index.html')

    session['user_folder'] = "./user/" + str(session['email'])
    session_id = session['id']

    folder_list = []
    directory_list = []
    gene_list = []
    for filename in _sorted_alphanumeric(os.listdir(session['user_folder'])):
        if '_0_' not in filename:
            # Not a "<timestamp>_0_<genes>" search folder.
            continue
        folder_list.append(filename)
        gene_name = filename.split('_0_')[1]
        if gene_name[-2:] == '_m':
            gene_name = gene_name[:-2] + ", ..."
        gene_list.append(gene_name.replace('_', ', '))
        # Turn the YYYY_MM_DD_HH_MM_SS prefix back into a readable date/time.
        directory_list.append(filename[0:4] + "-" + filename[5:7] + "-" + filename[8:13] + ":" + filename[14:16] + ":" + filename[17:19])

    len_dir = len(directory_list)
    message3 = (
        " Note:\n"
        "  • Click on the Date/Time to view archived results.\n"
        "  • The Date/Time are based on US Central time zone. "
    )
    return render_template('userarchive.html', len_dir=len_dir, gene_list=gene_list, folder_list=folder_list, directory_list=directory_list, session_id=session_id, message3=message3)


# delete this search
@app.route('/remove', methods=['GET', 'POST'])
def remove():
    """Delete one archived search folder of the logged-in user."""
    remove_folder = request.args.get('remove_folder')
    # Only a bare folder name is accepted, so "../" cannot escape the
    # user's own directory.
    # NOTE(review): a destructive action reachable by GET is open to CSRF.
    if 'email' in session and _is_plain_folder_name(remove_folder):
        shutil.rmtree("./user/" + str(session['email'] + "/" + remove_folder), ignore_errors=True)
    return redirect(url_for('userarchive'))


@app.route('/date', methods=['GET', 'POST'])
def date():
    """Re-open an archived search and show it in the table view."""
    select_date = request.args.get('selected_date')
    # Archives exist only for logged-in users, and the folder must be a bare
    # "<timestamp>_0_<genes>" name inside the user's own directory.
    if 'email' not in session or not _is_plain_folder_name(select_date) or '_0_' not in select_date:
        flash("Search history doesn't exist!")
        return render_template('index.html')

    # open the cache folder for the user
    tf_path = "./user"
    time_extension, gene_name1 = select_date.split('_0_')[0:2]
    time_extension = time_extension.replace(':', '_').replace('-', '_')
    archive_dir = tf_path + "/" + str(session['email']) + "/" + select_date
    session['path'] = archive_dir + "/" + time_extension
    session['user_folder'] = tf_path + "/" + str(session['email'])

    # Make the archived result the "current" result read by the table views.
    shutil.copyfile(archive_dir + "/json_edges.txt", "json_edges.txt")
    shutil.copyfile(archive_dir + "/json_nodes.txt", "json_nodes.txt")
    jnodes, jedges, nodata_temp = _load_table_data()

    gene_list = []
    if nodata_temp == 0:
        for edge in jedges['data']:
            if edge['source'] not in gene_list:
                gene_list.append(edge['source'])
        gene_name, num_gene = _summarize_genes(gene_list)
        # As before, only the first three genes are carried in the session.
        gene_list = gene_list[0:3]
    else:
        # No edges stored: recover the gene names from the folder name.
        gene_name1 = gene_name1.replace("_", ", ")
        gene_name = gene_name1
        num_gene = gene_name1.count(',') + 1
        gene_list = gene_name1.split(',')
    session['query'] = gene_list

    message3 = (
        " Notes:\n"
        "  • Click on the keywords to see the indicated number of abstracts\n"
        "  • Click on a gene to search its relations with top 200 addiction genes\n"
        "  • Click on a keyword to see the terms included in the search\n"
        "  • Hover your pointer over a node will hide other links\n"
        "  • Nodes can be moved around for better visibility, reload the page will restore the original layout\n"
        "  • View the results in a graph."
    )
    return render_template('tableview.html', title='', nodata_temp=nodata_temp, date=select_date, num_gene=num_gene, session_path=session['path'], jedges=jedges, jnodes=jnodes, gene_name=gene_name, message3=message3)


@app.route('/cytoscape')
def cytoscape():
    """Render the latest search result as a cytoscape graph."""
    message2 = (
        " Notes:\n"
        "  • Click on a line to see the indicated number of abstracts\n"
        "  • Click on a gene to search its relations with top 200 addiction genes\n"
        "  • Click on a keyword to see the terms included in the search\n"
        "  • Hover your pointer over a node will hide other links\n"
        "  • Nodes can be moved around for better visibility, reload the page will restore the original layout\n"
        "  • View the results in a table. "
    )
    with open(session['path'] + "_cy", "r") as f:
        elements = f.read()
    with open(session['path'] + "_0link", "r") as z:
        zeroLink = z.read()
    if len(zeroLink) > 0:
        message2 += "No result was found for these genes: " + zeroLink + ""
    return render_template('cytoscape.html', elements=elements, message2=message2)


@app.route("/sentences")
def sentences():
    """List the stored sentences linking one gene with one category.

    ``edgeID`` has the form ``<sentence file>|<gene>|<category>``.
    """
    edge = request.args.get('edgeID')
    # NOTE(review): tf_name comes straight from the request and is opened
    # as a file; consider restricting it to the known result directories.
    (tf_name, gene0, cat0) = edge.split("|")
    pmid_list = []
    sentence_items = []
    with open(tf_name, "r") as df:
        all_sents = df.read()
    for sent in all_sents.split("\n"):
        if len(sent.strip()) == 0:
            continue
        (gene, nouse, cat, pmid, text) = sent.split("\t")
        if gene.upper() == gene0.upper() and cat.upper() == cat0.upper():
            sentence_items.append("\n  • " + text + " PMID:" + pmid + "\n    ")
            if pmid + cat0 not in pmid_list:
                pmid_list.append(pmid + cat0)
    num_abstract = len(sentence_items)
    out3 = "".join(sentence_items)

    out1 = "\n\n    " + gene0 + " and " + cat0 + "\n\n    \n"
    if len(pmid_list) > 1:
        out2 = str(num_abstract) + ' sentences in ' + str(len(pmid_list)) + ' studies' "\n    \n"
    else:
        out2 = str(num_abstract) + ' sentence in ' + str(len(pmid_list)) + ' study' "\n    \n"
    out = out1 + out2 + out3
    return render_template('sentences.html', sentences="\n      " + out + "\n\n    ")


## show the cytoscape graph for one gene from the top gene list
@app.route("/showTopGene")
def showTopGene():
    """Show the archived cytoscape graph of one top addiction gene."""
    query = request.args.get('topGene')
    nodesEdges = searchArchived('topGene', query, 'cys')
    message2 = (
        "\n\n  • " + query + " is one of the top addiction genes.\n"
        "  • An archived search is shown. Click on the blue circle to update the results and include keywords for brain region and gene function. The update may take a long time to finish. "
    )
    return render_template("cytoscape.html", elements=nodesEdges, message="Top addiction genes", message2=message2)


@app.route("/shownode")
def shownode():
    """List the search terms that make up one keyword node."""
    node = request.args.get('node')
    allnodes = {**brain_d, **drug_d, **function_d, **addiction_d, **stress_d, **psychiatric_d}
    out = "\n\n    " + node.upper() + "\n\n\n  • " + allnodes[node].replace("|", "\n  • ")
    return render_template('sentences.html', sentences=out + "\n\n    ")


@app.route("/startGeneGene")
def startGeneGene():
    """Remember the gene to compare and show the progress page."""
    session['forTopGene'] = request.args.get('forTopGene')
    return render_template('progress.html', url_in="searchGeneGene", url_out="showGeneTopGene")


@app.route("/searchGeneGene")
def gene_gene():
    """Search PubMed for sentences linking one gene with the top genes.

    Streams progress as server-sent events and stores the matching sentences
    in ``<session path>_ggSent`` and the result list in ``..._ggResult``.
    """
    tmp_ggPMID = session['path'] + "_ggPMID"
    gg_file = session['path'] + "_ggSent"  # gene_gene
    result_file = session['path'] + "_ggResult"

    def generate(query):
        done = 1
        yield "data:" + str(done) + "\n\n"
        # The query and the session-derived path come from the user, so both
        # are shell-quoted before being placed in a command line.
        quoted_tmp = shlex.quote(tmp_ggPMID)
        os.system("esearch -db pubmed -query " + shlex.quote(query) + " | efetch -format uid |sort >" + quoted_tmp)
        abstracts = os.popen("comm -1 -2 topGene_uniq.pmid " + quoted_tmp + " |fetch-pubmed -path " + pubmed_path + " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read()
        try:
            os.remove(tmp_ggPMID)
        except FileNotFoundError:
            pass  # nothing to clean up if esearch produced no file
        done = 10
        yield "data:" + str(done) + "\n\n"

        topGenes = dict()
        hitGenes = dict()
        out_parts = []
        with open("topGene_symb_alias.txt", "r") as top_f:
            for line in top_f:
                (symb, alias) = line.strip().split("\t")
                topGenes[symb] = alias.replace("; ", "|")

        allAbstracts = abstracts.split("\n")
        abstractCnt = len(allAbstracts)
        for rowCnt, row in enumerate(allAbstracts, start=1):
            if rowCnt % 10 == 0:
                done = 10 + round(rowCnt / abstractCnt, 2) * 80
                yield "data:" + str(done) + "\n\n"
            tiab = row.split("\t")
            pmid = tiab.pop(0)
            tiab = " ".join(tiab)
            ## keep the sentence only if it contains the gene
            for sent in sent_tokenize(tiab):
                if not findWholeWord(query)(sent):
                    continue
                # NOTE(review): the replacement here and the two replace()
                # calls below are no-ops as written; highlight markup seems
                # to have been lost. Kept unchanged pending confirmation.
                sent = re.sub(r'\b(%s)\b' % query, r'\1', sent, flags=re.I)
                for symb in topGenes:
                    allNames = symb + "|" + topGenes[symb]
                    if findWholeWord(allNames)(sent):
                        sent = sent.replace("", "").replace("", "")
                        sent = re.sub(r'\b(%s)\b' % allNames, r'\1', sent, flags=re.I)
                        out_parts.append(query + "\t" + "gene\t" + symb + "\t" + pmid + "\t" + sent + "\n")
                        hitGenes[symb] = hitGenes.get(symb, 0) + 1
        done = 95
        yield "data:" + str(done) + "\n\n"

        with open(gg_file, "w+") as gg:
            gg.write("".join(out_parts))

        results = (
            "\n\n    " + query + " vs top addiction genes\n\n"
            "    Click on the number of sentences will show those sentences. Click on the top addiction genes will show an archived search for that gene.\n"
            "    "
        )
        topGeneHits = {}
        for key in hitGenes:
            # NOTE(review): url is not used by the text below; presumably it
            # was the target of a link that has been lost. Confirm.
            url = gg_file + "|" + query + "|" + key
            sentword = "sentence" if hitGenes[key] == 1 else "sentences"
            topGeneHits["\n  • " + "Show " + str(hitGenes[key]) + " " + sentword + " about " + query + " and " + key + ""] = hitGenes[key]
        # Genes with the most sentences first.
        for entry in sorted(topGeneHits, key=topGeneHits.get, reverse=True):
            results += entry
        with open(result_file, "w+") as saveResult:
            saveResult.write(results)

        done = 100
        yield "data:" + str(done) + "\n\n"

    ## start the run
    query = session['forTopGene']
    return Response(generate(query), mimetype='text/event-stream')


@app.route('/showGeneTopGene')
def showGeneTopGene():
    """Show the stored result of the last gene vs. top-gene search."""
    with open(session['path'] + "_ggResult", "r") as result_f:
        results = result_f.read()
    return render_template('sentences.html', sentences=results + "\n\n\n    ")


## generate a page that lists all the top 150 addiction genes with links to cytoscape graph.
@app.route("/allTopGenes")
def top150genes():
    """Render the static list of top addiction genes."""
    return render_template("topAddictionGene.html")


if __name__ == '__main__':
    db.create_all()
    app.run(debug=True, port=4206)