diff options
| -rw-r--r-- | old/server_gene_gene.py | 141 | ||||
| -rwxr-xr-x | server.py | 161 |
2 files changed, 141 insertions, 161 deletions
diff --git a/old/server_gene_gene.py b/old/server_gene_gene.py new file mode 100644 index 0000000..a125699 --- /dev/null +++ b/old/server_gene_gene.py @@ -0,0 +1,141 @@ +# Dead code removed from server.py -- gene-gene search routes +# These routes were not referenced from any template. + +@app.route("/startGeneGene") +def startGeneGene(): + session['forTopGene']=request.args.get('forTopGene') + return render_template('progress.html', url_in="searchGeneGene", url_out="showGeneTopGene",version=version()) + + +@app.route("/searchGeneGene") +def gene_gene(): + if 'path' not in session: + if 'email' not in session : + tf_path_gg=tempfile.gettempdir() + rnd_gg = "tmp_gg" + ''.join(random.choice(string.ascii_letters) for x in range(6)) + session['path'] = tf_path_gg + "/" + rnd_gg + os.makedirs(session['path'], exist_ok=True) + else: + if 'path_user' in session: + session['path'] = session['path_user'] + else: + return "Error: User session path not found.", 500 + + + tmp_ggPMID=session['path']+"_ggPMID" + gg_file=session['path']+"_ggSent" + result_file=session['path']+"_ggResult" + + def findWholeWord(w): + return re.compile(r'(?<!\w)({})(?!\w)'.format(w), flags=re.IGNORECASE).search + + def generate(query): + from nltk.tokenize import sent_tokenize + progress=1 + yield "data:"+str(progress)+"\n\n" + safe_query = query.replace("\"", "\\\"") + os.system(f"esearch -db pubmed -query \"{safe_query}\" | efetch -format uid |sort > \"{tmp_ggPMID}\"") + + top_gene_pmid_file = "topGene_uniq.pmid" + if not os.path.exists(top_gene_pmid_file): + print(f"Warning: {top_gene_pmid_file} not found. Gene-gene search might be affected.") + open(top_gene_pmid_file, 'a').close() + + abstracts_cmd = f"comm -1 -2 \"{top_gene_pmid_file}\" \"{tmp_ggPMID}\" | fetch-pubmed -path \"{pubmed_path}\" | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText | sed \"s/-/ /g\"" + try: + abstracts_process = os.popen(abstracts_cmd) + abstracts = abstracts_process.read() + abstracts_process.close() + except Exception as e_abs: + print(f"Error getting abstracts for gene-gene search: {e_abs}") + abstracts = "" + + if os.path.exists(tmp_ggPMID): + os.system(f"rm \"{tmp_ggPMID}\"") + + progress=10 + yield "data:"+str(progress)+"\n\n" + topGenes=dict() + out_str=str() + hitGenes=dict() + + top_gene_alias_file = "topGene_symb_alias.txt" + if os.path.exists(top_gene_alias_file): + with open(top_gene_alias_file, "r") as top_f: + for line in top_f: + parts = line.strip().split("\t") + if len(parts) == 2: + symb, alias = parts + topGenes[symb]=alias.replace("; ","|") + else: + print(f"Warning: {top_gene_alias_file} not found. Top gene list will be empty.") + + allAbstracts= abstracts.split("\n") + abstractCnt=len(allAbstracts) if abstracts else 0 + rowCnt=0 + + for row in allAbstracts: + if not row.strip(): continue + rowCnt+=1 + if abstractCnt > 0 and rowCnt % 10 == 0 : + progress=10+round(rowCnt/abstractCnt,2)*80 + yield "data:"+str(progress)+"\n\n" + + tiab_parts=row.split("\t", 1) + if len(tiab_parts) < 2: continue + pmid = tiab_parts[0] + tiab_text_gg = tiab_parts[1] + + sentences_gg = sent_tokenize(tiab_text_gg) + for sent_item in sentences_gg: + if findWholeWord(query)(sent_item): + sent_item=re.sub(r'\b(%s)\b' % query, r'<strong>\1</strong>', sent_item, flags=re.I) + for symb_item in topGenes: + allNames=symb_item+"|"+topGenes[symb_item] + if findWholeWord(allNames)(sent_item) : + sent_item=sent_item.replace("<b>","").replace("</b>","") + sent_item=re.sub(r'\b(%s)\b' % allNames, r'<b>\1</b>', sent_item, flags=re.I) + out_str+=query+"\t"+"gene\t" + symb_item+"\t"+pmid+"\t"+sent_item+"\n" + if symb_item in hitGenes: + hitGenes[symb_item]+=1 + else: + hitGenes[symb_item]=1 + progress=95 + yield "data:"+str(progress)+"\n\n" + with open(gg_file, "w+") as gg: + gg.write(out_str) + + results_html="<h4>"+query+" vs top addiction genes</h4> Click on the number of sentences will show those sentences. Click on the <span style=\"background-color:#FcF3cf\">top addiction genes</span> will show an archived search for that gene.<hr>" + topGeneHits={} + for key_gene in hitGenes.keys(): + url_gg=gg_file+"|"+query+"|"+key_gene + sentword="sentence" if hitGenes[key_gene]==1 else "sentences" + topGeneHits[ "<li> <a href=/sentences?edgeID=" + url_gg+ " target=_new>" + "Show " + str(hitGenes[key_gene]) + " " + sentword +" </a> about "+query+" and <a href=/showTopGene?topGene="+key_gene+" target=_gene><span style=\"background-color:#FcF3cf\">"+key_gene+"</span></a>" ]=hitGenes[key_gene] + + topSorted = sorted(topGeneHits.items(), key=lambda item: item[1], reverse=True) + + for k_html,v_count in topSorted: + results_html+=k_html + + with open(result_file, "w+") as saveResult: + saveResult.write(results_html) + + progress=100 + yield "data:"+str(progress)+"\n\n" + + query_gene_gene=session.get('forTopGene', '') + if not query_gene_gene: + return Response("Error: No gene query found for gene-gene search.", mimetype='text/event-stream') + return Response(generate(query_gene_gene), mimetype='text/event-stream') + + +@app.route('/showGeneTopGene') +def showGeneTopGene (): + results_content = "<p>No results found.</p>" + result_file_path = session.get('path', '') + "_ggResult" + if result_file_path and os.path.exists(result_file_path): + with open(result_file_path, "r") as result_f: + results_content=result_f.read() + else: + print(f"Warning: Result file {result_file_path} not found for showGeneTopGene.") + return render_template('sentences.html', sentences=results_content+"<p><br>",no_footer=True,version=version()) diff --git a/server.py b/server.py index b1a7404..4ea4891 100755 --- a/server.py +++ b/server.py @@ -1900,167 +1900,6 @@ def synonyms(): return f"An error occurred while processing your request for {node}.", 500 -@app.route("/startGeneGene") -def startGeneGene(): - session['forTopGene']=request.args.get('forTopGene') - return render_template('progress.html', url_in="searchGeneGene", url_out="showGeneTopGene",version=version()) - - -@app.route("/searchGeneGene") -def gene_gene(): - # Ensure session['path'] is set (e.g. from /progress by non-logged-in user) - if 'path' not in session: - # Handle error: session path not set, perhaps redirect or show error - # For now, let's assume it's set by a previous step like /progress - # If it can be called directly, this needs robust handling. - # Quick fix: if not set, create a temporary one, but this might indicate flow issue - if 'email' not in session : # Only create temp path if not logged in and path is missing - tf_path_gg=tempfile.gettempdir() - rnd_gg = "tmp_gg" + ''.join(random.choice(string.ascii_letters) for x in range(6)) - session['path'] = tf_path_gg + "/" + rnd_gg - os.makedirs(session['path'], exist_ok=True) - else: # Logged in user should have path_user from /progress - if 'path_user' in session: - session['path'] = session['path_user'] # Unify to use session['path'] - else: # Critical error if logged in and no path_user - return "Error: User session path not found.", 500 - - - tmp_ggPMID=session['path']+"_ggPMID" - gg_file=session['path']+"_ggSent" # Gene_gene - result_file=session['path']+"_ggResult" - - # pubmed_path needs to be defined, assuming it's a global or config - # For this example, let's assume it's a pre-configured path. - # If not, this os.system call will fail or use current dir. - # pubmed_path = "/path/to/local/pubmed/mirror" # Example, should be configured - pubmed_path = "./pubmed_data/" # Placeholder, ensure this path exists or is correctly set - os.makedirs(pubmed_path, exist_ok=True) # Ensure it exists if it's a local relative path - - def findWholeWord(w): # Helper function, should be defined if not in more_functions - return re.compile(r'(?<!\w)({})(?!\w)'.format(w), flags=re.IGNORECASE).search - - def generate(query): - from nltk.tokenize import sent_tokenize # Local import - progress=1 - yield "data:"+str(progress)+"\n\n" - # Ensure query is safe for shell command - safe_query = query.replace("\"", "\\\"") # Basic escaping - os.system(f"esearch -db pubmed -query \"{safe_query}\" | efetch -format uid |sort > \"{tmp_ggPMID}\"") - - # 'topGene_uniq.pmid' file needs to exist - # For robustness, check if it exists - top_gene_pmid_file = "topGene_uniq.pmid" - if not os.path.exists(top_gene_pmid_file): - print(f"Warning: {top_gene_pmid_file} not found. Gene-gene search might be affected.") - # Create an empty file to prevent comm command error, or handle differently - open(top_gene_pmid_file, 'a').close() - - abstracts_cmd = f"comm -1 -2 \"{top_gene_pmid_file}\" \"{tmp_ggPMID}\" | fetch-pubmed -path \"{pubmed_path}\" | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText | sed \"s/-/ /g\"" - try: - abstracts_process = os.popen(abstracts_cmd) - abstracts = abstracts_process.read() - abstracts_process.close() - except Exception as e_abs: - print(f"Error getting abstracts for gene-gene search: {e_abs}") - abstracts = "" - - if os.path.exists(tmp_ggPMID): # Clean up temp file - os.system(f"rm \"{tmp_ggPMID}\"") - - progress=10 - yield "data:"+str(progress)+"\n\n" - topGenes=dict() - out_str=str() # Renamed from out - hitGenes=dict() - - # 'topGene_symb_alias.txt' file needs to exist - top_gene_alias_file = "topGene_symb_alias.txt" - if os.path.exists(top_gene_alias_file): - with open(top_gene_alias_file, "r") as top_f: - for line in top_f: - parts = line.strip().split("\t") - if len(parts) == 2: - symb, alias = parts - topGenes[symb]=alias.replace("; ","|") - else: - print(f"Warning: {top_gene_alias_file} not found. Top gene list will be empty.") - - allAbstracts= abstracts.split("\n") - abstractCnt=len(allAbstracts) if abstracts else 0 # Handle empty abstracts - rowCnt=0 - - for row in allAbstracts: - if not row.strip(): continue - rowCnt+=1 - if abstractCnt > 0 and rowCnt % 10 == 0 : # Check abstractCnt > 0 - progress=10+round(rowCnt/abstractCnt,2)*80 - yield "data:"+str(progress)+"\n\n" - - tiab_parts=row.split("\t", 1) # Split only on first tab - if len(tiab_parts) < 2: continue # Skip malformed lines - pmid = tiab_parts[0] - tiab_text_gg = tiab_parts[1] # Renamed - - sentences_gg = sent_tokenize(tiab_text_gg) # Renamed - ## keep the sentence only if it contains the gene - for sent_item in sentences_gg: # Renamed - if findWholeWord(query)(sent_item): - sent_item=re.sub(r'\b(%s)\b' % query, r'<strong>\1</strong>', sent_item, flags=re.I) - for symb_item in topGenes: # Renamed - allNames=symb_item+"|"+topGenes[symb_item] - if findWholeWord(allNames)(sent_item) : - sent_item=sent_item.replace("<b>","").replace("</b>","") # Clean previous bolds - sent_item=re.sub(r'\b(%s)\b' % allNames, r'<b>\1</b>', sent_item, flags=re.I) # Bold current match - out_str+=query+"\t"+"gene\t" + symb_item+"\t"+pmid+"\t"+sent_item+"\n" - if symb_item in hitGenes: # Check if key exists - hitGenes[symb_item]+=1 - else: - hitGenes[symb_item]=1 - progress=95 - yield "data:"+str(progress)+"\n\n" - with open(gg_file, "w+") as gg: - gg.write(out_str) - # gg.close() # Not needed with 'with open' - - results_html="<h4>"+query+" vs top addiction genes</h4> Click on the number of sentences will show those sentences. Click on the <span style=\"background-color:#FcF3cf\">top addiction genes</span> will show an archived search for that gene.<hr>" # Renamed - topGeneHits={} - for key_gene in hitGenes.keys(): # Renamed - url_gg=gg_file+"|"+query+"|"+key_gene # Renamed - sentword="sentence" if hitGenes[key_gene]==1 else "sentences" - topGeneHits[ "<li> <a href=/sentences?edgeID=" + url_gg+ " target=_new>" + "Show " + str(hitGenes[key_gene]) + " " + sentword +" </a> about "+query+" and <a href=/showTopGene?topGene="+key_gene+" target=_gene><span style=\"background-color:#FcF3cf\">"+key_gene+"</span></a>" ]=hitGenes[key_gene] - - topSorted = sorted(topGeneHits.items(), key=lambda item: item[1], reverse=True) # Correct way to sort dict by value - - for k_html,v_count in topSorted: # Renamed - results_html+=k_html - - with open(result_file, "w+") as saveResult: # Ensure it's opened in write mode - saveResult.write(results_html) - # saveResult.close() # Not needed - - progress=100 - yield "data:"+str(progress)+"\n\n" - - # Start the run - query_gene_gene=session.get('forTopGene', '') # Get from session, default to empty - if not query_gene_gene: - return Response("Error: No gene query found for gene-gene search.", mimetype='text/event-stream') - return Response(generate(query_gene_gene), mimetype='text/event-stream') - - -@app.route('/showGeneTopGene') -def showGeneTopGene (): - results_content = "<p>No results found.</p>" # Default content - result_file_path = session.get('path', '') + "_ggResult" # Get path from session - if result_file_path and os.path.exists(result_file_path): - with open(result_file_path, "r") as result_f: - results_content=result_f.read() - else: - print(f"Warning: Result file {result_file_path} not found for showGeneTopGene.") - return render_template('sentences.html', sentences=results_content+"<p><br>",no_footer=True,version=version()) - - # Generate a page that lists all the top 150 addiction genes with links to cytoscape graph. @app.route("/allTopGenes") def top150genes(): |
