2 files changed, 141 insertions, 161 deletions
diff --git a/old/server_gene_gene.py b/old/server_gene_gene.py
new file mode 100644
index 0000000..a125699
--- /dev/null
+++ b/old/server_gene_gene.py
@@ -0,0 +1,141 @@
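+# Dead code removed from server.py -- gene-gene search routes (/startGeneGene, /searchGeneGene, /showGeneTopGene).
+# These routes were not referenced from any template. Archived for reference only: this file has no imports and does not define app or pubmed_path, so it cannot run as-is.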
+
+@app.route("/startGeneGene")
+def startGeneGene():  # Stores the 'forTopGene' request argument in the session, then renders the progress page.
+    session['forTopGene']=request.args.get('forTopGene')  # read back later via session.get('forTopGene', '')
+    return render_template('progress.html', url_in="searchGeneGene", url_out="showGeneTopGene",version=version())  # url_in / url_out carry the names of the /searchGeneGene and /showGeneTopGene routes
+
+
+@app.route("/searchGeneGene")
+def gene_gene():  # Returns a text/event-stream Response wrapping generate(); as side effects it writes <session path>_ggSent and <session path>_ggResult.
+    if 'path' not in session:  # no working path yet: derive one before building the file names below
+        if 'email' not in session :  # presumably means "not logged in" -- TODO confirm against the login code
+             tf_path_gg=tempfile.gettempdir()
+             rnd_gg = "tmp_gg" + ''.join(random.choice(string.ascii_letters) for x in range(6))  # "tmp_gg" followed by 6 random ASCII letters
+             session['path'] = tf_path_gg + "/" + rnd_gg
+             os.makedirs(session['path'], exist_ok=True)  # NOTE(review): this creates a directory, but the files below are siblings named <path>_gg*, not children of it
+        else:
+            if 'path_user' in session:
+                session['path'] = session['path_user']
+            else:
+                 return "Error: User session path not found.", 500
+
+
+    tmp_ggPMID=session['path']+"_ggPMID"  # temporary list of PMIDs returned by the PubMed search; removed after use
+    gg_file=session['path']+"_ggSent"  # tab-separated sentence hits
+    result_file=session['path']+"_ggResult"  # HTML summary; the same "_ggResult" suffix is read back by showGeneTopGene
+
+    def findWholeWord(w):  # returns the bound .search of a case-insensitive pattern; w is inserted unescaped, so it is interpreted as a regex
+        return re.compile(r'(?<!\w)({})(?!\w)'.format(w), flags=re.IGNORECASE).search
+
+    def generate(query):  # generator yielding "data:<progress>\n\n" strings: 1, 10, per-row updates, 95, then 100
+        from nltk.tokenize import sent_tokenize
+        progress=1
+        yield "data:"+str(progress)+"\n\n"
+        safe_query = query.replace("\"", "\\\"")  # NOTE(review): only double quotes are escaped; $, backticks and backslashes in the query still reach the shell
+        os.system(f"esearch -db pubmed -query \"{safe_query}\" | efetch -format uid |sort > \"{tmp_ggPMID}\"")  # sorted PMID list, as required by comm below
+
+        top_gene_pmid_file = "topGene_uniq.pmid"  # relative path, resolved against the process working directory
+        if not os.path.exists(top_gene_pmid_file):
+            print(f"Warning: {top_gene_pmid_file} not found. Gene-gene search might be affected.")
+            open(top_gene_pmid_file, 'a').close()  # create an empty file so the comm command below has an input
+
+        abstracts_cmd = f"comm -1 -2 \"{top_gene_pmid_file}\" \"{tmp_ggPMID}\" | fetch-pubmed -path \"{pubmed_path}\" | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText | sed \"s/-/ /g\""  # comm -1 -2 keeps only PMIDs present in both files; sed turns every "-" into a space. NOTE(review): pubmed_path is not defined anywhere in this file
+        try:
+            abstracts_process = os.popen(abstracts_cmd)
+            abstracts = abstracts_process.read()
+            abstracts_process.close()
+        except Exception as e_abs:
+            print(f"Error getting abstracts for gene-gene search: {e_abs}")
+            abstracts = ""  # fall through with no abstracts, so the loop below finds no hits
+
+        if os.path.exists(tmp_ggPMID):
+            os.system(f"rm \"{tmp_ggPMID}\"")
+
+        progress=10
+        yield "data:"+str(progress)+"\n\n"
+        topGenes=dict()  # symbol -> "|"-separated aliases
+        out_str=str()  # accumulated tab-separated hit rows
+        hitGenes=dict()  # symbol -> number of matching sentences
+
+        top_gene_alias_file = "topGene_symb_alias.txt"
+        if os.path.exists(top_gene_alias_file):
+            with open(top_gene_alias_file, "r") as top_f:
+                for line in top_f:
+                    parts = line.strip().split("\t")
+                    if len(parts) == 2:  # lines without exactly two tab-separated fields are ignored
+                        symb, alias = parts
+                        topGenes[symb]=alias.replace("; ","|")  # "; "-separated aliases become a "|" alternation for the regex below
+        else:
+            print(f"Warning: {top_gene_alias_file} not found. Top gene list will be empty.")
+
+        allAbstracts= abstracts.split("\n")
+        abstractCnt=len(allAbstracts) if abstracts else 0  # counts every split line, including the blank ones skipped below
+        rowCnt=0
+
+        for row in allAbstracts:
+            if not row.strip(): continue
+            rowCnt+=1
+            if abstractCnt > 0 and rowCnt % 10 == 0 :  # emit a progress event on every 10th non-blank row
+                progress=10+round(rowCnt/abstractCnt,2)*80  # maps the row position onto the 10..90 range
+                yield "data:"+str(progress)+"\n\n"
+
+            tiab_parts=row.split("\t", 1)  # first field is the PMID, the remainder is the title and abstract text
+            if len(tiab_parts) < 2: continue
+            pmid = tiab_parts[0]
+            tiab_text_gg = tiab_parts[1]
+
+            sentences_gg = sent_tokenize(tiab_text_gg)
+            for sent_item in sentences_gg:
+                if findWholeWord(query)(sent_item):  # keep the sentence only if it mentions the query. NOTE(review): query is used as a regex, unescaped
+                    sent_item=re.sub(r'\b(%s)\b' % query, r'<strong>\1</strong>', sent_item, flags=re.I)  # wrap each query match in <strong>
+                    for symb_item in topGenes:
+                        allNames=symb_item+"|"+topGenes[symb_item]  # symbol plus its aliases as one alternation
+                        if findWholeWord(allNames)(sent_item) :
+                            sent_item=sent_item.replace("<b>","").replace("</b>","")  # drop the <b> tags left by a previously matched gene
+                            sent_item=re.sub(r'\b(%s)\b' % allNames, r'<b>\1</b>', sent_item, flags=re.I)  # bold the current gene's matches
+                            out_str+=query+"\t"+"gene\t" + symb_item+"\t"+pmid+"\t"+sent_item+"\n"  # row: query, "gene", symbol, PMID, sentence
+                            if symb_item in hitGenes:
+                                hitGenes[symb_item]+=1
+                            else:
+                                hitGenes[symb_item]=1
+        progress=95
+        yield "data:"+str(progress)+"\n\n"
+        with open(gg_file, "w+") as gg:
+            gg.write(out_str)
+
+        results_html="<h4>"+query+" vs top addiction genes</h4> Click on the number of sentences will show those sentences. Click on the <span style=\"background-color:#FcF3cf\">top addiction genes</span> will show an archived search for that gene.<hr>"  # NOTE(review): query is inserted into the HTML unescaped
+        topGeneHits={}  # HTML list item -> hit count
+        for key_gene in hitGenes.keys():
+            url_gg=gg_file+"|"+query+"|"+key_gene  # edgeID value: sentence file, query and gene joined by "|"
+            sentword="sentence" if hitGenes[key_gene]==1 else "sentences"
+            topGeneHits[ "<li> <a href=/sentences?edgeID=" + url_gg+ " target=_new>" + "Show " + str(hitGenes[key_gene]) + " " + sentword +" </a> about "+query+" and <a href=/showTopGene?topGene="+key_gene+" target=_gene><span style=\"background-color:#FcF3cf\">"+key_gene+"</span></a>" ]=hitGenes[key_gene]
+
+        topSorted = sorted(topGeneHits.items(), key=lambda item: item[1], reverse=True)  # genes with the most hits first
+
+        for k_html,v_count in topSorted:
+            results_html+=k_html
+
+        with open(result_file, "w+") as saveResult:
+            saveResult.write(results_html)
+
+        progress=100
+        yield "data:"+str(progress)+"\n\n"
+
+    query_gene_gene=session.get('forTopGene', '')  # value stored by the /startGeneGene route
+    if not query_gene_gene:
+        return Response("Error: No gene query found for gene-gene search.", mimetype='text/event-stream')
+    return Response(generate(query_gene_gene), mimetype='text/event-stream')
+
+
+@app.route('/showGeneTopGene')
+def showGeneTopGene ():  # Renders the saved <session path>_ggResult HTML inside sentences.html, or a "no results" message when the file is missing.
+    results_content = "<p>No results found.</p>"
+    result_file_path = session.get('path', '') + "_ggResult"  # never empty: without a session path this is just "_ggResult"
+    if result_file_path and os.path.exists(result_file_path):
+        with open(result_file_path, "r") as result_f:
+            results_content=result_f.read()
+    else:
+        print(f"Warning: Result file {result_file_path} not found for showGeneTopGene.")
+    return render_template('sentences.html', sentences=results_content+"<p><br>",no_footer=True,version=version())
diff --git a/server.py b/server.py
index b1a7404..4ea4891 100755
--- a/server.py
+++ b/server.py
@@ -1900,167 +1900,6 @@ def synonyms():
         return f"An error occurred while processing your request for {node}.", 500
 
 
-@app.route("/startGeneGene")
-def startGeneGene():
-    session['forTopGene']=request.args.get('forTopGene')
-    return render_template('progress.html', url_in="searchGeneGene", url_out="showGeneTopGene",version=version())
-
-
-@app.route("/searchGeneGene")
-def gene_gene():
-    # Ensure session['path'] is set (e.g. from /progress by non-logged-in user)
-    if 'path' not in session:
-        # Handle error: session path not set, perhaps redirect or show error
-        # For now, let's assume it's set by a previous step like /progress
-        # If it can be called directly, this needs robust handling.
-        # Quick fix: if not set, create a temporary one, but this might indicate flow issue
-        if 'email' not in session : # Only create temp path if not logged in and path is missing
-             tf_path_gg=tempfile.gettempdir()
-             rnd_gg = "tmp_gg" + ''.join(random.choice(string.ascii_letters) for x in range(6))
-             session['path'] = tf_path_gg + "/" + rnd_gg
-             os.makedirs(session['path'], exist_ok=True)
-        else: # Logged in user should have path_user from /progress
-            if 'path_user' in session:
-                session['path'] = session['path_user'] # Unify to use session['path']
-            else: # Critical error if logged in and no path_user
-                 return "Error: User session path not found.", 500
-
-
-    tmp_ggPMID=session['path']+"_ggPMID"
-    gg_file=session['path']+"_ggSent" # Gene_gene
-    result_file=session['path']+"_ggResult"
-
-    # pubmed_path needs to be defined, assuming it's a global or config
-    # For this example, let's assume it's a pre-configured path.
-    # If not, this os.system call will fail or use current dir.
-    # pubmed_path = "/path/to/local/pubmed/mirror" # Example, should be configured
-    pubmed_path = "./pubmed_data/" # Placeholder, ensure this path exists or is correctly set
-    os.makedirs(pubmed_path, exist_ok=True) # Ensure it exists if it's a local relative path
-
-    def findWholeWord(w): # Helper function, should be defined if not in more_functions
-        return re.compile(r'(?<!\w)({})(?!\w)'.format(w), flags=re.IGNORECASE).search
-
-    def generate(query):
-        from nltk.tokenize import sent_tokenize # Local import
-        progress=1
-        yield "data:"+str(progress)+"\n\n"
-        # Ensure query is safe for shell command
-        safe_query = query.replace("\"", "\\\"") # Basic escaping
-        os.system(f"esearch -db pubmed -query \"{safe_query}\" | efetch -format uid |sort > \"{tmp_ggPMID}\"")
-
-        # 'topGene_uniq.pmid' file needs to exist
-        # For robustness, check if it exists
-        top_gene_pmid_file = "topGene_uniq.pmid"
-        if not os.path.exists(top_gene_pmid_file):
-            print(f"Warning: {top_gene_pmid_file} not found. Gene-gene search might be affected.")
-            # Create an empty file to prevent comm command error, or handle differently
-            open(top_gene_pmid_file, 'a').close()
-
-        abstracts_cmd = f"comm -1 -2 \"{top_gene_pmid_file}\" \"{tmp_ggPMID}\" | fetch-pubmed -path \"{pubmed_path}\" | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText | sed \"s/-/ /g\""
-        try:
-            abstracts_process = os.popen(abstracts_cmd)
-            abstracts = abstracts_process.read()
-            abstracts_process.close()
-        except Exception as e_abs:
-            print(f"Error getting abstracts for gene-gene search: {e_abs}")
-            abstracts = ""
-
-        if os.path.exists(tmp_ggPMID): # Clean up temp file
-            os.system(f"rm \"{tmp_ggPMID}\"")
-
-        progress=10
-        yield "data:"+str(progress)+"\n\n"
-        topGenes=dict()
-        out_str=str() # Renamed from out
-        hitGenes=dict()
-
-        # 'topGene_symb_alias.txt' file needs to exist
-        top_gene_alias_file = "topGene_symb_alias.txt"
-        if os.path.exists(top_gene_alias_file):
-            with open(top_gene_alias_file, "r") as top_f:
-                for line in top_f:
-                    parts = line.strip().split("\t")
-                    if len(parts) == 2:
-                        symb, alias = parts
-                        topGenes[symb]=alias.replace("; ","|")
-        else:
-            print(f"Warning: {top_gene_alias_file} not found. Top gene list will be empty.")
-
-        allAbstracts= abstracts.split("\n")
-        abstractCnt=len(allAbstracts) if abstracts else 0 # Handle empty abstracts
-        rowCnt=0
-
-        for row in allAbstracts:
-            if not row.strip(): continue
-            rowCnt+=1
-            if abstractCnt > 0 and rowCnt % 10 == 0 : # Check abstractCnt > 0
-                progress=10+round(rowCnt/abstractCnt,2)*80
-                yield "data:"+str(progress)+"\n\n"
-
-            tiab_parts=row.split("\t", 1) # Split only on first tab
-            if len(tiab_parts) < 2: continue # Skip malformed lines
-            pmid = tiab_parts[0]
-            tiab_text_gg = tiab_parts[1] # Renamed
-
-            sentences_gg = sent_tokenize(tiab_text_gg) # Renamed
-            ## keep the sentence only if it contains the gene
-            for sent_item in sentences_gg: # Renamed
-                if findWholeWord(query)(sent_item):
-                    sent_item=re.sub(r'\b(%s)\b' % query, r'<strong>\1</strong>', sent_item, flags=re.I)
-                    for symb_item in topGenes: # Renamed
-                        allNames=symb_item+"|"+topGenes[symb_item]
-                        if findWholeWord(allNames)(sent_item) :
-                            sent_item=sent_item.replace("<b>","").replace("</b>","") # Clean previous bolds
-                            sent_item=re.sub(r'\b(%s)\b' % allNames, r'<b>\1</b>', sent_item, flags=re.I) # Bold current match
-                            out_str+=query+"\t"+"gene\t" + symb_item+"\t"+pmid+"\t"+sent_item+"\n"
-                            if symb_item in hitGenes: # Check if key exists
-                                hitGenes[symb_item]+=1
-                            else:
-                                hitGenes[symb_item]=1
-        progress=95
-        yield "data:"+str(progress)+"\n\n"
-        with open(gg_file, "w+") as gg:
-            gg.write(out_str)
-            # gg.close() # Not needed with 'with open'
-
-        results_html="<h4>"+query+" vs top addiction genes</h4> Click on the number of sentences will show those sentences. Click on the <span style=\"background-color:#FcF3cf\">top addiction genes</span> will show an archived search for that gene.<hr>" # Renamed
-        topGeneHits={}
-        for key_gene in hitGenes.keys(): # Renamed
-            url_gg=gg_file+"|"+query+"|"+key_gene # Renamed
-            sentword="sentence" if hitGenes[key_gene]==1 else "sentences"
-            topGeneHits[ "<li> <a href=/sentences?edgeID=" + url_gg+ " target=_new>" + "Show " + str(hitGenes[key_gene]) + " " + sentword +" </a> about "+query+" and <a href=/showTopGene?topGene="+key_gene+" target=_gene><span style=\"background-color:#FcF3cf\">"+key_gene+"</span></a>" ]=hitGenes[key_gene]
-
-        topSorted = sorted(topGeneHits.items(), key=lambda item: item[1], reverse=True) # Correct way to sort dict by value
-
-        for k_html,v_count in topSorted: # Renamed
-            results_html+=k_html
-
-        with open(result_file, "w+") as saveResult: # Ensure it's opened in write mode
-            saveResult.write(results_html)
-            # saveResult.close() # Not needed
-
-        progress=100
-        yield "data:"+str(progress)+"\n\n"
-
-    # Start the run
-    query_gene_gene=session.get('forTopGene', '') # Get from session, default to empty
-    if not query_gene_gene:
-        return Response("Error: No gene query found for gene-gene search.", mimetype='text/event-stream')
-    return Response(generate(query_gene_gene), mimetype='text/event-stream')
-
-
-@app.route('/showGeneTopGene')
-def showGeneTopGene ():
-    results_content = "<p>No results found.</p>" # Default content
-    result_file_path = session.get('path', '') + "_ggResult" # Get path from session
-    if result_file_path and os.path.exists(result_file_path):
-        with open(result_file_path, "r") as result_f:
-            results_content=result_f.read()
-    else:
-        print(f"Warning: Result file {result_file_path} not found for showGeneTopGene.")
-    return render_template('sentences.html', sentences=results_content+"<p><br>",no_footer=True,version=version())
-
-
 # Generate a page that lists all the top 150 addiction genes with links to cytoscape graph.
 @app.route("/allTopGenes")
 def top150genes():