about | summary | refs | log | tree | commit | diff
path: root/topGene_step1_cnt_abstracts.py
diff options
context:
space:
mode:
author Hao Chen 2019-05-17 19:10:08 -0500
committer Hao Chen 2019-05-17 19:10:08 -0500
commit eb51746ce69fb43450b6896e4a71e5052df51115 (patch)
tree d87e36cbf05611010fa19fd78b94b120095cf25a /topGene_step1_cnt_abstracts.py
parent a6376a94e137eea0e0d326d6524fe9c2177b1b34 (diff)
download genecup-eb51746ce69fb43450b6896e4a71e5052df51115.tar.gz
find top genes
Diffstat (limited to 'topGene_step1_cnt_abstracts.py')
-rwxr-xr-x topGene_step1_cnt_abstracts.py 7
1 files changed, 7 insertions, 0 deletions
diff --git a/topGene_step1_cnt_abstracts.py b/topGene_step1_cnt_abstracts.py
index 780c314..a9dd23f 100755
--- a/topGene_step1_cnt_abstracts.py
+++ b/topGene_step1_cnt_abstracts.py
@@ -39,6 +39,7 @@ if len(sys.argv)==2:
input_f=sys.argv[1]
else:
input_f="./ncbi_gene_symb_syno_name_txid9606.txt"
+ input_f="./ncbi_gene_symb_syno_name_txid9606_p2.txt"
addiction=undic(addiction_d)
drug=undic(drug_d)
@@ -72,6 +73,8 @@ with open (input_f, "r") as f:
# tab is added if there are abstracts counts
if "\t" in line:
(gene, count)=line.split("\t")
+ if int(count)<100:
+ rerun=1
else:
gene=line.strip()
# remove synonyms with only two letters
@@ -93,3 +96,7 @@ with open (input_f, "r") as f:
if (int(count)>0):
out.write(gene+"\t"+count)
+sorted_f=out_f.replace(".txt","_sorted.txt")
+os.system("sort -k2 -t$'\t' -rn " + out_f + " > " + sorted_f )
+
+