diff options
author | Hao Chen | 2019-05-17 19:10:08 -0500 |
---|---|---|
committer | Hao Chen | 2019-05-17 19:10:08 -0500 |
commit | eb51746ce69fb43450b6896e4a71e5052df51115 (patch) | |
tree | d87e36cbf05611010fa19fd78b94b120095cf25a /topGene_step1_cnt_abstracts.py | |
parent | a6376a94e137eea0e0d326d6524fe9c2177b1b34 (diff) | |
download | genecup-eb51746ce69fb43450b6896e4a71e5052df51115.tar.gz |
find top genes
Diffstat (limited to 'topGene_step1_cnt_abstracts.py')
-rwxr-xr-x | topGene_step1_cnt_abstracts.py | 7 |
1 file changed, 7 insertions, 0 deletions
```diff
diff --git a/topGene_step1_cnt_abstracts.py b/topGene_step1_cnt_abstracts.py
index 780c314..a9dd23f 100755
--- a/topGene_step1_cnt_abstracts.py
+++ b/topGene_step1_cnt_abstracts.py
@@ -39,6 +39,7 @@ if len(sys.argv)==2:
     input_f=sys.argv[1]
 else:
     input_f="./ncbi_gene_symb_syno_name_txid9606.txt"
+    input_f="./ncbi_gene_symb_syno_name_txid9606_p2.txt"
 
 addiction=undic(addiction_d)
 drug=undic(drug_d)
@@ -72,6 +73,8 @@ with open (input_f, "r") as f:
         # tab is added if there are abstracts counts
         if "\t" in line:
             (gene, count)=line.split("\t")
+            if int(count)<100:
+                rerun=1
         else:
             gene=line.strip()
         # remove synonyms with only two letters
@@ -93,3 +96,7 @@ with open (input_f, "r") as f:
             if (int(count)>0):
                 out.write(gene+"\t"+count)
 
+sorted_f=out_f.replace(".txt","_sorted.txt")
+os.system("sort -k2 -t$'\t' -rn " + out_f + " > " + sorted_f )
+
+
```