author    Hao Chen    2020-04-10 10:23:37 -0500
committer Hao Chen    2020-04-10 10:23:37 -0500
commit    d86b2a97aa02e3b68e1a25f565554f9239f384b1 (patch)
tree      297185ed78704cf28ebe42ec6d12847cb15136b3
parent    abc62d1a24357818c88c91089f22611a93e28a01 (diff)
download  genecup-d86b2a97aa02e3b68e1a25f565554f9239f384b1.tar.gz
maybe more efficient
-rw-r--r--  nlp.py     33
-rwxr-xr-x  server.py  37
2 files changed, 38 insertions, 32 deletions
diff --git a/nlp.py b/nlp.py
index e54713e..9f36b58 100644
--- a/nlp.py
+++ b/nlp.py
@@ -48,18 +48,23 @@ with open('./nlp/vocabulary.txt', 'r') as vocab:
vocab = vocab.read()
# create the CNN model
-def create_model(vocab_size, max_length):
- model = Sequential()
- model.add(Embedding(vocab_size, 32, input_length=max_length))
- model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
- model.add(MaxPooling1D(pool_size=2))
- model.add(Flatten())
- model.add(Dense(10, activation='relu'))
- model.add(Dense(1, activation='sigmoid'))
- opt = keras.optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
+#def create_model(vocab_size, max_length):
+model = Sequential()
+model.add(Embedding(vocab_size, 32, input_length=max_length))
+model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
+model.add(MaxPooling1D(pool_size=2))
+model.add(Flatten())
+model.add(Dense(10, activation='relu'))
+model.add(Dense(1, activation='sigmoid'))
+opt = keras.optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
+model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[keras.metrics.AUC()])
+model = create_model(23154, 64)
+# load the weights
+## this is done for every prediction??
+checkpoint_path = "./nlp/weights.ckpt"
+model.load_weights(checkpoint_path)
- model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[keras.metrics.AUC()])
- return model
+#return model
def predict_sent(sent_for_pred):
max_length = 64
@@ -70,13 +75,9 @@ def predict_sent(sent_for_pred):
line = [line]
tokenized_sent = tokenizer.texts_to_sequences(line)
tokenized_sent = pad_sequences(tokenized_sent, maxlen=max_length, padding='post')
- model = create_model(23154, 64)
- # load the weights
- checkpoint_path = "./nlp/weights.ckpt"
- model.load_weights(checkpoint_path)
predict_sent = model.predict(tokenized_sent, verbose=0)
percent_sent = predict_sent[0,0]
if round(percent_sent) == 0:
return 'neg'
else:
- return 'pos'
\ No newline at end of file
+ return 'pos'
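
The hunk above moves model construction out of predict_sent() so the CNN is built, compiled, and its weights restored only once instead of on every call, which is what the commit message "maybe more efficient" refers to. As committed, though, the inline code still calls create_model(23154, 64) after the function definition was commented out. Below is a minimal sketch of the intended load-once pattern, assuming the inline construction is the path meant to survive; the layer sizes, optimizer settings, vocab size 23154, max length 64, and checkpoint path are taken from the diff, while build_model and the tokenizer argument are illustrative names only.

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences

VOCAB_SIZE = 23154
MAX_LENGTH = 64
CHECKPOINT_PATH = "./nlp/weights.ckpt"

def build_model(vocab_size, max_length):
    # Same architecture and optimizer as the original create_model()
    model = Sequential()
    model.add(Embedding(vocab_size, 32, input_length=max_length))
    model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    opt = keras.optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
    model.compile(loss='binary_crossentropy', optimizer=opt,
                  metrics=[keras.metrics.AUC()])
    return model

# Built, compiled, and loaded once at import time, not inside every prediction.
model = build_model(VOCAB_SIZE, MAX_LENGTH)
model.load_weights(CHECKPOINT_PATH)

def predict_sent(sent_for_pred, tokenizer):
    # tokenizer is assumed to be the fitted Keras tokenizer used elsewhere in nlp.py
    tokenized = tokenizer.texts_to_sequences([sent_for_pred])
    tokenized = pad_sequences(tokenized, maxlen=MAX_LENGTH, padding='post')
    score = model.predict(tokenized, verbose=0)[0, 0]
    return 'pos' if round(score) else 'neg'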
diff --git a/server.py b/server.py
index f16d930..f36a4b3 100755
--- a/server.py
+++ b/server.py
@@ -541,11 +541,14 @@ def sentences():
edge=request.args.get('edgeID')
(tf_name, gene0, cat0)=edge.split("|")
out3=""
- out5_pl=""
- out5_sn=""
+# out5_pl=""
+# out5_sn=""
out_pos = ""
out_neg = ""
num_abstract = 0
+ stress_systemic = "<br><br><br><hr>"+"<b>Sentence(s) describing celluar stress (classified using a deep learning model):</b>"
+ stress_cellular = "<b>Sentence(s) describing systemic stress (classified using a deep learning model):</b>"
+ stress_sents={}
with open(tf_name, "r") as df:
all_sents=df.read()
for sent in all_sents.split("\n"):
@@ -555,29 +558,31 @@ def sentences():
out3+= "<li> "+ text + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid +"\" target=_new>PMID:"+pmid+"<br></a>"
num_abstract += 1
if(pmid+cat0 not in pmid_list):
- pmid_list.append(pmid+cat0)
+ pmid_list.append(pmid+cat0)
if(cat0=='stress'):
- out5_pl = 'These are analyzed by deep learning to seperate the relevant sentences.'
- out5_sn = 'This is analyzed by deep learning to see whether it is relevant or not.'
+# out5_pl = 'These are analyzed by deep learning to seperate the relevant sentences.'
+# out5_sn = 'This is analyzed by deep learning to see whether it is relevant or not.'
out_pred = "<li> "+ text + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid +"\" target=_new>PMID:"+pmid+"<br></a>"
+ #should we add the html part after the predict_sent function?
out4 = predict_sent(out_pred)
- if(out4 == 'pos'):
- out_rel = "<b>Relevant sentences:</b>"
- out_pos += out_pred
- else:
- out_irrel = "<br><br><br><hr>"+"<b>Irrelevant sentences:</b>"
- out_neg += out_pred
+ stress_sents[out4] +=stress_sents[out4]
+# stress_sents["pos"]+=stress_sents["pos"]
+# stress_sents["neg"]+=stress_sents["neg"]
+# if(out4 == 'pos'):
+# out_pos += out_pred
+# else:
+# out_neg += out_pred
out1="<h3>"+gene0 + " and " + cat0 + "</h3>\n"
if len(pmid_list)>1:
out2 = str(num_abstract) + ' sentences in ' + str(len(pmid_list)) + ' studies' + "<br>"
- if(out5_pl!=""):
- out2 += out5_pl
+# if(out5_pl!=""):
+# out2 += out5_pl
out2 += "<hr>\n"
else:
out2 = str(num_abstract) + ' sentence in ' + str(len(pmid_list)) + ' study' "<br>"
- if(out5_sn!=""):
- out2 += out5_sn
- out2 += "<hr>\n"
+# if(out5_sn!=""):
+# out2 += out5_sn
+ out2 += "<hr>\n"
if(out_neg == "" and out_pos == ""):
out= out1+ out2 +out3
elif(out_pos != "" and out_neg!=""):
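
For the stress category, this hunk replaces the out_pos/out_neg pair with a stress_sents dictionary keyed by the classifier's 'pos'/'neg' output, but the committed line stress_sents[out4] += stress_sents[out4] would raise a KeyError on the empty dict and never stores the sentence. A minimal sketch of the bucketing this appears to aim for, assuming the intent is to append each formatted sentence under its predicted label; names other than stress_sents, out_pred, and predict_sent are illustrative.

# Initialize both buckets before the sentence loop so lookups never fail.
stress_sents = {"pos": "", "neg": ""}

def bucket_stress_sentence(out_pred):
    # predict_sent() returns 'pos' or 'neg' for one formatted sentence;
    # collect the sentence under that label for output after the loop.
    label = predict_sent(out_pred)
    stress_sents[label] += out_pred

# After the loop, the stress_cellular / stress_systemic headers introduced in
# the hunk can each be emitted once, followed by the matching bucket, e.g.:
#   out += header_for_relevant + stress_sents["pos"]
#   out += header_for_irrelevant + stress_sents["neg"]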