author     Hao Chen  2020-04-10 10:23:37 -0500
committer  Hao Chen  2020-04-10 10:23:37 -0500
commit     d86b2a97aa02e3b68e1a25f565554f9239f384b1 (patch)
tree       297185ed78704cf28ebe42ec6d12847cb15136b3
parent     abc62d1a24357818c88c91089f22611a93e28a01 (diff)
download   genecup-d86b2a97aa02e3b68e1a25f565554f9239f384b1.tar.gz
maybe more efficient
-rw-r--r--  nlp.py     33
-rwxr-xr-x  server.py  37
2 files changed, 38 insertions, 32 deletions
diff --git a/nlp.py b/nlp.py
--- a/nlp.py
+++ b/nlp.py
@@ -48,18 +48,23 @@
 with open('./nlp/vocabulary.txt', 'r') as vocab:
     vocab = vocab.read()
 # create the CNN model
-def create_model(vocab_size, max_length):
-    model = Sequential()
-    model.add(Embedding(vocab_size, 32, input_length=max_length))
-    model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
-    model.add(MaxPooling1D(pool_size=2))
-    model.add(Flatten())
-    model.add(Dense(10, activation='relu'))
-    model.add(Dense(1, activation='sigmoid'))
-    opt = keras.optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
+#def create_model(vocab_size, max_length):
+model = Sequential()
+model.add(Embedding(vocab_size, 32, input_length=max_length))
+model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
+model.add(MaxPooling1D(pool_size=2))
+model.add(Flatten())
+model.add(Dense(10, activation='relu'))
+model.add(Dense(1, activation='sigmoid'))
+opt = keras.optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
+model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[keras.metrics.AUC()])
+model = create_model(23154, 64)
+# load the weights
+## this is done for every prediction??
+checkpoint_path = "./nlp/weights.ckpt"
+model.load_weights(checkpoint_path)
 
-    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[keras.metrics.AUC()])
-    return model
+#return model
 
 def predict_sent(sent_for_pred):
     max_length = 64
@@ -70,13 +75,9 @@ def predict_sent(sent_for_pred):
     line = [line]
     tokenized_sent = tokenizer.texts_to_sequences(line)
     tokenized_sent = pad_sequences(tokenized_sent, maxlen=max_length, padding='post')
-    model = create_model(23154, 64)
-    # load the weights
-    checkpoint_path = "./nlp/weights.ckpt"
-    model.load_weights(checkpoint_path)
     predict_sent = model.predict(tokenized_sent, verbose=0)
     percent_sent = predict_sent[0,0]
     if round(percent_sent) == 0:
         return 'neg'
     else:
-        return 'pos'
\ No newline at end of file
+        return 'pos'
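Editor's note on the hunk above: judging by the new "## this is done for every prediction??" comment, the point of the change is to build the network and load the checkpoint once at import time rather than inside every predict_sent() call. As committed, however, the body of create_model() is moved to module level while its def line is commented out, yet model = create_model(23154, 64) is still executed, which would raise a NameError on import. Below is a minimal sketch of the load-once pattern the commit appears to be aiming for, keeping create_model() as a function; the layer sizes, optimizer settings, vocabulary size (23154), sequence length (64), and checkpoint path are taken from the diff, and the imports are standard Keras.

    # Sketch: build the model and load weights once at module import,
    # so predict_sent() reuses the same instance on every call.
    from tensorflow import keras
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense

    def create_model(vocab_size, max_length):
        model = Sequential()
        model.add(Embedding(vocab_size, 32, input_length=max_length))
        model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
        model.add(MaxPooling1D(pool_size=2))
        model.add(Flatten())
        model.add(Dense(10, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        opt = keras.optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
        model.compile(loss='binary_crossentropy', optimizer=opt,
                      metrics=[keras.metrics.AUC()])
        return model

    # Done once at import time, not inside predict_sent().
    model = create_model(23154, 64)
    model.load_weights("./nlp/weights.ckpt")

Hoisting the weight load out of the per-prediction path is where the saving comes from; model.predict() is cheap next to rebuilding the network and re-reading a checkpoint on every call.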
diff --git a/server.py b/server.py
--- a/server.py
+++ b/server.py
@@ -541,11 +541,14 @@ def sentences():
     edge=request.args.get('edgeID')
     (tf_name, gene0, cat0)=edge.split("|")
     out3=""
-    out5_pl=""
-    out5_sn=""
+#    out5_pl=""
+#    out5_sn=""
     out_pos = ""
     out_neg = ""
     num_abstract = 0
+    stress_systemic = "<br><br><br><hr>"+"<b>Sentence(s) describing celluar stress (classified using a deep learning model):</b>"
+    stress_cellular = "<b>Sentence(s) describing systemic stress (classified using a deep learning model):</b>"
+    stress_sents={}
     with open(tf_name, "r") as df:
         all_sents=df.read()
     for sent in all_sents.split("\n"):
@@ -555,29 +558,31 @@ def sentences():
             out3+= "<li> "+ text + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid +"\" target=_new>PMID:"+pmid+"<br></a>"
             num_abstract += 1
             if(pmid+cat0 not in pmid_list):
-               pmid_list.append(pmid+cat0)
+                pmid_list.append(pmid+cat0)
             if(cat0=='stress'):
-                out5_pl = 'These are analyzed by deep learning to seperate the relevant sentences.'
-                out5_sn = 'This is analyzed by deep learning to see whether it is relevant or not.'
+#                out5_pl = 'These are analyzed by deep learning to seperate the relevant sentences.'
+#                out5_sn = 'This is analyzed by deep learning to see whether it is relevant or not.'
                 out_pred = "<li> "+ text + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid +"\" target=_new>PMID:"+pmid+"<br></a>"
+                #should we add the html part after the predict_sent function?
                 out4 = predict_sent(out_pred)
-                if(out4 == 'pos'):
-                    out_rel = "<b>Relevant sentences:</b>"
-                    out_pos += out_pred
-                else:
-                    out_irrel = "<br><br><br><hr>"+"<b>Irrelevant sentences:</b>"
-                    out_neg += out_pred
+                stress_sents[out4] +=stress_sents[out4]
+#                stress_sents["pos"]+=stress_sents["pos"]
+#                stress_sents["neg"]+=stress_sents["neg"]
+#                if(out4 == 'pos'):
+#                    out_pos += out_pred
+#                else:
+#                    out_neg += out_pred
     out1="<h3>"+gene0 + " and " + cat0 + "</h3>\n"
     if len(pmid_list)>1:
         out2 = str(num_abstract) + ' sentences in ' + str(len(pmid_list)) + ' studies' + "<br>"
-        if(out5_pl!=""):
-            out2 += out5_pl
+#        if(out5_pl!=""):
+#            out2 += out5_pl
         out2 += "<hr>\n"
     else:
        out2 = str(num_abstract) + ' sentence in ' + str(len(pmid_list)) + ' study' "<br>"
-       if(out5_sn!=""):
-           out2 += out5_sn
-       out2 += "<hr>\n"
+#       if(out5_sn!=""):
+#           out2 += out5_sn
+       out2 += "<hr>\n"
     if(out_neg == "" and out_pos == ""):
         out= out1+ out2 +out3
     elif(out_pos != "" and out_neg!=""):
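Editor's note on the hunk above: as committed, stress_sents starts as a plain empty dict, so stress_sents[out4] += stress_sents[out4] raises a KeyError the first time a stress sentence is classified, and even with the keys pre-seeded it doubles whatever the bucket already holds instead of appending the newly formatted sentence (out_pred). The two new header strings also look swapped: stress_systemic carries the "celluar stress" heading (a typo for "cellular") and stress_cellular the "systemic stress" one. Below is a minimal sketch of the accumulator the change appears to be moving toward, using collections.defaultdict; collect_stress_sentence() is a hypothetical helper standing in for the loop body, and the import of predict_sent from nlp.py is an assumption about how server.py reaches the classifier.

    from collections import defaultdict

    from nlp import predict_sent  # assumed import path; classifier returns 'pos' or 'neg'

    # defaultdict(str) makes missing keys behave as empty strings, so the
    # first 'pos' or 'neg' sentence can be appended without a KeyError.
    stress_sents = defaultdict(str)

    def collect_stress_sentence(text, pmid):
        # Hypothetical helper standing in for the loop body in sentences().
        out_pred = ("<li> " + text
                    + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term="
                    + pmid + "\" target=_new>PMID:" + pmid + "<br></a>")
        label = predict_sent(out_pred)    # 'pos' or 'neg'
        stress_sents[label] += out_pred   # append this sentence, not double the bucket

Keying the buckets by the classifier's own return value keeps the later HTML assembly to two lookups, stress_sents["pos"] and stress_sents["neg"], in place of the parallel out_pos/out_neg strings the commit comments out.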