Feature/gn llms (#140)

* add entry route for gn_llms * add gn_llms reference doc ids * init authorization module for gn-llm * Add class for parsing unprocessable response * add init config file * add client module: gn-llm fahamu interface * Add module descriptor for client file * response data handler * add response file handler * add processing file * remove unnecessary files * init code refactoring * Restructure code to module * refactor code: disable pylint for testing on cd
author: Alexander Kabui 2023-12-22 23:50:25 +0300
committer: GitHub 2023-12-22 23:50:25 +0300
commit: fb49b824bf7a90ecadf021c01c65880e78e74298 (patch)
tree: dd1058fdda3b6257c7e33e418725648dde2c3381 /gn3/llms/process.py
parent: 093163ebd511700078f998fa754e786435a81d1b (diff)
download: genenetwork3-fb49b824bf7a90ecadf021c01c65880e78e74298.tar.gz
1 files changed, 111 insertions, 0 deletions
diff --git a/gn3/llms/process.py b/gn3/llms/process.py
new file mode 100644
index 0000000..4220d7a
--- /dev/null
+++ b/gn3/llms/process.py
@@ -0,0 +1,111 @@
+
+# pylint: skip-file
+
+import requests
+import sys
+import time
+import string
+import json
+import os
+from gn3.llms.client import GeneNetworkQAClient
+from gn3.llms.response import DocIDs
+
+
+# Task endpoint of the remote question-answering API; request-specific
+# query strings are appended to it by the functions below.
+baseUrl = 'https://genenetwork.fahamuai.com/api/tasks'
+# Endpoint queried for the answer of a previously submitted task.
+answerUrl = f'{baseUrl}/answers'
+# Absolute path of the directory that contains this module.
+basedir = os.path.abspath(os.path.dirname(__file__))
+# Module-wide client wrapping one requests session; the API key is left
+# empty here.
+apiClient = GeneNetworkQAClient(requests.Session(), api_key='')
+
+
+
+
+
+
+
+
+
+def formatBibliographyInfo(bibInfo):
+    """Return a printable bibliography string for a document reference.
+
+    Args:
+        bibInfo: either a string, from which a trailing '.txt' is removed,
+            or a dict holding the keys 'author', 'title', 'year' and 'doi'.
+
+    Returns:
+        The formatted string. Input of any other type is returned unchanged.
+
+    Raises:
+        KeyError: if a dict input lacks one of the four keys listed above.
+    """
+    if isinstance(bibInfo, str):
+        # remove '.txt'
+        return bibInfo.removesuffix('.txt')
+    if isinstance(bibInfo, dict):
+        # The previous template was "{0}. ", which rendered only the author
+        # although title, year and doi were passed to format() as well.
+        # All four fields are now part of the output.
+        return "{0}. {1}. {2}. {3}".format(
+            bibInfo['author'], bibInfo['title'], bibInfo['year'], bibInfo['doi'])
+    return bibInfo
+
+
+def askTheDocuments( extendUrl, my_auth ):
+    """Submit a task to the API and fetch the answer produced for it.
+
+    POSTs to baseUrl + extendUrl, turns the reply into a task-id query
+    string and then requests the answer for that task.
+
+    Args:
+        extendUrl: string appended to baseUrl for the POST request.
+        my_auth: headers sent with both requests.
+
+    Returns:
+        (response, 1) when both replies have status code 200, otherwise
+        (message, 0) where message comes from negativeStatusMsg.
+
+    Exceptions raised by the requests or by raise_for_status() propagate
+    to the caller unchanged.
+    """
+    post_reply = requests.post(baseUrl + extendUrl, data={}, headers=my_auth)
+    post_reply.raise_for_status()
+    # raise_for_status() only rejects error codes; anything else that is
+    # not exactly 200 is reported through the message tuple instead.
+    if post_reply.status_code != 200:
+        return negativeStatusMsg(post_reply), 0
+
+    task_query = getTaskIDFromResult(post_reply)
+    answer_reply = getAnswerUsingTaskID(task_query, my_auth)
+    if answer_reply.status_code == 200:
+        return answer_reply, 1
+    return negativeStatusMsg(answer_reply), 0
+
+def getAnswerUsingTaskID( extendUrl, my_auth ):
+    """GET answerUrl + extendUrl and return the response object.
+
+    Args:
+        extendUrl: string appended to answerUrl (askTheDocuments passes the
+            '?task_id=...' query string here).
+        my_auth: headers sent with the request.
+
+    Exceptions raised by the request or by raise_for_status() propagate
+    to the caller unchanged.
+    """
+    answer_reply = requests.get(answerUrl + extendUrl, data={}, headers=my_auth)
+    answer_reply.raise_for_status()
+    return answer_reply
+
+def openAPIConfig():
+    """Load 'api.config.json' from this module's directory.
+
+    Returns:
+        The parsed JSON content of the file.
+
+    Raises:
+        OSError: if the file cannot be opened.
+        json.JSONDecodeError: if the file does not contain valid JSON.
+    """
+    # The context manager closes the handle even when json.load raises;
+    # the earlier open()/close() pair leaked it in that case.
+    with open(os.path.join(basedir, "api.config.json"), "rb") as f:
+        return json.load(f)
+
+
+def getTaskIDFromResult(res):
+    """Build the '?task_id=<id>' query string from a task response.
+
+    Args:
+        res: object whose .text attribute is a JSON document with a
+            'task_id' entry.
+
+    Returns:
+        The string '?task_id=' followed by the task id.
+    """
+    payload = json.loads(res.text)
+    return f"?task_id={payload['task_id']!s}"
+
+def negativeStatusMsg(res):
+    """Describe an unsuccessful HTTP response in a short text message.
+
+    Args:
+        res: object exposing .status_code and .reason.
+
+    Returns:
+        A multi-line string containing the status code and the reason.
+    """
+    # The reason placeholder used to be '{res.reason}', a keyword field that
+    # format() could not resolve from positional arguments (KeyError: 'res').
+    return 'Problems\n\tStatus code => {0}\n\tReason=> {1}'.format(res.status_code, res.reason)
+
+def filterResponseText(val):
+    """Strip non-printable characters from val and parse the rest as JSON.
+
+    Only characters found in string.printable are kept before the text is
+    handed to json.loads.
+    """
+    printable_only = ''.join(str(ch) for ch in val if ch in string.printable)
+    return json.loads(printable_only)
+
+def getGNQA(query):
+    """Ask the question-answering service and return answer and references.
+
+    Args:
+        query: question text; it is appended to '?ask=' as given.
+
+    Returns:
+        (answer, references) when the answer request succeeded and the
+        response has a 'data' entry; ("Unfortunately I have nothing on the
+        query", []) when 'data' is missing; (res, "Unfortunately I have
+        nothing.") when the answer request did not report success.
+
+    NOTE(review): the failure tuple carries the message in second position,
+    unlike the other two returns; confirm that callers expect this.
+    """
+    # Only the task id from ask() is used; its response is discarded.
+    _, task_id = apiClient.ask('?ask=' + query)
+    res, success = apiClient.get_answer(task_id)
+
+    if success != 1:
+        return res, "Unfortunately I have nothing."
+
+    payload = filterResponseText(res.text)
+    if payload.get("data") is None:
+        return "Unfortunately I have nothing on the query", []
+
+    data = payload['data']
+    answer = data['answer']
+    references = parse_context(data['context'])
+    return answer, references
+
+
+
+def parse_context(context):
+    """Turn the context mapping of an answer into a list of references.
+
+    Args:
+        context: mapping from document id to a sequence of entries, each
+            of which has a 'text' item.
+
+    Returns:
+        One dict per document id with the keys 'doc_id', 'bibInfo' and
+        'comboTxt'; 'comboTxt' holds every entry text prefixed by a tab.
+    """
+    references = []
+    for doc_id, passages in context.items():
+        combined = ''.join('\t' + passage['text'] for passage in passages)
+
+        info = DocIDs().getInfo(doc_id)
+        # When the lookup hands back something other than the id itself it
+        # is formatted as bibliography info; otherwise the id is used as is.
+        label = formatBibliographyInfo(info) if doc_id != info else doc_id
+        references.append({"doc_id": doc_id, "bibInfo": label, "comboTxt": combined})
+    return references
+\ No newline at end of file
author	Alexander Kabui	2023-12-22 23:50:25 +0300
committer	GitHub	2023-12-22 23:50:25 +0300
commit	fb49b824bf7a90ecadf021c01c65880e78e74298 (patch)
tree	dd1058fdda3b6257c7e33e418725648dde2c3381 /gn3/llms/process.py
parent	093163ebd511700078f998fa754e786435a81d1b (diff)
download	genenetwork3-fb49b824bf7a90ecadf021c01c65880e78e74298.tar.gz