# Provenance: recovered from a git patch (cgit v1.2.3 rendering).
#   Commit:  fb49b824bf7a90ecadf021c01c65880e78e74298
#   Author:  Alexander Kabui
#   Date:    Fri, 22 Dec 2023 23:50:25 +0300
#   Subject: Feature/gn llms (#140)
#   File:    gn3/llms/process.py (new file, index 0000000..4220d7a)
#   Commit message items (verbatim):
#     * add entry route for gn_llms
#     * add gn_llms reference doc ids
#     * init authorization module for gn-llm
#     * Add class for parsing unprocessable response
#     * add init config file
#     * add clienmodule:gn-llm fahamu interface
#     * Add module descriptor for client file
#     * reponse data handler
#     * add response file handler
#     * add processing file
#     * remove unnecessary files
#     * init code refactoring
#     * Restructure code to module
#     * refactor code:disble pylint for testing on cd
"""Helpers for sending a question to the GeneNetwork QA task API.

The module posts a query, fetches the answer for the returned task id and
turns the answer's ``context`` mapping into a list of references.
"""

# pylint: skip-file

import requests
import sys
import time
import string
import json
import os
from urllib.parse import quote
from gn3.llms.client import GeneNetworkQAClient
from gn3.llms.response import DocIDs


baseUrl = 'https://genenetwork.fahamuai.com/api/tasks'
answerUrl = baseUrl + '/answers'
basedir = os.path.abspath(os.path.dirname(__file__))
apiClient = GeneNetworkQAClient(requests.Session(), api_key='')


def formatBibliographyInfo(bibInfo):
    """Return a display string for a bibliography entry.

    * ``str``  -> the same string with a trailing ``'.txt'`` removed.
    * ``dict`` -> a string built from the entry's fields; the keys
      ``author``, ``title``, ``year`` and ``doi`` must all be present
      (``KeyError`` otherwise).
    * anything else is returned unchanged.
    """
    if isinstance(bibInfo, str):
        return bibInfo.removesuffix('.txt')
    if isinstance(bibInfo, dict):
        # NOTE(review): in the patch text this was recovered from, the
        # format-string literal was split by a line break, so its exact
        # content could not be recovered; it is reconstructed here as a
        # newline escape. As written only `{0}` (author) is consumed and
        # title/year/doi are ignored -- confirm against the repository.
        return "{0}. \n".format(
            bibInfo['author'],
            bibInfo['title'],
            bibInfo['year'],
            bibInfo['doi'],
        )
    return bibInfo


def askTheDocuments(extendUrl, my_auth):
    """Post a query and fetch the answer for the task it creates.

    Args:
        extendUrl: string appended to ``baseUrl`` for the POST request.
        my_auth: headers passed to both HTTP requests.

    Returns:
        ``(response, 1)`` when both requests return status 200, otherwise
        ``(message, 0)`` where ``message`` comes from ``negativeStatusMsg``.

    Raises:
        requests.HTTPError: propagated from ``raise_for_status()``.
    """
    res = requests.post(baseUrl + extendUrl, data={}, headers=my_auth)
    res.raise_for_status()
    # raise_for_status() only rejects error statuses; anything else that
    # is not exactly 200 is reported through the message/0 return value.
    if res.status_code != 200:
        return negativeStatusMsg(res), 0
    task_id = getTaskIDFromResult(res)
    res = getAnswerUsingTaskID(task_id, my_auth)
    if res.status_code != 200:
        return negativeStatusMsg(res), 0
    return res, 1


def getAnswerUsingTaskID(extendUrl, my_auth):
    """GET ``answerUrl + extendUrl`` and return the response.

    ``extendUrl`` is the ``'?task_id=...'`` suffix produced by
    ``getTaskIDFromResult``. HTTP error statuses raise
    ``requests.HTTPError`` via ``raise_for_status()``.
    """
    res = requests.get(answerUrl + extendUrl, data={}, headers=my_auth)
    res.raise_for_status()
    return res


def openAPIConfig():
    """Load and return the JSON in ``api.config.json`` next to this module."""
    config_path = os.path.join(basedir, "api.config.json")
    # `with` guarantees the handle is closed even if parsing fails.
    with open(config_path, "rb") as config_file:
        return json.load(config_file)


def getTaskIDFromResult(res):
    """Build the ``'?task_id=<id>'`` URL suffix from a response body.

    ``res.text`` must be JSON containing a ``task_id`` key.
    """
    payload = json.loads(res.text)
    return '?task_id=' + str(payload['task_id'])


def negativeStatusMsg(res):
    """Return a human-readable message with ``res``'s status code and reason."""
    # The previous str.format() call referenced `{res.reason}` without
    # passing `res` as a keyword argument, which raised KeyError.
    return f'Problems\n\tStatus code => {res.status_code}\n\tReason=> {res.reason}'


def filterResponseText(val):
    """Drop characters not in ``string.printable`` from ``val``, then parse as JSON."""
    printable_only = ''.join(
        str(char) for char in val if char in string.printable
    )
    return json.loads(printable_only)


def getGNQA(query):
    """Ask the QA service ``query`` and return its answer with references.

    Returns:
        * ``(answer, references)`` on success, where ``references`` is the
          list produced by ``parse_context``;
        * ``("Unfortunately I have nothing on the query", [])`` when the
          response has no ``data``;
        * ``(response, "Unfortunately I have nothing.")`` when fetching the
          answer did not succeed. Note that the element order differs from
          the success case; it is kept as is for existing callers.
    """
    # Percent-encode the question so characters such as '&', '#' or '+'
    # cannot alter the structure of the query string.
    res, task_id = apiClient.ask('?ask=' + quote(query))
    res, success = apiClient.get_answer(task_id)

    if success != 1:
        return res, "Unfortunately I have nothing."

    respText = filterResponseText(res.text)
    data = respText.get("data")
    if data is None:
        return "Unfortunately I have nothing on the query", []
    return data['answer'], parse_context(data['context'])


def parse_context(context):
    """Map each document id in ``context`` to a reference entry.

    Args:
        context: mapping of document id -> iterable of entries, each with
            a ``'text'`` key.

    Returns:
        A list of dicts with keys ``doc_id``, ``bibInfo`` and ``comboTxt``
        (the entry texts, each prefixed with a tab and concatenated).
    """
    # One lookup object serves every document instead of one per iteration.
    doc_lookup = DocIDs()
    result = []
    for doc_ids, summary in context.items():
        comboTxt = ''.join('\t' + entry['text'] for entry in summary)

        docInfo = doc_lookup.getInfo(doc_ids)
        # getInfo() returning the id itself is treated as "no info found".
        if doc_ids != docInfo:
            bibInfo = formatBibliographyInfo(docInfo)
        else:
            bibInfo = doc_ids
        result.append(
            {"doc_id": doc_ids, "bibInfo": bibInfo, "comboTxt": comboTxt}
        )
    return result