diff options
-rw-r--r-- | gn3/api/llm.py | 7 | ||||
-rw-r--r-- | gn3/llms/client.py | 17 | ||||
-rw-r--r-- | gn3/llms/process.py | 70 | ||||
-rw-r--r-- | gn3/settings.py | 8 |
4 files changed, 54 insertions, 48 deletions
diff --git a/gn3/api/llm.py b/gn3/api/llm.py index 83b5e85..68e6acc 100644 --- a/gn3/api/llm.py +++ b/gn3/api/llm.py @@ -2,7 +2,7 @@ # pylint: skip-file -from flask import jsonify, request, Blueprint +from flask import jsonify, request, Blueprint, current_app from gn3.llms.process import getGNQA @@ -16,7 +16,10 @@ def gnqa(): return jsonify({"error": "querygnqa is missing in the request"}), 400 try: - answer, refs = getGNQA(query) + auth_token = current_app.config.get("FAHAMU_AUTH_TOKEN") + answer, refs = getGNQA( + query, auth_token) + return jsonify({ "query": query, "answer": answer, diff --git a/gn3/llms/client.py b/gn3/llms/client.py index 6bcbf31..1c3acce 100644 --- a/gn3/llms/client.py +++ b/gn3/llms/client.py @@ -5,7 +5,7 @@ import os import datetime import time import requests -from flask import current_app + from requests import Session from urllib.parse import urljoin from requests.packages.urllib3.util.retry import Retry @@ -59,7 +59,8 @@ class GeneNetworkQAClient(Session): def __init__(self, account, api_key, version="v3", timeout=5, total_retries=5, backoff_factor=30): super().__init__() - self.headers.update({"Authorization": "Bearer " + current_app.config.get("FAHAMU_AUTH_TOKEN","")}) + self.headers.update( + {"Authorization": "Bearer " + api_key}) self.answer_url = f"{self.BASE_URL}/answers" self.feedback_url = f"{self.BASE_URL}/feedback" @@ -75,8 +76,6 @@ class GeneNetworkQAClient(Session): self.mount("https://", adapter) self.mount("http://", adapter) - - @staticmethod def format_bibliography_info(bib_info): @@ -86,9 +85,9 @@ class GeneNetworkQAClient(Session): elif isinstance(bib_info, dict): # Format string bibliography information bib_info = "{0}.{1}.{2}.{3} ".format(bib_info.get('author', ''), - bib_info.get('title', ''), - bib_info.get('year', ''), - bib_info.get('doi', '')) + bib_info.get('title', ''), + bib_info.get('year', ''), + bib_info.get('doi', '')) return bib_info @staticmethod @@ -133,7 +132,7 @@ class GeneNetworkQAClient(Session): return res, 1 def custom_request(self, method, url, *args, **kwargs): - max_retries = 3 + max_retries = 5 retry_delay = 10 response = super().request(method, url, *args, **kwargs) @@ -151,7 +150,7 @@ class GeneNetworkQAClient(Session): raise exc if response.ok: # Give time to get all the data - time.sleep(retry_delay*3) + time.sleep(retry_delay*1.5) return response else: time.sleep(retry_delay) diff --git a/gn3/llms/process.py b/gn3/llms/process.py index b8e7ded..58d565a 100644 --- a/gn3/llms/process.py +++ b/gn3/llms/process.py @@ -11,17 +11,9 @@ from gn3.llms.client import GeneNetworkQAClient from gn3.llms.response import DocIDs -baseUrl = 'https://genenetwork.fahamuai.com/api/tasks' -answerUrl = baseUrl + '/answers' -basedir = os.path.abspath(os.path.dirname(__file__)) -apiClient = GeneNetworkQAClient(requests.Session(), api_key='') - - - - - - - +baseUrl = 'https://genenetwork.fahamuai.com/api/tasks' +answerUrl = baseUrl + '/answers' +basedir = os.path.abspath(os.path.dirname(__file__)) def formatBibliographyInfo(bibInfo): @@ -30,27 +22,29 @@ def formatBibliographyInfo(bibInfo): bibInfo = bibInfo.removesuffix('.txt') elif isinstance(bibInfo, dict): # format string bibliography information - bibInfo = "{0}.{1}.{2}.{3} ".format(bibInfo['author'], bibInfo['title'], bibInfo['year'], bibInfo['doi']) + bibInfo = "{0}.{1}.{2}.{3} ".format( + bibInfo['author'], bibInfo['title'], bibInfo['year'], bibInfo['doi']) return bibInfo -def askTheDocuments( extendUrl, my_auth ): +def askTheDocuments(extendUrl, my_auth): try: - res = requests.post(baseUrl+extendUrl, + res = requests.post(baseUrl+extendUrl, data={}, headers=my_auth) res.raise_for_status() except: - raise # what + raise # what if (res.status_code != 200): return negativeStatusMsg(res), 0 - task_id = getTaskIDFromResult(res) - res = getAnswerUsingTaskID(task_id, my_auth) + task_id = getTaskIDFromResult(res) + res = getAnswerUsingTaskID(task_id, my_auth) if (res.status_code != 200): return negativeStatusMsg(res), 0 return res, 1 -def getAnswerUsingTaskID( extendUrl, my_auth ): + +def getAnswerUsingTaskID(extendUrl, my_auth): try: res = requests.get(answerUrl+extendUrl, data={}, headers=my_auth) res.raise_for_status() @@ -58,8 +52,9 @@ def getAnswerUsingTaskID( extendUrl, my_auth ): raise return res + def openAPIConfig(): - f = open(os.path.join(basedir, "api.config.json") , "rb" ) + f = open(os.path.join(basedir, "api.config.json"), "rb") result = json.load(f) f.close() return result @@ -67,45 +62,50 @@ def openAPIConfig(): def getTaskIDFromResult(res): task_id = json.loads(res.text) - result = '?task_id=' + str(task_id['task_id']) + result = '?task_id=' + str(task_id['task_id']) return result + def negativeStatusMsg(res): - return 'Problems\n\tStatus code => {0}\n\tReason=> {1}'.format(res.status_code, res.reason) # mypy: ignore + # mypy: ignore + return 'Problems\n\tStatus code => {0}\n\tReason=> {1}'.format(res.status_code, res.reason) + def filterResponseText(val): return json.loads(''.join([str(char) for char in val if char in string.printable])) -def getGNQA(query): - res, task_id = apiClient.ask('?ask=' + query) + +def getGNQA(query, auth_token): + apiClient = GeneNetworkQAClient(requests.Session(), api_key=auth_token) + res, task_id = apiClient.ask('?ask=' + query, auth_token) res, success = apiClient.get_answer(task_id) - if ( success == 1 ): - respText = filterResponseText(res.text) + if (success == 1): + respText = filterResponseText(res.text) if respText.get("data") is None: - return "Unfortunately I have nothing on the query",[] - answer = respText['data']['answer'] - context = respText['data']['context'] + return "Unfortunately I have nothing on the query", [] + answer = respText['data']['answer'] + context = respText['data']['context'] references = parse_context(context) - return answer,references + return answer, references else: return res, "Unfortunately I have nothing." - def parse_context(context): """parse content map id to reference""" result = [] - for doc_ids,summary in context.items(): + for doc_ids, summary in context.items(): comboTxt = "" - for entry in summary: + for entry in summary: comboTxt += '\t' + entry['text'] docInfo = DocIDs().getInfo(doc_ids) - if doc_ids !=docInfo: + if doc_ids != docInfo: bibInfo = formatBibliographyInfo(docInfo) else: bibInfo = doc_ids - result.append({"doc_id":doc_ids,"bibInfo":bibInfo,"comboTxt":comboTxt}) - return result
\ No newline at end of file + result.append( + {"doc_id": doc_ids, "bibInfo": bibInfo, "comboTxt": comboTxt}) + return result diff --git a/gn3/settings.py b/gn3/settings.py index ca4d271..15baae2 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -34,6 +34,7 @@ SECRET_KEY = "password" # gn2 results only used in fetching dataset info # FAHAMU API TOKEN +FAHAMU_AUTH_TOKEN = "" GN2_BASE_URL = "http://www.genenetwork.org/" @@ -54,6 +55,8 @@ GENOTYPE_FILES = os.environ.get( XAPIAN_DB_PATH = "xapian" # CROSS-ORIGIN SETUP + + def parse_env_cors(default): """Parse comma-separated configuration into list of strings.""" origins_str = os.environ.get("CORS_ORIGINS", None) @@ -62,6 +65,7 @@ def parse_env_cors(default): origin.strip() for origin in origins_str.split(",") if origin != ""] return default + CORS_ORIGINS = parse_env_cors("*") CORS_HEADERS = [ @@ -75,9 +79,9 @@ TEXTDIR = f"{GNSHARE}/web/ProbeSetFreeze_DataMatrix" ROUND_TO = 10 -MULTIPROCESSOR_PROCS = 6 # Number of processes to spawn +MULTIPROCESSOR_PROCS = 6 # Number of processes to spawn -AUTH_SERVER_URL="" +AUTH_SERVER_URL = "" AUTH_MIGRATIONS = "migrations/auth" AUTH_DB = os.environ.get( "AUTH_DB", f"{os.environ.get('HOME')}/genenetwork/gn3_files/db/auth.db") |