aboutsummaryrefslogtreecommitdiff
path: root/gn3/llms/process.py
blob: 526c1b36fdef5bbf37a732a42a9b592a2573e77f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# pylint: skip-file

import requests
import sys
import time
import string
import json
import os
from urllib.request import urlretrieve
from urllib.parse import quote

from gn3.llms.client import GeneNetworkQAClient
from gn3.llms.response import DocIDs


# Base URL of the remote task API that requests in this module are sent to.
baseUrl = 'https://genenetwork.fahamuai.com/api/tasks'
# URL used when fetching the answer that belongs to a task.
answerUrl = f'{baseUrl}/answers'
# Absolute path of the directory that contains this module.
basedir = os.path.dirname(os.path.abspath(__file__))


def formatBibliographyInfo(bibInfo):
    """Normalise a bibliography entry for display.

    Args:
        bibInfo: a string, a dict, or any other value.

    Returns:
        For a string, the same text with one trailing ``.txt`` removed.
        For a dict, ``author.title.year.doi`` followed by a single space.
        Any other value is handed back unchanged.

    Raises:
        KeyError: when a dict lacks 'author', 'title', 'year' or 'doi'.
    """
    if isinstance(bibInfo, dict):
        # Look the fields up in the order they appear in the output.
        parts = [bibInfo[key] for key in ('author', 'title', 'year', 'doi')]
        return "{0}.{1}.{2}.{3} ".format(*parts)
    if isinstance(bibInfo, str):
        # Drop the file extension carried by plain-text document names.
        return bibInfo.removesuffix('.txt')
    return bibInfo


def askTheDocuments(extendUrl, my_auth, timeout=60):
    """Submit a task to the remote API and fetch its answer.

    POSTs to ``baseUrl + extendUrl``, derives the task-id query string from
    the response, then requests the answer for that task.

    Args:
        extendUrl: text appended to ``baseUrl`` to form the request URL.
        my_auth: headers sent with the request.
        timeout: seconds to wait on the POST before giving up; ``None``
            waits indefinitely (the behaviour before this parameter existed).

    Returns:
        ``(response, 1)`` when both requests end with status code 200,
        otherwise ``(message, 0)`` where ``message`` is the text produced by
        ``negativeStatusMsg``.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts, or a 4xx/5xx status (via ``raise_for_status``).
    """
    # Without a timeout a stalled server would block the caller forever.
    res = requests.post(baseUrl + extendUrl,
                        data={},
                        headers=my_auth,
                        timeout=timeout)
    res.raise_for_status()
    # raise_for_status() only rejects 4xx/5xx responses, so any other
    # non-200 status still has to be reported here.
    if res.status_code != 200:
        return negativeStatusMsg(res), 0
    task_id = getTaskIDFromResult(res)
    res = getAnswerUsingTaskID(task_id, my_auth)
    if res.status_code != 200:
        return negativeStatusMsg(res), 0
    return res, 1


def getAnswerUsingTaskID(extendUrl, my_auth, timeout=60):
    """Fetch the answer for a task from the remote API.

    Args:
        extendUrl: text appended to ``answerUrl``; callers in this module
            pass the ``?task_id=...`` string built by ``getTaskIDFromResult``.
        my_auth: headers sent with the request.
        timeout: seconds to wait before giving up; ``None`` waits
            indefinitely (the behaviour before this parameter existed).

    Returns:
        The response object of the GET request.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts, or a 4xx/5xx status (via ``raise_for_status``).
    """
    # Without a timeout a stalled server would block the caller forever.
    res = requests.get(answerUrl + extendUrl,
                       data={},
                       headers=my_auth,
                       timeout=timeout)
    res.raise_for_status()
    return res


def openAPIConfig():
    """Load ``api.config.json`` from the directory named by ``basedir``.

    Returns:
        The decoded JSON content of the file.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    config_path = os.path.join(basedir, "api.config.json")
    # The context manager closes the file even when json.load() raises.
    with open(config_path, "rb") as config_file:
        return json.load(config_file)


def getTaskIDFromResult(res):
    """Build the ``?task_id=<id>`` query string from a response.

    Args:
        res: object whose ``text`` attribute holds a JSON document with a
            ``task_id`` member.

    Returns:
        The string ``'?task_id='`` followed by the task id as text.
    """
    payload = json.loads(res.text)
    return '?task_id=' + str(payload['task_id'])


def negativeStatusMsg(res):
    """Return a text report of a response's status code and reason.

    Args:
        res: object exposing ``status_code`` and ``reason`` attributes.
    """
    # mypy: ignore
    template = 'Problems\n\tStatus code => {0}\n\tReason=> {1}'
    return template.format(res.status_code, res.reason)


def filterResponseText(val):
    """Decode JSON from *val* after dropping non-printable characters.

    Only characters that occur in ``string.printable`` are kept; the
    remaining text is then parsed with ``json.loads``.

    Args:
        val: the text to clean and decode.

    Returns:
        The decoded JSON value.
    """
    printable_text = ''.join(ch for ch in val if ch in string.printable)
    return json.loads(printable_text)


def getGNQA(query, auth_token):
    """Ask the QA service a question and return its answer with references.

    Args:
        query: the question text; it is URL-quoted before being sent.
        auth_token: token handed to the client as ``api_key`` and to ``ask``.

    Returns:
        ``(answer, references)`` when the lookup succeeds, where
        ``references`` comes from ``parse_context``.
        ``("Unfortunately I have nothing on the query", [])`` when the
        decoded response has no ``data`` member.
        ``(res, "Unfortunately I have nothing.")`` when the client reports
        failure.
    """
    client = GeneNetworkQAClient(requests.Session(), api_key=auth_token)
    res, task_id = client.ask('?ask=' + quote(query), auth_token)
    res, success = client.get_answer(task_id)

    if success != 1:
        # NOTE(review): this tuple is ordered differently from the success
        # path (response first, message second) -- confirm callers expect it.
        return res, "Unfortunately I have nothing."

    payload = filterResponseText(res.text)
    data = payload.get("data")
    if data is None:
        return "Unfortunately I have nothing on the query", []

    return data['answer'], parse_context(data['context'])


def parse_context(context):
    """Map each document id in *context* to a reference record.

    Args:
        context: mapping of document id to an iterable of entries, each
            entry providing a ``'text'`` member.

    Returns:
        A list with one dict per document id holding ``doc_id``,
        ``bibInfo`` and ``comboTxt`` (every entry text prefixed by a tab and
        concatenated).
    """
    references = []
    for doc_id, entries in context.items():
        combined = ''.join('\t' + entry['text'] for entry in entries)

        doc_info = DocIDs().getInfo(doc_id)
        # When the lookup returns the id itself there is nothing to format,
        # so the raw id is used as the bibliography text.
        bib_info = (formatBibliographyInfo(doc_info)
                    if doc_id != doc_info else doc_id)
        references.append(
            {"doc_id": doc_id, "bibInfo": bib_info, "comboTxt": combined})
    return references