aboutsummaryrefslogtreecommitdiff
path: root/gn3/llms/process.py
blob: b8e7dedbe4564df3a402e7c6814919b679581e77 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# pylint: skip-file

import requests
import sys
import time
import string
import json
import os
from gn3.llms.client import GeneNetworkQAClient
from gn3.llms.response import DocIDs


# Root endpoint of the remote task API used by the request helpers below.
baseUrl = 'https://genenetwork.fahamuai.com/api/tasks'
# Endpoint queried for the answer belonging to a submitted task.
answerUrl = f'{baseUrl}/answers'
# Absolute path of the directory that contains this module.
basedir = os.path.abspath(os.path.dirname(__file__))
# Shared client instance, created at import time with an empty API key.
apiClient = GeneNetworkQAClient(requests.Session(), api_key='')









def formatBibliographyInfo(bibInfo):
    """Normalise a bibliography entry for display.

    * A ``str`` has a trailing ``'.txt'`` removed, if present.
    * A ``dict`` is rendered as ``author.title.year.doi`` followed by a
      single space, using the keys ``'author'``, ``'title'``, ``'year'``
      and ``'doi'`` (a missing key raises ``KeyError``).
    * Any other value is returned unchanged.
    """
    if isinstance(bibInfo, dict):
        fields = (
            bibInfo['author'],
            bibInfo['title'],
            bibInfo['year'],
            bibInfo['doi'],
        )
        return "{0}.{1}.{2}.{3} ".format(*fields)
    if isinstance(bibInfo, str):
        # File-name style entries carry a '.txt' extension that is dropped.
        return bibInfo.removesuffix('.txt')
    return bibInfo


def askTheDocuments(extendUrl, my_auth):
    """Submit a question to the task API and fetch the resulting answer.

    POSTs to ``baseUrl + extendUrl``, derives the task-id query string
    from the reply with ``getTaskIDFromResult`` and then retrieves the
    answer with ``getAnswerUsingTaskID``.

    Args:
        extendUrl: String appended to ``baseUrl`` to form the request URL.
        my_auth: Mapping passed as the HTTP headers of both requests.

    Returns:
        ``(response, 1)`` when both requests finish with status 200, or
        ``(message, 0)`` where ``message`` is the ``negativeStatusMsg``
        text for the first response whose status is not 200.

    Raises:
        requests.exceptions.RequestException: On connection problems or
            when ``raise_for_status()`` rejects a 4xx/5xx response.
            Errors raised while extracting the task id also propagate.
    """
    res = requests.post(baseUrl + extendUrl, data={}, headers=my_auth)
    # raise_for_status() raises for 4xx/5xx replies, so the explicit checks
    # below only ever see the remaining non-200 status codes.
    res.raise_for_status()
    if res.status_code != 200:
        return negativeStatusMsg(res), 0

    task_id = getTaskIDFromResult(res)
    res = getAnswerUsingTaskID(task_id, my_auth)
    if res.status_code != 200:
        return negativeStatusMsg(res), 0
    return res, 1

def getAnswerUsingTaskID(extendUrl, my_auth):
    """Fetch the answer for a task from the answers endpoint.

    Args:
        extendUrl: String appended to ``answerUrl`` to form the request
            URL (``askTheDocuments`` passes the ``?task_id=...`` fragment).
        my_auth: Mapping passed as the HTTP headers of the request.

    Returns:
        The ``requests`` response object of the GET request.

    Raises:
        requests.exceptions.RequestException: On connection problems or
            when ``raise_for_status()`` rejects a 4xx/5xx response.
    """
    res = requests.get(answerUrl + extendUrl, data={}, headers=my_auth)
    res.raise_for_status()
    return res

def openAPIConfig():
    """Load ``api.config.json`` from ``basedir`` and return the parsed JSON.

    Returns:
        The deserialised content of the configuration file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    config_path = os.path.join(basedir, "api.config.json")
    # The context manager closes the handle even when parsing fails.
    with open(config_path, "rb") as config_file:
        return json.load(config_file)


def getTaskIDFromResult(res):
    """Build the ``?task_id=<id>`` query fragment from a response.

    ``res.text`` is parsed as JSON and its ``'task_id'`` entry is
    converted to a string and appended to ``'?task_id='``.
    """
    payload = json.loads(res.text)
    return f"?task_id={payload['task_id']}"

def negativeStatusMsg(res):
    """Return a multi-line failure message with the response's status code and reason."""
    status, reason = res.status_code, res.reason
    return f'Problems\n\tStatus code => {status}\n\tReason=> {reason}'  # mypy: ignore

def filterResponseText(val):
    """Drop every character not in ``string.printable`` from *val*, then parse the rest as JSON."""
    printable_only = ''.join(str(ch) for ch in val if ch in string.printable)
    return json.loads(printable_only)

def getGNQA(query):
    """Ask the QA client *query* and return its answer with references.

    The query is sent as ``'?ask=' + query`` through ``apiClient.ask`` and
    the answer is fetched with ``apiClient.get_answer``.

    Returns a 2-tuple whose shape depends on the outcome:

    * ``(answer, references)`` when the call succeeds and the reply has a
      ``"data"`` entry; ``references`` is the ``parse_context`` result.
    * ``("Unfortunately I have nothing on the query", [])`` when the call
      succeeds but ``"data"`` is missing or ``None``.
    * ``(res, "Unfortunately I have nothing.")`` when the success flag is
      not ``1``.
      NOTE(review): this last shape differs from the other two (response
      first, message second) - confirm callers expect that.
    """
    # Only the task id from ask() is used; its first return value is discarded.
    _, task_id = apiClient.ask('?ask=' + query)
    res, success = apiClient.get_answer(task_id)

    if success != 1:
        return res, "Unfortunately I have nothing."

    respText = filterResponseText(res.text)
    data = respText.get("data")
    if data is None:
        return "Unfortunately I have nothing on the query", []

    return data['answer'], parse_context(data['context'])



def parse_context(context):
    """Turn a context mapping into a list of reference records.

    *context* is iterated with ``.items()``; each value is a sequence of
    entries that have a ``'text'`` key. For every document id one dict is
    produced with the keys:

    * ``"doc_id"``   - the document id as found in *context*;
    * ``"bibInfo"``  - ``formatBibliographyInfo`` of the ``DocIDs`` lookup,
      or the id itself when the lookup returns the id unchanged;
    * ``"comboTxt"`` - every entry text prefixed with a tab, concatenated.
    """
    references = []
    for doc_id, entries in context.items():
        combined = "".join('\t' + entry['text'] for entry in entries)

        doc_info = DocIDs().getInfo(doc_id)
        # A lookup that echoes the id back is used verbatim (presumably it
        # means no metadata is known - confirm against DocIDs.getInfo).
        bib_info = formatBibliographyInfo(doc_info) if doc_id != doc_info else doc_id

        references.append(
            {"doc_id": doc_id, "bibInfo": bib_info, "comboTxt": combined}
        )
    return references