1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
|
"""Api endpoints for gnqa"""
import ipaddress
import json
import string
import uuid
from datetime import datetime
from datetime import timedelta
from typing import Optional
from functools import wraps
from flask import Blueprint
from flask import current_app
from flask import jsonify
from flask import request
from authlib.jose.errors import DecodeError
from gn3.llms.process import get_gnqa
from gn3.llms.errors import LLMError
from gn3.oauth2.authorisation import require_token
from gn3 import sqlite_db_utils as db
# Flask blueprint on which the GNQA routes defined in this module are registered.
gnqa = Blueprint("gnqa", __name__)
# Schema of the `history` table: one row per task (task_id is the primary key)
# holding the owning user id, the query text, the stored results and a creation
# timestamp that defaults to the time of insertion.
HISTORY_TABLE_CREATE_QUERY = """
CREATE TABLE IF NOT EXISTS history(
user_id TEXT NOT NULL,
task_id TEXT NOT NULL,
query TEXT NOT NULL,
results JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(task_id)
) WITHOUT ROWID
"""
# Schema of the `Rating` table: one rating per task. task_id is declared both
# UNIQUE and PRIMARY KEY; `weight` defaults to 0 when no value is supplied.
RATING_TABLE_CREATE_QUERY = """
CREATE TABLE IF NOT EXISTS Rating(
user_id TEXT NOT NULL,
query TEXT NOT NULL,
answer TEXT NOT NULL,
weight INTEGER NOT NULL DEFAULT 0,
task_id TEXT NOT NULL UNIQUE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY(task_id)
)
"""
# Schema of the `Limiter` table used for rate limiting: one row per identifier
# (check_rate_limiter stores the caller's IP address there) with the number of
# remaining tokens and the time at which the current allowance expires.
RATE_LIMITER_TABLE_CREATE_QUERY = """
CREATE TABLE IF NOT EXISTS Limiter(
identifier TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
tokens INTEGER,
expiry_time TIMESTAMP,
PRIMARY KEY(identifier)
)
"""
def database_setup():
    """Create the history, Rating and Limiter tables if they do not exist yet.

    Interim helper that keeps the CREATE statements out of the endpoint
    functions; it runs against the database at ``LLM_DB_PATH``.
    """
    schema_statements = (
        HISTORY_TABLE_CREATE_QUERY,
        RATING_TABLE_CREATE_QUERY,
        RATE_LIMITER_TABLE_CREATE_QUERY,
    )
    with db.connection(current_app.config["LLM_DB_PATH"]) as conn:
        cursor = conn.cursor()
        for statement in schema_statements:
            cursor.execute(statement)
def clean_query(query:str) -> str:
    """Normalise a query string.

    The text is lowercased, then any punctuation or whitespace at its start
    and end is trimmed. Characters inside the text are left untouched.

    clean_query("!hello test.") -> "hello test"
    """
    edge_chars = f"{string.punctuation}{string.whitespace}"
    lowered = query.lower()
    return lowered.strip(edge_chars)
def is_verified_anonymous_user(request_metadata):
    """Tell whether a request claims to come from a verified anonymous user.

    Only the request headers are inspected: "Anonymous-Id" must be non-empty
    and "Anonymous-Status" must equal "verified", compared case-insensitively.

    NOTE: the authenticity of the metadata sent by gn2 is not checked yet.
    """
    headers = request_metadata.headers
    # TODO: verify this id together with the metadata signature.
    anonymous_id = headers.get("Anonymous-Id")
    status = headers.get("Anonymous-Status", "")
    # Read but not used yet. TODO: verify this for integrity.
    _signed_metadata = headers.get("Anony-Metadata", "")
    if not anonymous_id:
        return False
    return status.lower() == "verified"
def with_gnqna_fallback(view_func):
    """Decorate a view so verified anonymous users can bypass token auth.

    The decorated view is called normally first. If it answers with a
    ``(body, 400)`` tuple, or raises ``DecodeError``/``ValueError``, and the
    current request passes ``is_verified_anonymous_user``, the undecorated
    view (``view_func.__wrapped__``) is called with ``auth_token=None`` and
    ``valid_anony=True``. When an exception was raised and the request is
    not a verified anonymous one, the undecorated view is called with the
    original arguments.
    """
    def run_as_anonymous(args, kwargs):
        anonymous_kwargs = dict(kwargs)
        anonymous_kwargs.update(auth_token=None, valid_anony=True)
        return view_func.__wrapped__(*args, **anonymous_kwargs)

    def looks_like_rejected_token(response):
        if not isinstance(response, tuple):
            return False
        return len(response) == 2 and response[1] == 400

    @wraps(view_func)
    def wrapper(*args, **kwargs):
        try:
            response = view_func(*args, **kwargs)
            if (looks_like_rejected_token(response)
                    and is_verified_anonymous_user(request)):
                return run_as_anonymous(args, kwargs)
            return response
        except (DecodeError, ValueError):
            # Raised while parsing the token or the auth results.
            if is_verified_anonymous_user(request):
                return run_as_anonymous(args, kwargs)
            return view_func.__wrapped__(*args, **kwargs)

    return wrapper
def is_valid_address(ip_string) -> bool :
    """Return True when ``ip_string`` parses as an IPv4 or IPv6 address."""
    # TODO: verify that the data is sent from gn2.
    try:
        ipaddress.ip_address(ip_string)
    except ValueError:
        return False
    return True
def check_rate_limiter(ip_address, db_path, query, tokens_lifespan=1440, default_tokens=4):
    """
    Decide whether an anonymous user, identified by IP address, may run a query.

    Each identifier owns one row in the Limiter table holding a token count
    and an expiry time.
    - No row yet: a row with `default_tokens` tokens is inserted.
    - Row expired (or its expiry lies further ahead than `tokens_lifespan`):
      the row is reset to `default_tokens` with a new expiry.
    - Row still valid with tokens left: one token is consumed.
    - Row still valid with no tokens left: LLMError is raised.
    Creating or resetting a row does not consume a token.

    `tokens_lifespan` is expressed in seconds (default 1440).
    `query` is only used as extra context on the raised LLMError.

    Returns True whenever the request is allowed.
    Raises ValueError when `ip_address` is empty or not a valid IP address.
    """
    if not (ip_address and is_valid_address(ip_address)):
        raise ValueError("Please provide a valid IP address")

    select_bucket = """
SELECT tokens, expiry_time FROM Limiter
WHERE identifier = ?
"""
    spend_token = """
UPDATE Limiter
SET tokens = tokens - 1
WHERE identifier = ? AND tokens > 0
"""
    refill_bucket = """
UPDATE Limiter
SET tokens = ?, expiry_time = ?
WHERE identifier = ?
"""
    create_bucket = """
INSERT INTO Limiter(identifier, tokens, expiry_time)
VALUES (?, ?, ?)
"""

    current_time = datetime.utcnow()
    window_end = current_time + timedelta(seconds=tokens_lifespan)
    fresh_expiry = window_end.strftime("%Y-%m-%d %H:%M:%S")

    with db.connection(db_path) as conn:
        cursor = conn.cursor()
        cursor.execute(select_bucket, (ip_address,))
        bucket = cursor.fetchone()

        if not bucket:
            # First request from this identifier.
            cursor.execute(create_bucket,
                           (ip_address, default_tokens, fresh_expiry))
            return True

        tokens_left, expiry_text = bucket
        expires_at = datetime.strptime(expiry_text, "%Y-%m-%d %H:%M:%S")
        seconds_left = (expires_at - current_time).total_seconds()

        if not 0 < seconds_left <= tokens_lifespan:
            # Allowance expired: start a new window with a full bucket.
            cursor.execute(refill_bucket,
                           (default_tokens, fresh_expiry, ip_address))
            return True

        if not tokens_left > 0:
            raise LLMError("Rate limit exceeded. Please try again later.",
                           query)

        cursor.execute(spend_token, (ip_address,))
        return True
@gnqa.route("/search", methods=["GET"])
@with_gnqna_fallback
@require_token
def search(auth_token=None, valid_anony=False):
    """Api endpoint for searching queries in fahamu Api

    Reads the ``query`` GET parameter and answers it, preferring an answer
    stored in the history table for the same cleaned query during the last
    21 days over a new LLM call.

    Parameters:
        auth_token: token passed in by the auth decorator; used to resolve
            the user id for non-anonymous requests.
        valid_anony: True when the request is served for a verified
            anonymous user. Such requests are rate limited by the IP
            address found in the "Anony-Metadata" header, and the history
            row is stored under a random user id.

    Returns:
        A dict with "task_id", "query", "answer" and "references" (or the
        stored response with its "query" replaced by the submitted one), or
        a ``(json error, 400)`` tuple when ``query`` is missing.

    Raises:
        LLMError: when FAHAMU_AUTH_TOKEN is not configured, when the rate
            limit is exceeded, or when no user id can be read from
            ``auth_token``.
        ValueError: when an anonymous request carries no valid IP address
            or its metadata is not valid JSON.
    """
    query = request.args.get("query", "")
    if not query:
        return jsonify({"error": "query get parameter is missing in the request"}), 400
    fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN")
    if not fahamu_token:
        raise LLMError(
            "Request failed: an LLM authorisation token is required ", query)
    database_setup()
    cleaned_query = clean_query(query)
    with db.connection(current_app.config["LLM_DB_PATH"]) as conn:
        cursor = conn.cursor()
        previous_answer_query = """
            SELECT user_id, task_id, query, results FROM history
            WHERE created_at > DATE('now', '-21 day') AND
            query = ?
            ORDER BY created_at DESC LIMIT 1 """
        res = cursor.execute(previous_answer_query, (cleaned_query,))
        previous_result = res.fetchone()
        if previous_result:
            _, _, _, response = previous_result
            response = json.loads(response)
            # Echo the query exactly as submitted, not the cleaned lookup key.
            response["query"] = query
            return response

        if valid_anony:
            # Rate limit anonymous verified users.
            # json.loads only accepts str/bytes, so a missing header must
            # fall back to the JSON text "{}" rather than to a dict.
            raw_metadata = request.headers.get("Anony-Metadata") or "{}"
            user_metadata = json.loads(raw_metadata)
            ip_address = (user_metadata.get("ip_address", "")
                          if isinstance(user_metadata, dict) else "")
            check_rate_limiter(ip_address,
                               current_app.config["LLM_DB_PATH"],
                               query)

        # Resolve the user before the LLM call so an unusable token fails
        # fast instead of after the remote request has already been made.
        user_id = str(uuid.uuid4()) if valid_anony else get_user_id(auth_token)

        task_id, answer, refs = get_gnqa(
            query, fahamu_token, current_app.config.get("DATA_DIR"))
        response = {
            "task_id": task_id,
            "query": query,
            "answer": answer,
            "references": refs
        }
        cursor.execute(
            """INSERT INTO history(user_id, task_id, query, results)
            VALUES(?, ?, ?, ?)
            """, (user_id, str(task_id["task_id"]),
                  cleaned_query,
                  json.dumps(response))
        )
        return response
@gnqa.route("/rating/<task_id>", methods=["POST"])
@require_token
def rate_queries(task_id, auth_token=None):
    """Store the rating a user gives to a GNQA query/answer pair.

    The JSON request body supplies "query", "answer" and "weight" (weight
    defaults to 0). If the task already has a rating, only its weight is
    replaced.
    """
    database_setup()
    user_id = get_user_id(auth_token)
    upsert_rating = """INSERT INTO Rating(user_id, query,
answer, weight, task_id)
VALUES(?, ?, ?, ?, ?)
ON CONFLICT(task_id) DO UPDATE SET
weight=excluded.weight
"""
    with db.connection(current_app.config["LLM_DB_PATH"]) as conn:
        payload = request.json
        rating_row = (
            user_id,
            payload.get("query"),
            payload.get("answer"),
            payload.get("weight", 0),
            task_id,
        )
        conn.cursor().execute(upsert_rating, rating_row)
    return {
        "message": "You have successfully rated this query. Thank you!"
    }, 200
@gnqa.route("/search/records", methods=["GET"])
@require_token
def get_user_search_records(auth_token=None):
    """Return the history records of the user identified by the token.

    Each record carries task_id, query and created_at; the list is returned
    as JSON ordered from the most recent record to the oldest.
    """
    def created_at_of(entry):
        return datetime.strptime(entry.get("created_at"), '%Y-%m-%d %H:%M:%S')

    with db.connection(current_app.config["LLM_DB_PATH"]) as conn:
        cursor = conn.cursor()
        owner_id = get_user_id(auth_token)
        cursor.execute(
            """SELECT task_id, query, created_at from history WHERE user_id=?""",
            (owner_id,))
        records = [dict(row) for row in cursor.fetchall()]
        records.sort(key=created_at_of, reverse=True)
        return jsonify(records)
@gnqa.route("/search/record/<task_id>", methods=["GET"])
@require_token
def get_user_record_by_task(task_id, auth_token = None):
    """Return the stored results of one of the user's searches.

    The record is looked up by task id and by the user id read from the
    token; an empty dict is returned when no matching record exists.
    """
    lookup = """SELECT results from history
Where task_id=? and user_id=?"""
    with db.connection(current_app.config["LLM_DB_PATH"]) as conn:
        cursor = conn.cursor()
        cursor.execute(lookup, (task_id, get_user_id(auth_token),))
        row = cursor.fetchone()
        return dict(row).get("results") if row else {}
@gnqa.route("/search/record/<task_id>", methods=["DELETE"])
@require_token
def delete_record(task_id, auth_token = None):
    """Delete one of the user's search records, selected by task id.

    Only a row matching both the task id and the user id read from the
    token is removed; the same message is returned whether or not a row
    matched.
    """
    removal = """DELETE FROM history
WHERE task_id=? and user_id=?"""
    with db.connection(current_app.config["LLM_DB_PATH"]) as conn:
        cursor = conn.cursor()
        owner_id = get_user_id(auth_token)
        cursor.execute(removal, (task_id, owner_id,))
        return {"msg": f"Successfully Deleted the task {task_id}"}
@gnqa.route("/search/records", methods=["DELETE"])
@require_token
def delete_records(auth_token=None):
    """Delete several of the user's search records in one request.

    The task ids are the values of the JSON request body; only rows that
    also belong to the user id read from the token are removed.
    """
    with db.connection(current_app.config["LLM_DB_PATH"]) as conn:
        task_ids = [*request.json.values()]
        cursor = conn.cursor()
        # One "?" placeholder per task id keeps the statement parameterised.
        placeholders = ", ".join("?" for _ in task_ids)
        statement = ("DELETE FROM history WHERE task_id IN "
                     f"({placeholders}) "
                     "AND user_id=?")
        parameters = tuple(task_ids) + (get_user_id(auth_token),)
        cursor.execute(statement, parameters)
        return jsonify({})
def get_user_id(auth_token: Optional[dict] = None):
    """Return the ``sub`` claim stored under ``auth_token["jwt"]``.

    Raises LLMError when the token is None or carries no ``sub`` value.
    """
    subject = None
    if auth_token is not None:
        subject = auth_token.get("jwt", {}).get("sub")
    if subject is None:
        raise LLMError("Invalid auth token encountered")
    return subject
|