"""Api endpoints for gnqa""" import ipaddress import json import string import uuid from datetime import datetime from datetime import timedelta from typing import Optional from functools import wraps from flask import Blueprint from flask import current_app from flask import jsonify from flask import request from authlib.jose.errors import DecodeError from gn3.llms.process import get_gnqa from gn3.llms.errors import LLMError from gn3.oauth2.authorisation import require_token from gn3 import sqlite_db_utils as db gnqa = Blueprint("gnqa", __name__) HISTORY_TABLE_CREATE_QUERY = """ CREATE TABLE IF NOT EXISTS history( user_id TEXT NOT NULL, task_id TEXT NOT NULL, query TEXT NOT NULL, results JSONB, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(task_id) ) WITHOUT ROWID """ RATING_TABLE_CREATE_QUERY = """ CREATE TABLE IF NOT EXISTS Rating( user_id TEXT NOT NULL, query TEXT NOT NULL, answer TEXT NOT NULL, weight INTEGER NOT NULL DEFAULT 0, task_id TEXT NOT NULL UNIQUE, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(task_id) ) """ RATE_LIMITER_TABLE_CREATE_QUERY = """ CREATE TABLE IF NOT EXISTS Limiter( identifier TEXT NOT NULL, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, tokens INTEGER, expiry_time TIMESTAMP, PRIMARY KEY(identifier) ) """ def database_setup(): """Temporary method to remove the need to have CREATE queries in functions""" with db.connection(current_app.config["LLM_DB_PATH"]) as conn: cursor = conn.cursor() cursor.execute(HISTORY_TABLE_CREATE_QUERY) cursor.execute(RATING_TABLE_CREATE_QUERY) cursor.execute(RATE_LIMITER_TABLE_CREATE_QUERY) def clean_query(query:str) -> str: """This function cleans up query removing punctuation and whitepace and transform to lowercase clean_query("!hello test.") -> "hello test" """ strip_chars = string.punctuation + string.whitespace str_query = query.lower().strip(strip_chars) return str_query def is_verified_anonymous_user(header_metadata): """This function should verify autheniticity of metadate from gn2 """ anony_id = header_metadata.get("Anonymous-Id") #should verify this + metadata signature user_status = header_metadata.get("Anonymous-Status", "") _user_signed_metadata = ( header_metadata.get("Anony-Metadata", "")) # TODO~ verify this for integrity with tokens return bool(anony_id) and user_status.lower() == "verified" def with_gnqna_fallback(view_func): """Allow fallback to GNQNA user if token auth fails or token is malformed.""" @wraps(view_func) def wrapper(*args, **kwargs): def call_with_anonymous_fallback(): return view_func.__wrapped__(*args, **{**kwargs, "auth_token": None, "valid_anony": True}) try: response = view_func(*args, **kwargs) is_invalid_token = ( isinstance(response, tuple) and len(response) == 2 and response[1] == 400 ) if is_invalid_token and is_verified_anonymous_user(dict(request.headers)): return call_with_anonymous_fallback() return response except (DecodeError, ValueError): # occurs when trying to parse the token or auth results if is_verified_anonymous_user(dict(request.headers)): return call_with_anonymous_fallback() return view_func.__wrapped__(*args, **kwargs) return wrapper def is_valid_address(ip_string) -> bool : """Function checks if is a valid ip address is valid""" # todo !verify data is sent from gn2 try: ipaddress.ip_address(ip_string) return True except ValueError: return False def check_rate_limiter(ip_address, db_path, query, tokens_lifespan=1440, default_tokens=4): """ Checks if an anonymous user has a valid token within the given lifespan. If expired or not found, creates or resets the token bucket. `tokens_lifespan` is in seconds. 1440 seconds. default_token set to 4 requests per hour. """ # Extract IP address /identifier if not ip_address or not is_valid_address(ip_address): raise ValueError("Please provide a valid IP address") now = datetime.utcnow() new_expiry = (now + timedelta(seconds=tokens_lifespan)).strftime("%Y-%m-%d %H:%M:%S") with db.connection(db_path) as conn: cursor = conn.cursor() # Fetch existing limiter record cursor.execute(""" SELECT tokens, expiry_time FROM Limiter WHERE identifier = ? """, (ip_address,)) row = cursor.fetchone() if row: tokens, expiry_time_str = row expiry_time = datetime.strptime(expiry_time_str, "%Y-%m-%d %H:%M:%S") time_diff = (expiry_time - now).total_seconds() if 0 < time_diff <= tokens_lifespan: if tokens > 0: # Consume token cursor.execute(""" UPDATE Limiter SET tokens = tokens - 1 WHERE identifier = ? AND tokens > 0 """, (ip_address,)) return True else: raise LLMError("Rate limit exceeded. Please try again later.", query) else: # Token expired — reset ~probably reset this after 200 status cursor.execute(""" UPDATE Limiter SET tokens = ?, expiry_time = ? WHERE identifier = ? """, (default_tokens, new_expiry, ip_address)) return True else: # New user — insert record ~probably reset this after 200 status cursor.execute(""" INSERT INTO Limiter(identifier, tokens, expiry_time) VALUES (?, ?, ?) """, (ip_address, default_tokens, new_expiry)) return True @gnqa.route("/search", methods=["GET"]) @with_gnqna_fallback @require_token def search(auth_token=None, valid_anony=False): """Api endpoint for searching queries in fahamu Api""" query = request.args.get("query", "") if not query: return jsonify({"error": "query get parameter is missing in the request"}), 400 fahamu_token = current_app.config.get("FAHAMU_AUTH_TOKEN") if not fahamu_token: raise LLMError( "Request failed: an LLM authorisation token is required ", query) database_setup() with db.connection(current_app.config["LLM_DB_PATH"]) as conn: cursor = conn.cursor() previous_answer_query = """ SELECT user_id, task_id, query, results FROM history WHERE created_at > DATE('now', '-21 day') AND query = ? ORDER BY created_at DESC LIMIT 1 """ res = cursor.execute(previous_answer_query, (clean_query(query),)) previous_result = res.fetchone() if previous_result: _, _, _, response = previous_result response = json.loads(response) response["query"] = query return response if valid_anony: # rate limit anonymous verified users user_metadata = json.loads(request.headers.get("Anony-Metadata", {})) check_rate_limiter(user_metadata.get("ip_address", ""), current_app.config["LLM_DB_PATH"], request.args.get("query", "")) task_id, answer, refs = get_gnqa( query, fahamu_token, current_app.config.get("DATA_DIR")) response = { "task_id": task_id, "query": query, "answer": answer, "references": refs } user_id = str(uuid.uuid4()) if valid_anony else get_user_id(auth_token) cursor.execute( """INSERT INTO history(user_id, task_id, query, results) VALUES(?, ?, ?, ?) """, (user_id, str(task_id["task_id"]), clean_query(query), json.dumps(response)) ) return response @gnqa.route("/rating/", methods=["POST"]) @require_token def rate_queries(task_id, auth_token=None): """Api endpoint for rating GNQA query and answer""" database_setup() user_id = get_user_id(auth_token) with db.connection(current_app.config["LLM_DB_PATH"]) as conn: results = request.json query, answer, weight = (results.get("query"), results.get("answer"), results.get("weight", 0)) cursor = conn.cursor() cursor.execute("""INSERT INTO Rating(user_id, query, answer, weight, task_id) VALUES(?, ?, ?, ?, ?) ON CONFLICT(task_id) DO UPDATE SET weight=excluded.weight """, (user_id, query, answer, weight, task_id)) return { "message": "You have successfully rated this query. Thank you!" }, 200 @gnqa.route("/search/records", methods=["GET"]) @require_token def get_user_search_records(auth_token=None): """get all history records for a given user using their user id """ with db.connection(current_app.config["LLM_DB_PATH"]) as conn: cursor = conn.cursor() cursor.execute( """SELECT task_id, query, created_at from history WHERE user_id=?""", (get_user_id(auth_token),)) results = [dict(item) for item in cursor.fetchall()] return jsonify(sorted(results, reverse=True, key=lambda x: datetime.strptime(x.get("created_at"), '%Y-%m-%d %H:%M:%S'))) @gnqa.route("/search/record/", methods=["GET"]) @require_token def get_user_record_by_task(task_id, auth_token = None): """Get user previous search record by task id """ with db.connection(current_app.config["LLM_DB_PATH"]) as conn: cursor = conn.cursor() cursor.execute( """SELECT results from history Where task_id=? and user_id=?""", (task_id, get_user_id(auth_token),)) record = cursor.fetchone() if record: return dict(record).get("results") return {} @gnqa.route("/search/record/", methods=["DELETE"]) @require_token def delete_record(task_id, auth_token = None): """Delete user previous seach record by task-id""" with db.connection(current_app.config["LLM_DB_PATH"]) as conn: cursor = conn.cursor() query = """DELETE FROM history WHERE task_id=? and user_id=?""" cursor.execute(query, (task_id, get_user_id(auth_token),)) return {"msg": f"Successfully Deleted the task {task_id}"} @gnqa.route("/search/records", methods=["DELETE"]) @require_token def delete_records(auth_token=None): """ Delete a users records using for all given task ids""" with db.connection(current_app.config["LLM_DB_PATH"]) as conn: task_ids = list(request.json.values()) cursor = conn.cursor() query = ("DELETE FROM history WHERE task_id IN " f"({', '.join('?' * len(task_ids))}) " "AND user_id=?") cursor.execute(query, (*task_ids, get_user_id(auth_token),)) return jsonify({}) def get_user_id(auth_token: Optional[dict] = None): """Retrieve the user ID from the JWT token.""" if auth_token is None or auth_token.get("jwt", {}).get("sub") is None: raise LLMError("Invalid auth token encountered") user_id = auth_token["jwt"]["sub"] return user_id