diff options
author | Alexander_Kabui | 2024-05-27 17:37:13 +0300 |
---|---|---|
committer | Alexander_Kabui | 2024-05-27 17:37:13 +0300 |
commit | d3f87b9a02bfec223d23c16eb1374d53065fea92 (patch) | |
tree | 668a86fca591814020264e2c4d02df6a93421448 /gn3/llms | |
parent | 59a27f884b2821ab9142f5285cd713ec374ea820 (diff) | |
download | genenetwork3-d3f87b9a02bfec223d23c16eb1374d53065fea92.tar.gz |
Add regular expressions for parsing links in texts.
Diffstat (limited to 'gn3/llms')
-rw-r--r-- | gn3/llms/process.py | 14 |
1 files changed, 11 insertions, 3 deletions
```diff
diff --git a/gn3/llms/process.py b/gn3/llms/process.py
index 40e53c5..55c27a0 100644
--- a/gn3/llms/process.py
+++ b/gn3/llms/process.py
@@ -1,6 +1,7 @@
 """this module contains code for processing response from fahamu client.py"""
 # pylint: disable=C0301
 import os
+import re
 import string
 import json
 import logging
@@ -76,8 +77,13 @@ def parse_context(context, get_info_func, format_bib_func):
         doc_info = get_info_func(doc_ids)
         bib_info = doc_ids if doc_ids == doc_info else format_bib_func(
             doc_info)
+        pattern = r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*'
+        combo_text = re.sub(pattern,
+                            lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>",
+                            combo_txt)
         results.append(
-            {"doc_id": doc_ids, "bibInfo": bib_info, "comboTxt": combo_txt})
+            {"doc_id": doc_ids, "bibInfo": bib_info,
+             "comboTxt": combo_text})
     return results
 
 
@@ -137,8 +143,10 @@ def get_gnqa(query, auth_token, data_dir=""):
     res, task_id = api_client.ask('?ask=' + quote(query), query=query)
     res, _status = api_client.get_answer(task_id)
     resp_text = json.loads(''.join([str(char)
-                                    for char in res.text if char in string.printable]))
-    answer = resp_text['data']['answer']
+                           for char in res.text if char in string.printable]))
+    answer = re.sub(r'(https?://|www\.)[\w.-]+(\.[a-zA-Z]{2,})([/\w.-]*)*',
+                    lambda x: f"<a href='{x[0]}' target=_blank> {x[0]} </a>",
+                    resp_text["data"]["answer"])
     context = resp_text['data']['context']
     return task_id, answer, fetch_pubmed(parse_context(
         context, DocIDs().get_info,
```