1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
import http.client
import json
import os
# TODO - Move `process_json` to a dedicated data-processing module.
def process_json(json_object, indent=0):
    """
    Recursively flatten a JSON-like object (dicts and lists) into a text blob.

    Each dict entry is rendered as ``key: value`` and each list element as
    ``Item N: value`` (N is 1-based). Nested dicts/lists are rendered on the
    following lines, indented one extra space per nesting level.

    Args:
        json_object: The dict or list to render. Any other type produces an
            empty string, because only containers are traversed.
        indent: Number of leading spaces for entries at this level.

    Returns:
        The rendered text; every emitted entry ends with a newline.
    """
    if isinstance(json_object, dict):
        entries = json_object.items()
    elif isinstance(json_object, list):
        # Present list elements as labelled entries so that both container
        # kinds share the same rendering loop below.
        entries = (
            (f"Item {position}", item)
            for position, item in enumerate(json_object, start=1)
        )
    else:
        return ""

    # The padding depends only on the nesting level, so compute it once.
    padding = " " * indent
    parts = []
    for label, value in entries:
        if isinstance(value, (dict, list)):
            parts.append(f"{padding}{label}:\n")
            parts.append(process_json(value, indent + 1))
        else:
            parts.append(f"{padding}{label}: {value}\n")
    # Join once instead of growing a string with repeated `+=`.
    return "".join(parts)
# TODO - Introduce abstract "Integration" ABC.
class SerperClient:
    """
    Small HTTPS client that posts search queries to a Serper host and turns
    the JSON response into a flat list of typed result dictionaries.

    The API key is read from the `SERPER_API_KEY` environment variable when
    the client is constructed.
    """

    def __init__(self, api_base: str = "google.serper.dev") -> None:
        """
        Store the API host and prepare the request headers.

        Args:
            api_base: Host name handed to `http.client.HTTPSConnection`.

        Raises:
            ValueError: If `SERPER_API_KEY` is unset or empty.
        """
        api_key = os.getenv("SERPER_API_KEY")
        if not api_key:
            raise ValueError(
                "Please set the `SERPER_API_KEY` environment variable to use `SerperClient`."
            )

        self.api_base = api_base
        self.headers = {
            "X-API-KEY": api_key,
            "Content-Type": "application/json",
        }

    @staticmethod
    def _extract_results(result_data: dict) -> list:
        """
        Flatten a decoded search response into a list of result dicts.

        Every top-level section of the response except `searchParameters` is
        inspected. A dict section yields one result; a list section yields one
        result per dict element. Each result is a shallow copy of the source
        dict with a `"type"` key set to the section name (replacing any
        existing `"type"` value), so the input is never mutated. Scalar
        sections and non-dict list elements are skipped.

        Args:
            result_data: The decoded JSON response body.

        Returns:
            A list of result dictionaries, in response order.
        """
        formatted_results = []
        for key, value in result_data.items():
            # searchParameters echoes the request; it is not a result entry.
            if key == "searchParameters":
                continue

            if isinstance(value, dict):
                # Single-item sections (e.g. 'answerBox').
                formatted_results.append({**value, "type": key})
            elif isinstance(value, list):
                # Sections holding several results; only dict items can be
                # tagged, so anything else is ignored rather than crashing.
                formatted_results.extend(
                    {**item, "type": key}
                    for item in value
                    if isinstance(item, dict)
                )
        return formatted_results

    # TODO - Add explicit typing for the return value
    def get_raw(self, query: str, limit: int = 10) -> list:
        """
        Run a search and return the flattened, typed results.

        Args:
            query: The search query string.
            limit: Requested number of results, sent as the `num` field.

        Returns:
            The list produced by `_extract_results` for the response body.
        """
        connection = http.client.HTTPSConnection(self.api_base)
        try:
            # NOTE(review): the result-count field is `num` per the Serper
            # API docs; the previous `num_outputs` key is not a documented
            # field, so `limit` was presumably being ignored - confirm.
            payload = json.dumps({"q": query, "num": limit})
            connection.request("POST", "/search", payload, self.headers)
            response = connection.getresponse()
            data = response.read()
        finally:
            # Always release the socket, including when the request fails.
            connection.close()

        json_data = json.loads(data.decode("utf-8"))
        return SerperClient._extract_results(json_data)

    @staticmethod
    def construct_context(results: list) -> str:
        """
        Render search results as a text context grouped by result type.

        Args:
            results: Objects exposing a `metadata` mapping. The mapping's
                `"type"` entry (default `"Unknown"`) selects the group and is
                left out of the rendered item.

        Returns:
            One `# <type> Results:` section per type, in first-seen order,
            each listing its items as `Item N:` followed by the item rendered
            with `process_json`.
        """
        # Organize results by type.
        organized_results = {}
        for result in results:
            # Work on a copy so the caller's metadata keeps its "type" key and
            # repeated calls produce the same grouping.
            metadata = dict(result.metadata)
            result_type = metadata.pop("type", "Unknown")
            organized_results.setdefault(result_type, []).append(metadata)

        parts = []
        for result_type, items in organized_results.items():
            parts.append(f"# {result_type} Results:\n")
            for index, item in enumerate(items, start=1):
                parts.append(f"Item {index}:\n")
                parts.append(process_json(item) + "\n")
        return "".join(parts)
|