aboutsummaryrefslogtreecommitdiff
path: root/R2R/r2r/integrations
diff options
context:
space:
mode:
Diffstat (limited to 'R2R/r2r/integrations')
-rwxr-xr-xR2R/r2r/integrations/__init__.py3
-rwxr-xr-xR2R/r2r/integrations/serper.py103
2 files changed, 106 insertions, 0 deletions
diff --git a/R2R/r2r/integrations/__init__.py b/R2R/r2r/integrations/__init__.py
new file mode 100755
index 00000000..0830f40c
--- /dev/null
+++ b/R2R/r2r/integrations/__init__.py
@@ -0,0 +1,3 @@
+from .serper import SerperClient
+
+__all__ = ["SerperClient"]
diff --git a/R2R/r2r/integrations/serper.py b/R2R/r2r/integrations/serper.py
new file mode 100755
index 00000000..14333d1a
--- /dev/null
+++ b/R2R/r2r/integrations/serper.py
@@ -0,0 +1,103 @@
+import http.client
+import json
+import os
+
+
+# TODO - Move process json to dedicated data processing module
def process_json(json_object, indent=0):
    """
    Recursively traverse a JSON object (dicts and lists) to create an unstructured text blob.

    Each dict entry is rendered as ``key: value`` and each list entry as
    ``Item N: value`` (N is 1-based), one per line, prefixed by ``indent``
    spaces. Nested dicts/lists are rendered on the following lines with the
    indent increased by one.

    Args:
        json_object: The decoded JSON value to render.
        indent: Number of leading spaces for entries at this nesting level.

    Returns:
        The rendered text. An empty string is returned for an empty
        container or for any value that is neither a dict nor a list.
    """
    if isinstance(json_object, dict):
        entries = json_object.items()
    elif isinstance(json_object, list):
        # Give list items the same (label, value) shape as dict entries so
        # that both container kinds share one formatting loop.
        entries = (
            (f"Item {position}", item)
            for position, item in enumerate(json_object, start=1)
        )
    else:
        return ""

    # The padding only depends on the nesting level, so compute it once.
    padding = " " * indent
    parts = []
    for label, value in entries:
        if isinstance(value, (dict, list)):
            parts.append(f"{padding}{label}:\n")
            parts.append(process_json(value, indent + 1))
        else:
            parts.append(f"{padding}{label}: {value}\n")
    # Join once instead of growing a string with repeated `+=`.
    return "".join(parts)
+
+
+# TODO - Introduce abstract "Integration" ABC.
class SerperClient:
    """
    Minimal client for a Serper-style search endpoint.

    The API key is read from the ``SERPER_API_KEY`` environment variable when
    the client is constructed and is sent in the ``X-API-KEY`` header of every
    request.
    """

    def __init__(self, api_base: str = "google.serper.dev") -> None:
        """
        Store the API host and build the request headers.

        Args:
            api_base: Host name handed to ``http.client.HTTPSConnection``.

        Raises:
            ValueError: If ``SERPER_API_KEY`` is unset or empty.
        """
        api_key = os.getenv("SERPER_API_KEY")
        if not api_key:
            raise ValueError(
                "Please set the `SERPER_API_KEY` environment variable to use `SerperClient`."
            )

        self.api_base = api_base
        self.headers = {
            "X-API-KEY": api_key,
            "Content-Type": "application/json",
        }

    @staticmethod
    def _extract_results(result_data: dict) -> list:
        """
        Flatten a decoded search response into a list of tagged result dicts.

        Every top-level entry except ``searchParameters`` is inspected: a dict
        value yields one result, a list value yields one result per dict item.
        Each result is a shallow copy of the source dict with a ``"type"`` key
        set to the top-level key it came from. Scalar values, and list items
        that are not dicts, are skipped because they cannot carry a type tag.

        Args:
            result_data: The decoded JSON response body.

        Returns:
            The tagged results, in response order. The input is not modified.
        """
        formatted_results = []

        for key, value in result_data.items():
            # Skip searchParameters as it's not a result entry
            if key == "searchParameters":
                continue

            if isinstance(value, dict):
                # Single-item sections (e.g. 'answerBox')
                entries = [value]
            elif isinstance(value, list):
                entries = value
            else:
                continue

            # Build tagged copies rather than writing "type" into the
            # caller's dicts.
            formatted_results.extend(
                {**entry, "type": key}
                for entry in entries
                if isinstance(entry, dict)
            )

        return formatted_results

    # TODO - Add explicit typing for the return value
    def get_raw(self, query: str, limit: int = 10) -> list:
        """
        POST a search query to ``/search`` and return the extracted results.

        The HTTP status code is not inspected; the response body is decoded
        as UTF-8 JSON regardless of status.

        Args:
            query: The search query, sent as ``q``.
            limit: Requested number of results, sent as ``num_outputs``.

        Returns:
            The list produced by ``_extract_results`` for the response body.

        Raises:
            json.JSONDecodeError: If the response body is not valid JSON.
        """
        connection = http.client.HTTPSConnection(self.api_base)
        try:
            # NOTE(review): Serper's public docs appear to name this
            # parameter "num"; confirm that "num_outputs" is honoured.
            payload = json.dumps({"q": query, "num_outputs": limit})
            connection.request("POST", "/search", payload, self.headers)
            response = connection.getresponse()
            data = response.read()
        finally:
            # Always release the socket, including when the request fails.
            connection.close()

        json_data = json.loads(data.decode("utf-8"))
        return SerperClient._extract_results(json_data)

    @staticmethod
    def construct_context(results: list) -> str:
        """
        Render results as a text context grouped by result type.

        Args:
            results: Objects exposing a ``metadata`` mapping. The mapping's
                ``"type"`` entry (default ``"Unknown"``) selects the group and
                is left out of the rendered item.

        Returns:
            One ``# <type> Results:`` section per type, in first-seen order,
            each listing its items as ``Item N:`` followed by the
            ``process_json`` rendering of the remaining metadata.
        """
        # Organize results by type
        organized_results = {}
        for result in results:
            # Work on a copy so the caller's metadata keeps its "type" and
            # repeated calls produce the same grouping.
            metadata = dict(result.metadata)
            result_type = metadata.pop("type", "Unknown")
            organized_results.setdefault(result_type, []).append(metadata)

        parts = []
        # Iterate over each result type
        for result_type, items in organized_results.items():
            parts.append(f"# {result_type} Results:\n")
            for index, item in enumerate(items, start=1):
                # Process each item under the current type
                parts.append(f"Item {index}:\n")
                parts.append(process_json(item) + "\n")

        return "".join(parts)