2 files changed, 106 insertions, 0 deletions
diff --git a/R2R/r2r/integrations/__init__.py b/R2R/r2r/integrations/__init__.py
new file mode 100755
index 00000000..0830f40c
--- /dev/null
+++ b/R2R/r2r/integrations/__init__.py
@@ -0,0 +1,3 @@
+from .serper import SerperClient
+
+__all__ = ["SerperClient"]
diff --git a/R2R/r2r/integrations/serper.py b/R2R/r2r/integrations/serper.py
new file mode 100755
index 00000000..14333d1a
--- /dev/null
+++ b/R2R/r2r/integrations/serper.py
@@ -0,0 +1,103 @@
+import http.client
+import json
+import os
+
+
+# TODO - Move process json to dedicated data processing module
+def process_json(json_object, indent=0):
+    """
+    Recursively traverses the JSON object (dicts and lists) to create an unstructured text blob.
+    """
+    text_blob = ""
+    if isinstance(json_object, dict):
+        for key, value in json_object.items():
+            padding = "  " * indent
+            if isinstance(value, (dict, list)):
+                text_blob += (
+                    f"{padding}{key}:\n{process_json(value, indent + 1)}"
+                )
+            else:
+                text_blob += f"{padding}{key}: {value}\n"
+    elif isinstance(json_object, list):
+        for index, item in enumerate(json_object):
+            padding = "  " * indent
+            if isinstance(item, (dict, list)):
+                text_blob += f"{padding}Item {index + 1}:\n{process_json(item, indent + 1)}"
+            else:
+                text_blob += f"{padding}Item {index + 1}: {item}\n"
+    return text_blob
+
+
+# TODO - Introduce abstract "Integration" ABC.
+class SerperClient:
+    def __init__(self, api_base: str = "google.serper.dev") -> None:
+        api_key = os.getenv("SERPER_API_KEY")
+        if not api_key:
+            raise ValueError(
+                "Please set the `SERPER_API_KEY` environment variable to use `SerperClient`."
+            )
+
+        self.api_base = api_base
+        self.headers = {
+            "X-API-KEY": api_key,
+            "Content-Type": "application/json",
+        }
+
+    @staticmethod
+    def _extract_results(result_data: dict) -> list:
+        formatted_results = []
+
+        for key, value in result_data.items():
+            # Skip searchParameters as it's not a result entry
+            if key == "searchParameters":
+                continue
+
+            # Handle 'answerBox' as a single item
+            if key == "answerBox":
+                value["type"] = key  # Add the type key to the dictionary
+                formatted_results.append(value)
+            # Handle lists of results
+            elif isinstance(value, list):
+                for item in value:
+                    item["type"] = key  # Add the type key to the dictionary
+                    formatted_results.append(item)
+            # Handle 'peopleAlsoAsk' and potentially other single item formats
+            elif isinstance(value, dict):
+                value["type"] = key  # Add the type key to the dictionary
+                formatted_results.append(value)
+
+        return formatted_results
+
+    # TODO - Add explicit typing for the return value
+    def get_raw(self, query: str, limit: int = 10) -> list:
+        connection = http.client.HTTPSConnection(self.api_base)
+        payload = json.dumps({"q": query, "num_outputs": limit})
+        connection.request("POST", "/search", payload, self.headers)
+        response = connection.getresponse()
+        data = response.read()
+        json_data = json.loads(data.decode("utf-8"))
+        return SerperClient._extract_results(json_data)
+
+    @staticmethod
+    def construct_context(results: list) -> str:
+        # Organize results by type
+        organized_results = {}
+        for result in results:
+            result_type = result.metadata.pop(
+                "type", "Unknown"
+            )  # Pop the type and use as key
+            if result_type not in organized_results:
+                organized_results[result_type] = [result.metadata]
+            else:
+                organized_results[result_type].append(result.metadata)
+
+        context = ""
+        # Iterate over each result type
+        for result_type, items in organized_results.items():
+            context += f"# {result_type} Results:\n"
+            for index, item in enumerate(items, start=1):
+                # Process each item under the current type
+                context += f"Item {index}:\n"
+                context += process_json(item) + "\n"
+
+        return context