From f8b4e043f89802feab64149514cddc4382353ab7 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 30 Mar 2025 22:53:59 +0000 Subject: [PATCH 1/3] fix: improve Windows compatibility for HTTP caching Co-Authored-By: Aaron Steers --- airbyte/http_caching/proxy.py | 15 ++++++++++----- airbyte/http_caching/serialization.py | 22 ++++++++++++++-------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/airbyte/http_caching/proxy.py b/airbyte/http_caching/proxy.py index a2e9f1f6..8ec07155 100644 --- a/airbyte/http_caching/proxy.py +++ b/airbyte/http_caching/proxy.py @@ -107,7 +107,9 @@ def _get_cache_path(self, key: str, *, is_read: bool = False) -> Path: The path to the cache file. """ base_dir = self.read_dir if is_read else self.cache_dir + extension = ".json" if self.serialization_format == SerializationFormat.JSON else ".mitm" + return base_dir / f"{key}{extension}" def request(self, flow: HTTPFlow) -> None: @@ -127,14 +129,15 @@ def request(self, flow: HTTPFlow) -> None: if cache_path.exists(): try: cached_data: dict[str, Any] = self.serializer.deserialize(cache_path) + cached_flow = HTTPFlow.from_state(cached_data) + if hasattr(cached_flow, "response") and cached_flow.response: flow.response = cached_flow.response - logger.info(f"Serving {flow.request.url} from cache") + logger.info(f"Serving {flow.request.url} from cache") + return except Exception as e: - logger.warning(f"Failed to load cached response: {e}") - else: - return + logger.warning(f"Failed to load cached response: {e}", exc_info=True) if self.mode == HttpCacheMode.READ_ONLY_FAIL_ON_MISS: flow.response = Response.make( @@ -154,7 +157,9 @@ def response(self, flow: HTTPFlow) -> None: cache_path = self._get_cache_path(key, is_read=False) try: + cache_path.parent.mkdir(parents=True, exist_ok=True) + self.serializer.serialize(flow.get_state(), cache_path) logger.info(f"Cached response for {flow.request.url}") except Exception as e: - logger.warning(f"Failed to cache response: {e}") + logger.warning(f"Failed to cache response: {e}", exc_info=True) diff --git a/airbyte/http_caching/serialization.py b/airbyte/http_caching/serialization.py index b3257d8e..93b8cf2f 100644 --- a/airbyte/http_caching/serialization.py +++ b/airbyte/http_caching/serialization.py @@ -88,12 +88,14 @@ def serialize(self, data: T_SerializedData, path: Path) -> None: """ path.parent.mkdir(parents=True, exist_ok=True) - if not str(path).endswith(".mitm"): + if path.suffix != ".mitm": path = path.with_suffix(".mitm") + flows = data.get("flows", []) + with path.open("wb") as f: fw = io.FlowWriter(f) - for flow in data.get("flows", []): + for flow in flows: fw.add(flow) def deserialize(self, path: Path) -> T_SerializedData: @@ -105,14 +107,18 @@ def deserialize(self, path: Path) -> T_SerializedData: Returns: The deserialized data. """ - if not str(path).endswith(".mitm"): + if path.suffix != ".mitm": path = path.with_suffix(".mitm") if not path.exists(): return {"flows": []} - with path.open("rb") as f: - fr = io.FlowReader(f) - flows = list(fr.stream()) - - return {"flows": flows} + try: + with path.open("rb") as f: + fr = io.FlowReader(f) + flows = list(fr.stream()) + return {"flows": flows} + except Exception as e: + import logging + logging.warning(f"Error reading flow file {path}: {e}") + return {"flows": []} From 35ebec01764ad55be02cc3a27b87ee1485f2dcfd Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 30 Mar 2025 22:56:45 +0000 Subject: [PATCH 2/3] style: fix formatting and linting issues Co-Authored-By: Aaron Steers --- airbyte/http_caching/proxy.py | 10 +++++----- airbyte/http_caching/serialization.py | 11 +++++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/airbyte/http_caching/proxy.py b/airbyte/http_caching/proxy.py index 8ec07155..fd457a22 100644 --- a/airbyte/http_caching/proxy.py +++ b/airbyte/http_caching/proxy.py @@ -107,9 +107,9 @@ def _get_cache_path(self, key: str, *, is_read: bool = False) -> Path: The path to the cache file. """ base_dir = self.read_dir if is_read else self.cache_dir - + extension = ".json" if self.serialization_format == SerializationFormat.JSON else ".mitm" - + return base_dir / f"{key}{extension}" def request(self, flow: HTTPFlow) -> None: @@ -129,9 +129,9 @@ def request(self, flow: HTTPFlow) -> None: if cache_path.exists(): try: cached_data: dict[str, Any] = self.serializer.deserialize(cache_path) - + cached_flow = HTTPFlow.from_state(cached_data) - + if hasattr(cached_flow, "response") and cached_flow.response: flow.response = cached_flow.response logger.info(f"Serving {flow.request.url} from cache") @@ -158,7 +158,7 @@ def response(self, flow: HTTPFlow) -> None: try: cache_path.parent.mkdir(parents=True, exist_ok=True) - + self.serializer.serialize(flow.get_state(), cache_path) logger.info(f"Cached response for {flow.request.url}") except Exception as e: diff --git a/airbyte/http_caching/serialization.py b/airbyte/http_caching/serialization.py index 93b8cf2f..40b323f7 100644 --- a/airbyte/http_caching/serialization.py +++ b/airbyte/http_caching/serialization.py @@ -4,11 +4,14 @@ from __future__ import annotations import json +import logging from enum import Enum from typing import TYPE_CHECKING, Any, Protocol from mitmproxy.io import io +logger = logging.getLogger(__name__) + if TYPE_CHECKING: from pathlib import Path @@ -92,7 +95,7 @@ def serialize(self, data: T_SerializedData, path: Path) -> None: path = path.with_suffix(".mitm") flows = data.get("flows", []) - + with path.open("wb") as f: fw = io.FlowWriter(f) for flow in flows: @@ -117,8 +120,8 @@ def deserialize(self, path: Path) -> T_SerializedData: with path.open("rb") as f: fr = io.FlowReader(f) flows = list(fr.stream()) - return {"flows": flows} except Exception as e: - import logging - logging.warning(f"Error reading flow file {path}: {e}") + logger.warning(f"Error reading flow file {path}: {e}") return {"flows": []} + else: + return {"flows": flows} From 0a134d1044a990eecc947bcb3b96aaeaaeaa9f0d Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 30 Mar 2025 22:58:23 +0000 Subject: [PATCH 3/3] style: fix import sorting issues Co-Authored-By: Aaron Steers --- airbyte/http_caching/serialization.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte/http_caching/serialization.py b/airbyte/http_caching/serialization.py index 40b323f7..d37df456 100644 --- a/airbyte/http_caching/serialization.py +++ b/airbyte/http_caching/serialization.py @@ -10,6 +10,7 @@ from mitmproxy.io import io + logger = logging.getLogger(__name__)