legout
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 3 additions & 1 deletion
diff --git a/src/flowerpower/cfg/__init__.py b/src/flowerpower/cfg/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/flowerpower/cli/cfg.py b/src/flowerpower/cli/cfg.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/flowerpower/utils/filesystem/__init__.py b/src/flowerpower/filesystem/__init__.py
rename from src/flowerpower/utils/filesystem/__init__.py
rename to src/flowerpower/filesystem/__init__.py
diff --git a/src/flowerpower/utils/filesystem/base.py b/src/flowerpower/filesystem/base.py
rename from src/flowerpower/utils/filesystem/base.py
rename to src/flowerpower/filesystem/base.py
Lines changed: 11 additions & 8 deletions
diff --git a/src/flowerpower/utils/filesystem/ext.py b/src/flowerpower/filesystem/ext.py
rename from src/flowerpower/utils/filesystem/ext.py
rename to src/flowerpower/filesystem/ext.py
Lines changed: 21 additions & 17 deletions
diff --git a/src/flowerpower/flowerpower.py b/src/flowerpower/flowerpower.py
Lines changed: 6 additions & 5 deletions
diff --git a/src/flowerpower/http/api/cfg.py b/src/flowerpower/http/api/cfg.py
Lines changed: 2 additions & 2 deletions
diff --git a/src/flowerpower/io/base.py b/src/flowerpower/io/base.py
Lines changed: 6 additions & 6 deletions
@@ -19,3 +19,4 @@ flowerpower.db
 Digraph.gv
 Digraph.gv.pdf
 test/test2.json
+.qodo/history.sqlite
@@ -28,7 +28,7 @@ keywords = [
 name = "FlowerPower"
 readme = "README.md"
 requires-python = ">= 3.11"
-version = "0.9.2"
+version = "0.9.3"
 
 [project.scripts]
 flowerpower = "flowerpower.cli:app"
@@ -113,5 +113,7 @@ dev-dependencies = [
   "paho-mqtt>=2.1.0",
   "greenlet>=3.1.1",
   "obstore>=0.3.0",
+  "pytest>=8.3.4",
+  "mocker>=1.1.1",
 ]
 managed = true
@@ -6,7 +6,7 @@
 from munch import Munch, munchify
 from pydantic import Field
 
-from ..utils.filesystem import get_filesystem
+from ..filesystem import get_filesystem
 from .base import BaseConfig
 from .pipeline.run import PipelineRunConfig
 from .pipeline.schedule import (
 
@@ -32,7 +32,7 @@
 #     cfg.update(data)
 #     try:
 #         cfg.to_yaml(
-#             os.path.join(
+#             posixpath.join(
 #                 "pipelines",
 #                 pipeline_name + ".yml",
 #             ),
 
@@ -1,6 +1,7 @@
 import base64
 import inspect
 import os
+import posixpath
 import urllib
 from pathlib import Path
 
@@ -14,8 +15,8 @@
 from fsspec.utils import infer_storage_options
 from loguru import logger
 
-from ..storage_options import BaseStorageOptions
-from ..storage_options import from_dict as storage_options_from_dict
+from ..utils.storage_options import BaseStorageOptions
+from ..utils.storage_options import from_dict as storage_options_from_dict
 from . import AbstractFileSystem
 
 
@@ -24,13 +25,15 @@ def __init__(self, directory):
         self.directory = directory
 
     def __call__(self, path: str) -> str:
-        os.makedirs(os.path.dirname(os.path.join(self.directory, path)), exist_ok=True)
+        os.makedirs(
+            posixpath.dirname(posixpath.join(self.directory, path)), exist_ok=True
+        )
         return path
 
 
 class MonitoredSimpleCacheFileSystem(SimpleCacheFileSystem):
     def __init__(self, **kwargs):
-        # kwargs["cache_storage"] = os.path.join(
+        # kwargs["cache_storage"] = posixpath.join(
         #    kwargs.get("cache_storage"), kwargs.get("fs").protocol[0]
         # )
         self._verbose = kwargs.get("verbose", False)
@@ -41,8 +44,8 @@ def _check_file(self, path):
         self._check_cache()
         cache_path = self._mapper(path)
         for storage in self.storage:
-            fn = os.path.join(storage, cache_path)
-            if os.path.exists(fn):
+            fn = posixpath.join(storage, cache_path)
+            if posixpath.exists(fn):
                 return fn
             if self._verbose:
                 logger.info(f"Downloading {self.protocol[0]}://{path}")
@@ -55,7 +58,7 @@ def size(self, path):
         if cached_file is None:
             return self.fs.size(path)
         else:
-            return os.path.getsize(cached_file)
+            return posixpath.getsize(cached_file)
 
     def sync(self, reload: bool = False):
         if reload:
@@ -285,7 +288,7 @@ def get_filesystem(
     host = pp.get("host", "")
     path = pp.get("path", "")
     if host and host not in path:
-        path = os.path.join(host, path)
+        path = posixpath.join(host, path)
 
     if protocol == "file" or protocol == "local":
         fs = filesystem(protocol)
 
@@ -1,5 +1,5 @@
 import datetime as dt
-import os
+import posixpath
 import uuid
 from typing import Generator
 
@@ -10,8 +10,12 @@
 import pyarrow.parquet as pq
 from fsspec import AbstractFileSystem
 
-from ..misc import convert_large_types_to_standard, run_parallel, _dict_to_dataframe
-from ..polars import pl
+from ..utils.misc import (
+    convert_large_types_to_standard,
+    run_parallel,
+    _dict_to_dataframe,
+)
+from ..utils.polars import pl
 
 import importlib
 
@@ -72,8 +76,8 @@ def _read_json(
         if "**" in path:
             path = self.glob(path)
         else:
-            if ".json" not in os.path.basename(path):
-                path = os.path.join(path, "**/*.jsonl" if jsonlines else "**/*.json")
+            if ".json" not in posixpath.basename(path):
+                path = posixpath.join(path, "**/*.jsonl" if jsonlines else "**/*.json")
                 path = self.glob(path)
 
     if isinstance(path, list):
@@ -147,8 +151,8 @@ def _read_json_batches(
         if "**" in path:
             path = self.glob(path)
         else:
-            if ".json" not in os.path.basename(path):
-                path = os.path.join(path, "**/*.jsonl" if jsonlines else "**/*.json")
+            if ".json" not in posixpath.basename(path):
+                path = posixpath.join(path, "**/*.jsonl" if jsonlines else "**/*.json")
                 path = self.glob(path)
 
     if isinstance(path, str):
@@ -305,8 +309,8 @@ def _read_csv(
         if "**" in path:
             path = self.glob(path)
         else:
-            if ".csv" not in os.path.basename(path):
-                path = os.path.join(path, "**/*.csv")
+            if ".csv" not in posixpath.basename(path):
+                path = posixpath.join(path, "**/*.csv")
                 path = self.glob(path)
 
     if isinstance(path, list):
@@ -363,8 +367,8 @@ def _read_csv_batches(
         if "**" in path:
             path = self.glob(path)
         else:
-            if ".csv" not in os.path.basename(path):
-                path = os.path.join(path, "**/*.csv")
+            if ".csv" not in posixpath.basename(path):
+                path = posixpath.join(path, "**/*.csv")
                 path = self.glob(path)
 
     # Ensure path is a list
@@ -497,12 +501,12 @@ def _read_parquet(
         if isinstance(path, str):
             if "**" in path:
                 if "*.parquet" in path:
-                    path = os.path.join(path, "*.parquet")
+                    path = posixpath.join(path, "*.parquet")
 
                 path = self.glob(path)
             else:
                 if ".parquet" in path:
-                    path = os.path.join(path, "**/*.parquet")
+                    path = posixpath.join(path, "**/*.parquet")
                 path = self.glob(path)
 
         if isinstance(path, list):
@@ -568,11 +572,11 @@ def _read_parquet_batches(
     if isinstance(path, str):
         if "**" in path:
             if "*.parquet" not in path:
-                path = os.path.join(path, "**/*.parquet")
+                path = posixpath.join(path, "**/*.parquet")
             path = self.glob(path)
         else:
             if ".parquet" not in path:
-                path = os.path.join(path, "**/*.parquet")
+                path = posixpath.join(path, "**/*.parquet")
             path = self.glob(path)
 
     if not isinstance(path, list):
@@ -825,7 +829,7 @@ def pyarrow_parquet_dataset(
         (pds.Dataset): Pyarrow dataset.
     """
     if not self.is_file(path):
-        path = os.path.join(path, "_metadata")
+        path = posixpath.join(path, "_metadata")
     return pds.dataset(
         path,
         filesystem=self,
@@ -1076,7 +1080,7 @@ def _write(i, data, p, basename):
         if mode == "delete_matching":
             write_file(self, data[i], p, format, **kwargs)
         elif mode == "overwrite":
-            self.fs.rm(os.path.dirname(p), recursive=True)
+            self.fs.rm(posixpath.dirname(p), recursive=True)
             write_file(self, data[i], p, format, **kwargs)
         elif mode == "append":
             if not self.exists(p):
 
@@ -1,12 +1,13 @@
 import datetime as dt
 import os
+import posixpath
 from pathlib import Path
 
 import rich
 from fsspec.spec import AbstractFileSystem
 
 from .cfg import Config
-from .utils.filesystem import get_filesystem
+from .filesystem import get_filesystem
 
 
 def init(
@@ -22,20 +23,20 @@ def init(
     if base_dir is None:
         base_dir = str(Path.cwd())
 
-    fs = get_filesystem(os.path.join(base_dir, name), **storage_options)
+    fs = get_filesystem(posixpath.join(base_dir, name), **storage_options)
 
     fs.makedirs("conf/pipelines", exist_ok=True)
     fs.makedirs("pipelines", exist_ok=True)
 
-    cfg = Config.load(base_dir=os.path.join(base_dir, name), name=name)
+    cfg = Config.load(base_dir=posixpath.join(base_dir, name), name=name)
 
-    with open(os.path.join(base_dir, name, "README.md"), "w") as f:
+    with open(posixpath.join(base_dir, name, "README.md"), "w") as f:
         f.write(
             f"# {name.replace('_', ' ').upper()}\n\n"
             f"**created with FlowerPower**\n\n*{dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n"
         )
     cfg.save()
-    os.chdir(os.path.join(base_dir, name))
+    os.chdir(posixpath.join(base_dir, name))
 
     rich.print(
         f"\n✨ Initialized FlowerPower project [bold blue]{name}[/bold blue] at [italic green]{base_dir}[/italic green]\n"
 
@@ -1,4 +1,4 @@
-import os
+import posixpath
 
 from sanic import Blueprint
 from sanic.exceptions import SanicException
@@ -36,7 +36,7 @@ async def update_pipeline(request, pipeline_name, body: PipelineConfig) -> json:
     cfg.update(data)
     try:
         cfg.to_yaml(
-            os.path.join(
+            posixpath.join(
                 "pipelines",
                 pipeline_name + ".yml",
             ),
 
@@ -10,7 +10,7 @@
 from fsspec.utils import get_protocol
 from pydantic import BaseModel, ConfigDict
 
-from ..utils.filesystem import get_filesystem
+from ..filesystem import get_filesystem
 from ..utils.polars import pl
 from ..utils.sql import sql2polars_filter, sql2pyarrow_filter
 from ..utils.storage_options import (
@@ -67,7 +67,7 @@ class BaseFileIO(BaseModel):
     format: str | None = None
 
     def model_post_init(self, __context):
-        # self._update_storage_options_from_env()
+        self._raw_path = self.path
         if isinstance(self.storage_options, dict):
             if "protocol" not in self.storage_options:
                 self.storage_options["protocol"] = get_protocol(self.path)
@@ -569,7 +569,7 @@ def to_pyarrow_dataset(
                 **kwargs,
             )
         elif self.format == "parquet":
-            if self.fs.exists(os.path.join(self._path, "_metadata")):
+            if self.fs.exists(posixpath.join(self._path, "_metadata")):
                 self._dataset = self.fs.parquet_dataset(
                     self._path,
                     schema=self.schema_,
@@ -738,7 +738,7 @@ class BaseFileWriter(BaseFileIO):
         | pd.DataFrame
         | dict[str, Any]
         | list[pl.DataFrame | pl.LazyFrame | pa.Table | pd.DataFrame | dict[str, Any]]
-    )
+    ) | None = None
     basename: str | None = None
     concat: bool = False
     mode: str = "append"  # append, overwrite, delete_matching, error_if_exists
@@ -788,7 +788,7 @@ class BaseDatasetWriter(BaseFileWriter):
             | pd.DataFrame
             | dict[str, Any]
         ]
-    )
+    ) | None = None
     basename: str | None = None
     schema_: pa.Schema | None = None
     partition_by: str | list[str] | pds.Partitioning | None = None
@@ -798,7 +798,7 @@ class BaseDatasetWriter(BaseFileWriter):
     max_rows_per_file: int | None = 2_500_000
     concat: bool = False
     mode: str = "append"  # append, overwrite, delete_matching, error_if_exists
-    is_pydala_dataset: bool = (False,)
+    is_pydala_dataset: bool = False
 
     def write(
         self,