deepseek-ai · guspan-tanadi · Mar 27, 2025
diff --git a/smallpond/common.py b/smallpond/common.py
@@ -14,7 +14,7 @@
 
 DEFAULT_MAX_RETRY_COUNT = 5
 DEFAULT_MAX_FAIL_COUNT = 3
-# duckdb default row group size https://duckdb.org/docs/data/parquet/tips#selecting-a-row_group_size
+# duckdb default row group size https://duckdb.org/docs/stable/data/parquet/tips#selecting-a-row_group_size
 MAX_ROW_GROUP_SIZE = 10 * 1024 * 1024
 MAX_ROW_GROUP_BYTES = 2 * GB
 MAX_NUM_ROW_GROUPS = 256

diff --git a/smallpond/logical/dataset.py b/smallpond/logical/dataset.py
@@ -81,7 +81,7 @@ def __init__(
         columns, optional
             Only load the specified columns if not None.
         union_by_name, optional
-            Unify the columns of different files by name (see https://duckdb.org/docs/data/multiple_files/combining_schemas#union-by-name).
+            Unify the columns of different files by name (see https://duckdb.org/docs/stable/data/multiple_files/combining_schemas#union-by-name).
         """
         self.paths = [paths] if isinstance(paths, str) else paths
         "The paths to the dataset files."

diff --git a/smallpond/logical/node.py b/smallpond/logical/node.py
@@ -592,7 +592,7 @@ def __init__(
             The number of rows stored in each row group of parquet file.
             Large row group size provides more opportunities to compress the data.
             Small row group size could make filtering rows faster and achieve high concurrency.
-            See https://duckdb.org/docs/data/parquet/tips.html#selecting-a-row_group_size.
+            See https://duckdb.org/docs/stable/data/parquet/tips.html#selecting-a-row_group_size.
         parquet_dictionary_encoding, optional
             Specify if we should use dictionary encoding in general or only for some columns.
             See `use_dictionary` in https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html.
@@ -753,7 +753,7 @@ def __init__(
             The number of rows stored in each row group of parquet file.
             Large row group size provides more opportunities to compress the data.
             Small row group size could make filtering rows faster and achieve high concurrency.
-            See https://duckdb.org/docs/data/parquet/tips.html#selecting-a-row_group_size.
+            See https://duckdb.org/docs/stable/data/parquet/tips.html#selecting-a-row_group_size.
         parquet_dictionary_encoding, optional
             Specify if we should use dictionary encoding in general or only for some columns.
             See `use_dictionary` in https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html.
@@ -984,7 +984,7 @@ def __init__(
             since UDF execution in duckdb is not highly parallelized.
         per_thread_output, optional
             If the final number of Parquet files is not important, writing one file per thread can significantly improve performance.
-            Also see https://duckdb.org/docs/data/parquet/tips.html#enabling-per_thread_output.
+            Also see https://duckdb.org/docs/stable/data/parquet/tips.html#enabling-per_thread_output.
         materialize_output, optional
             Query result is materialized to the underlying filesystem as parquet files if enabled.
         materialize_in_memory, optional
@@ -1002,7 +1002,7 @@ def __init__(
             The number of rows stored in each row group of parquet file.
             Large row group size provides more opportunities to compress the data.
             Small row group size could make filtering rows faster and achieve high concurrency.
-            See https://duckdb.org/docs/data/parquet/tips.html#selecting-a-row_group_size.
+            See https://duckdb.org/docs/stable/data/parquet/tips.html#selecting-a-row_group_size.
         parquet_dictionary_encoding, optional
             Specify if we should use dictionary encoding in general or only for some columns.
             When encoding the column, if the dictionary size is too large, the column will fall back to PLAIN encoding.
@@ -1621,7 +1621,7 @@ def __init__(
             The number of rows stored in each row group of parquet file.
             Large row group size provides more opportunities to compress the data.
             Small row group size could make filtering rows faster and achieve high concurrency.
-            See https://duckdb.org/docs/data/parquet/tips.html#selecting-a-row_group_size.
+            See https://duckdb.org/docs/stable/data/parquet/tips.html#selecting-a-row_group_size.
         parquet_dictionary_encoding, optional
             Specify if we should use dictionary encoding in general or only for some columns.
             See `use_dictionary` in https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html.
@@ -1802,7 +1802,7 @@ def __init__(
         generated_columns
             Auto generated columns, supported values: `filename`, `file_row_number`.
         union_by_name, optional
-            Unify the columns of different files by name (see https://duckdb.org/docs/data/multiple_files/combining_schemas#union-by-name).
+            Unify the columns of different files by name (see https://duckdb.org/docs/stable/data/multiple_files/combining_schemas#union-by-name).
 
         Examples
         --------

diff --git a/smallpond/logical/udf.py b/smallpond/logical/udf.py
@@ -102,7 +102,7 @@ class UDFStructType:
     """
     A wrapper of duckdb.struct_type, eg: UDFStructType({'host': 'VARCHAR', 'path': 'VARCHAR', 'query': 'VARCHAR'})
 
-    See https://duckdb.org/docs/api/python/types.html#a-field_one-b-field_two--n-field_n
+    See https://duckdb.org/docs/stable/clients/python/types.html#a-field_one-b-field_two--n-field_n
     """
 
     def __init__(self, fields: Union[Dict[str, str], List[str]]) -> None:
@@ -116,7 +116,7 @@ class UDFListType:
     """
     A wrapper of duckdb.list_type, eg: UDFListType(UDFType.INTEGER)
 
-    See https://duckdb.org/docs/api/python/types.html#listchild_type
+    See https://duckdb.org/docs/stable/clients/python/types.html#listchild_type
     """
 
     def __init__(self, child) -> None:
@@ -130,7 +130,7 @@ class UDFMapType:
     """
     A wrapper of duckdb.map_type, eg: UDFMapType(UDFType.VARCHAR, UDFType.INTEGER)
 
-    See https://duckdb.org/docs/api/python/types.html#dictkey_type-value_type
+    See https://duckdb.org/docs/stable/clients/python/types.html#dictkey_type-value_type
     """
 
     def __init__(self, key, value) -> None: