pgpartman
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 2 additions & 2 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 29 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎META.json‎
Lines changed: 3 additions & 3 deletions b/‎META.json‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎bin/common/check_unique_constraint.py‎
100755100644
Lines changed: 2 additions & 2 deletions b/‎bin/common/check_unique_constraint.py‎
100755100644
Lines changed: 2 additions & 2 deletions
diff --git a/‎bin/common/dump_partition.py‎
100755100644 b/‎bin/common/dump_partition.py‎
100755100644
diff --git a/‎bin/common/vacuum_maintenance.py‎
100755100644
Lines changed: 1 addition & 1 deletion b/‎bin/common/vacuum_maintenance.py‎
100755100644
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/pg_partman.md‎
Lines changed: 52 additions & 1 deletion b/‎doc/pg_partman.md‎
Lines changed: 52 additions & 1 deletion
diff --git a/‎pg_partman.control‎
Lines changed: 1 addition & 1 deletion b/‎pg_partman.control‎
Lines changed: 1 addition & 1 deletion
@@ -9,3 +9,4 @@ ignore/*
 sql/*.sql
 test/not_working_yet/*
 *.zip
+*.fuse*
@@ -1,6 +1,6 @@
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.6.0
+  rev: v6.0.0
   hooks:
     - id: check-merge-conflict
     - id: check-symlinks
@@ -12,6 +12,6 @@ repos:
     - id: trailing-whitespace
       args: ['--markdown-linebreak-ext=md']
 - repo: https://github.com/codespell-project/codespell
-  rev: v2.3.0
+  rev: v2.4.1
   hooks:
     - id: codespell
@@ -1,3 +1,32 @@
+5.3.0
+=====
+
+NEW FEATURES
+------------
+ - Added new procedure partition_data_async() to allow smaller batching of data per transaction when moving data out of the default partition. (Github Issue #353)
+    - Note this procedure currently only works with time-based partitioning. ID/integer partitioning is in development.
+    - WARNING: While data is in transition between the default and the destination child table using this procedure, it is NOT visible to users of the partition table. See documentation for this function for additional details.
+ - Better support filtering out any columns with `p_ignored_columns` while partitioning data using the `partition_data_time()`, `partition_data_id()`, or `partition_data_proc()` utilities. (Github PR#723)
+    - Allows for filtering out GENERATED columns while moving data so that newly generated values will be entered for moved rows.
+    - Non-GENERATED columns that are filtered out will either have NULL values or use the default value when rows are moved.
+    - TODO update partition_data_id
+ - Added support for uuid-based partition sets to partition_data_time()/partition_data_proc() functions (Github #789)
+ - Allow a starting offset to id/integer based partitioning. Added a new parameter to create_parent: p_offset_id. Note that the offset will carry through to all subsequent child tables. Ex: offset of 5 with interval 10 will make lower boundaries 5, 15, 25, etc. (Github Issue #339)
+ - Reduce the logging of the dynamic background worker runs to be DEBUG1. Changed existing DEBUG1 logging messages in the BGW to DEBUG2.
+ - Unlogged tables are still supported in pg_partman as of PostgreSQL 18 and newer, but the parent table can no longer be flagged unlogged. This only works through the template table system in pg_partman.
+
+BUGFIXES
+--------
+ - Allow `partition_data_*()` utilities to properly work when a PK/Unique key is set to GENERATED ALWAYS.
+ - Handle if the given default table already exists when calling `create_parent()`. Helps to better handle migrating an existing partition set to pg_partman.
+ - Added check to ensure that the default table cannot be manually set as the value of p_source_table in partitioning functions and procedures. This would previously cause an unhandled edge case endless loop since the data moved out of the default was getting moved right back into the default again instead of a new child partition. (Github Issue #353)
+ - Always ensure transaction is committed at proper time when using reapply_constraints_proc(). (Github PR#780)
+ - Added plpgsql as a required dependency in the extension control file. (Github PR# 808)
+
+DOCUMENTATION
+-------------
+ - Updated documentation for the time decoder function to note that it must take a TEXT value as its parameter at this time.
+
 5.2.4
 =====
 BUG FIXES
 
@@ -1,7 +1,7 @@
 {
     "name": "pg_partman",
     "abstract": "Extension to manage partitioned tables by time or ID",
-    "version": "5.2.4",
+    "version": "5.3.0",
     "maintainer": [
         "Keith Fiske <[email protected]>"
     ],
@@ -20,9 +20,9 @@
     },
     "provides": {
         "pg_partman": {
-            "file": "sql/pg_partman--5.2.4.sql",
+            "file": "sql/pg_partman--5.3.0.sql",
             "docfile": "doc/pg_partman.md",
-            "version": "5.2.4",
+            "version": "5.3.0",
             "abstract": "Extension to manage partitioned tables by time or ID"
         }
     },
 
@@ -4,14 +4,14 @@
 
 partman_version = "2.0.0"
 
-parser = argparse.ArgumentParser(description="This script is used to check that all rows in a partition set are unique for the given columns. Since unique constraints are not applied across partition sets, this cannot be enforced within the database. This script can be used as a monitor to ensure uniquness. If any unique violations are found, the values, along with a count of each, are output.")
+parser = argparse.ArgumentParser(description="This script is used to check that all rows in a partition set are unique for the given columns. Since unique constraints are not applied across partition sets, this cannot be enforced within the database. This script can be used as a monitor to ensure uniqueness. If any unique violations are found, the values, along with a count of each, are output.")
 parser.add_argument('-p', '--parent',  help="Parent table of the partition set to be checked")
 parser.add_argument('-l', '--column_list', help="Comma separated list of columns that make up the unique constraint to be checked")
 parser.add_argument('-c','--connection', default="host=", help="""Connection string for use by psycopg. Defaults to "host=" (local socket).""")
 parser.add_argument('-t', '--temp', help="Path to a writable folder that can be used for temp working files. Defaults system temp folder.")
 parser.add_argument('--psql', help="Full path to psql binary if not in current PATH")
 parser.add_argument('--simple', action="store_true", help="Output a single integer value with the total duplicate count. Use this for monitoring software that requires a simple value to be checked for.")
-parser.add_argument('--index_scan', action="store_true", help="By default index scans are disabled to force the script to check the actual table data with sequential scans. Set this option if you want the script to allow index scans to be used (does not guarentee that they will be used).")
+parser.add_argument('--index_scan', action="store_true", help="By default index scans are disabled to force the script to check the actual table data with sequential scans. Set this option if you want the script to allow index scans to be used (does not guarantee that they will be used).")
 parser.add_argument('-q', '--quiet', action="store_true", help="Suppress all output unless there is a constraint violation found.")
 parser.add_argument('--version', action="store_true", help="Print out the minimum version of pg_partman this script is meant to work with. The version of pg_partman installed may be greater than this.")
 args = parser.parse_args()
 
@@ -204,7 +204,7 @@ def vacuum_table(conn, schemaname, tablename):
         sys.exit(2)
 
     if args.interval != None and args.type == None:
-        print("--interval argment requires setting --type argument as well")
+        print("--interval argument requires setting --type argument as well")
         sys.exit(2)
 
     main_conn = create_conn()
 
@@ -271,6 +271,7 @@ partition_data_time(
     , p_analyze boolean DEFAULT true
     , p_source_table text DEFAULT NULL
     , p_ignored_columns text[] DEFAULT NULL
+    , p_override_system_value boolean DEFAULT false
 )
 RETURNS bigint
 ```
@@ -287,6 +288,7 @@ RETURNS bigint
  * `p_analyze` - optional argument, by default whenever a new child table is created, an analyze is run on the parent table of the partition set to ensure constraint exclusion works. This analyze can be skipped by setting this to false and help increase the speed of moving large amounts of data. If this is set to false, it is highly recommended that a manual analyze of the partition set be done upon completion to ensure statistics are updated properly.
  * `p_source_table` - This option can be used when you need to move data into a partitioned table. Pass a schema qualified tablename to this parameter and any data in that table will be MOVED to the partition set designated by p_parent_table, creating any child tables as needed.
  * `p_ignored_columns` - This option allows for filtering out specific columns when moving data from the default/source to the target child table(s). This is generally only required when using columns with a GENERATED ALWAYS value since directly inserting a value would fail when moving the data. Value is a text array of column names.
+ * `p_override_system_value` - When moving data from the default or another source table to a partition set that has GENERATED ALWAYS column values, you may want to keep the values from the source vs having newly generated values. This allows you to set the `OVERRIDING SYSTEM VALUE` flag when inserting data. Note that you may need to reset the underlying sequence for the target generated columns when overriding inserted data.
  * Returns the number of rows that were moved from the parent table to partitions. Returns zero when source table is empty and partitioning is complete.
 
 
@@ -301,6 +303,7 @@ partition_data_id(p_parent_table text
     , p_analyze boolean DEFAULT true
     , p_source_table text DEFAULT NULL
     , p_ignored_columns text[] DEFAULT NULL
+    , p_override_system_value boolean DEFAULT false
 )
 RETURNS bigint
 ```
@@ -317,6 +320,7 @@ RETURNS bigint
  * `p_analyze` - optional argument, by default whenever a new child table is created, an analyze is run on the parent table of the partition set to ensure constraint exclusion works. This analyze can be skipped by setting this to false and help increase the speed of moving large amounts of data. If this is set to false, it is highly recommended that a manual analyze of the partition set be done upon completion to ensure statistics are updated properly.
  * `p_source_table` - This option can be used when you need to move data into a partitioned table. Pass a schema qualified tablename to this parameter and any data in that table will be MOVED to the partition set designated by p_parent_table, creating any child tables as needed.
  * `p_ignored_columns` - This option allows for filtering out specific columns when moving data from the default/source to the target child table(s). This is generally only required when using columns with a GENERATED ALWAYS value since directly inserting a value would fail when moving the data. Value is a text array of column names.
+ * `p_override_system_value` - When moving data from the default or another source table to a partition set that has GENERATED ALWAYS column values, you may want to keep the values from the source vs having newly generated values. This allows you to set the `OVERRIDING SYSTEM VALUE` flag when inserting data. Note that you may need to reset the underlying sequence for the target generated columns when overriding inserted data.
  * Returns the number of rows that were moved from the parent table to partitions. Returns zero when source table is empty and partitioning is complete.
 
 
@@ -339,7 +343,7 @@ partition_data_proc (
  * A procedure that can partition data in distinct commit batches to avoid long running transactions and data contention issues.
  * Calls either partition_data_time() or partition_data_id() in a loop depending on partitioning type.
  * `p_parent_table` - Parent table of an already created partition set.
- * `p_loop_count` - How many times to loop through the value given for p_interval. If p_interval not set, will use default partition interval and make at most this many partition(s). Procedure commits at the end of each loop (NOT passed as p_batch_count to partitioning function). If not set, all data in the parent/source table will be partitioned in a single run of the procedure.
+ * `p_loop_count` - How many times to loop through the value given for p_interval. If p_interval not set, will use default partition interval and make at most this many partition(s). Procedure commits at the end of each loop (NOT passed as p_batch_count to partitioning function). If not set, all data in the default/source table will be partitioned in a single run of the procedure.
  * `p_interval` - Parameter that is passed on to the partitioning function as p_batch_interval argument. See underlying functions for further explanation.
  * `p_lock_wait` - Parameter that is passed directly through to the underlying partition_data_*() function. Number of seconds to wait on rows that may be locked by another transaction. Default is to wait forever (0).
  * `p_lock_wait_tries` - Parameter to set how many times the procedure will attempt waiting the amount of time set for p_lock_wait. Default is 10 tries.
@@ -350,6 +354,50 @@ partition_data_proc (
  * `p_quiet` - Procedures cannot return values, so by default it emits NOTICE's to show progress. Set this option to silence these notices.
 
 
+<a id="partition_data_async"></a>
+```sql
+partition_data_async (
+    p_parent_table text
+    , p_loop_count int DEFAULT NULL
+    , p_interval text DEFAULT NULL
+    , p_lock_wait int DEFAULT 0
+    , p_lock_wait_tries int DEFAULT 10
+    , p_wait int DEFAULT 1
+    , p_order text DEFAULT 'ASC'
+    , p_ignored_columns text[] DEFAULT NULL
+    , p_quiet boolean DEFAULT false
+)
+```
+ * Note: This procedure currently only works with time-based partitioning as of pg_partman version 5.3.0. Integer/id support is in development.
+ * A procedure designed to help move data out of the default partition in smaller batches of rows per committed transaction than the partition interval.
+ * This procedure is ONLY for moving data out of the default. If you're moving data from another source table to the partitioned table, you can already use smaller batch sizes than the partition interval with the `partition_data_proc()` procedure (or standard `partition_data_time/id()` functions).
+ * The `partition_data_proc()` procedure can still be used to migrate data out of the default, but the transaction interval size can never be smaller than the partition interval since the new child table cannot be made until all of the relevant data has been moved out of the default.
+ * IMPORTANT NOTE: This procedure works by first moving all the data for a target child table to another real, working table. The smaller batches are committed, so the data that is in transit before being moved to the target child table is NOT VISIBLE to users of the table. If you cannot afford to have data disappearing for the users of the table, then this asynchronous method WILL NOT provide the desired result. In that case, you must use a batch size equal to the interval size of the partition set and you can use `partition_data_proc()`.
+ * How this procedure works:
+    * The interval size is the amount of data that is moved in each commit.
+    * Commits are done when data is moved to the temporary storage location as well as the final child table.
+    * So the value of the loop count to move all the data for a single child table is the partition set's interval divided by the interval size given to this procedure, with that result then multiplied by 2.
+    * For example: A daily partition set (24 hrs) is given the interval of 6 hours to this asynchronous procedure so that it commits after each block of 6 hours is moved. That means there would be 4 batches of data that first get moved to the working table then moved to the final child table for a total of 8 commits. So p_loop_count would be 8 to move all the data for a single child table in this partition set ( (24 / 6) * 2).
+    * Multiply that value for however many child tables you expect to be moved.
+    * If no loop count is given, the entire default table will be emptied out using the batch interval given.
+ * A real (not temporary) table is created as needed to hold intermediate data while it is moved. This table will be dropped whenever a child table has been created and all the data is moved to it.
+    * The naming pattern of the working table is: `originalschema.partman_tmp_storage_originaltablename`
+ * While data is being migrated, the `async_partitioning_in_progress` column in the `part_config` table will contain a value that relates to the most recent set of data that has been moved. While this column has a value, and during the running of this procedure, all maintenance for that partition set will be skipped (a warning is left in the PostgreSQL logs). To resume normal maintenance, this column must be NULL. This will automatically be set to NULL after completion of each child table.
+ * Since a real table is used to migrate data, the state of a migration is preserved between multiple runnings of this procedure. But as stated in the previous bullet, all normal partition maintenance for the partition set will be skipped while a partition set is left in a state where all the data for a given child table has not been fully moved to the target child table.
+ * `p_parent_table` - Parent table of an already created partition set.
+ * `p_loop_count` - How many times to loop through the value given for p_interval. See above bullet points for important information for what this loop count actually means when using this procedure. If not set, all data in the default table will be partitioned in a single run of the procedure.
+ * `p_interval` - Parameter that sets the interval size of how many rows will be committed in a single committed transaction. See above bullet points for further explanations of how this parameter is used.
+ * `p_lock_wait` - Parameter that is passed directly through to the underlying partition_data_*() function. Number of seconds to wait on rows that may be locked by another transaction. Default is to wait forever (0).
+ * `p_lock_wait_tries` - Parameter to set how many times the procedure will attempt waiting the amount of time set for p_lock_wait. Default is 10 tries.
+ * `p_wait` - Cause the procedure to pause for a given number of seconds between commits (batches) to reduce write load
+ * `p_order` -  Same as the p_order option in the called partitioning function
+ * `p_source_table` - Same as the p_source_table option in the called partitioning function
+ * `p_ignored_columns` - This option allows for filtering out specific columns when moving data from the default/parent to the proper child table(s). This is generally only required when using columns with a GENERATED ALWAYS value since directly inserting a value would fail when moving the data. Value is a text array of column names.
+ * `p_quiet` - Procedures cannot return values, so by default it emits NOTICE's to show progress. Set this option to silence these notices.
+
+
+
+
 <a id="create_partition_time"></a>
 ```sql
 create_partition_time(
@@ -760,6 +808,7 @@ Stores all configuration data for partition sets managed by the extension.
     , maintenance_order int DEFAULT NULL
     , retention_keep_publication boolean NOT NULL DEFAULT false
     , maintenance_last_run timestamptz
+    , async_partitioning_in_progress text
 
  - `parent_table`
     - Parent table of the partition set
@@ -826,6 +875,8 @@ Stores all configuration data for partition sets managed by the extension.
     - Default value is false
  - maintenance_last_run
     - Timestamp of the last successful run of maintenance for this partition set. Can be useful as a monitoring metric to ensure partition maintenance is running properly.
+ - async_partitioning_in_progress
+    - This column is used to track if an asynchronous partitioning process has been started. It is a text field that contains the value related to the last block of data that was processed. If NOT NULL, all regular maintenance for this table will be stopped until the async partitioning process has been completed successfully. See `partition_data_async()` for more information.
 
 
 <a id="part_config_sub"></a>
 
@@ -1,4 +1,4 @@
-default_version = '5.2.4'
+default_version = '5.3.0'
 comment = 'Extension to manage partitioned tables by time or ID'
 relocatable = false
 superuser = false
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-default_version = '5.2.4'`
	`1`	`+default_version = '5.3.0'`
`2`	`2`	`comment = 'Extension to manage partitioned tables by time or ID'`
`3`	`3`	`relocatable = false`
`4`	`4`	`superuser = false`