11"""
2- Implementation of UUID v7 per the October 2021 draft update
3- to RFC4122 from 2005:
4- https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format
2+ UUID v7 polyfill for Python < 3.14.
53
6- Stephen Simmons, v0.1.0, 2021-12-27
7- """
8-
9- # pyright: reportUnknownParameterType=false, reportMissingParameterType=false, reportUnknownVariableType=false, reportUnknownMemberType=false, reportUnknownArgumentType=false, reportDeprecated=false
4+ On Python 3.14+, we use the stdlib implementation. On older versions,
5+ we use a simplified vendored implementation.
106
11- __all__ = (
12- "uuid7" ,
13- "uuid7str" ,
14- "time_ns" ,
15- "check_timing_precision" ,
16- "uuid_to_datetime" ,
17- )
7+ The vendored code can be removed once Python 3.13 support is dropped.
8+ """
189
19- import datetime
2010import os
21- import struct
11+ import sys
2212import time
23- from typing import Callable , Optional , Union
2413import uuid
14+ from typing import Callable
2515
26- # Expose function used by uuid7() to get current time in nanoseconds
27- # since the Unix epoch.
28- time_ns = time .time_ns
29-
30-
31- def uuid7 (
32- ns : Optional [int ] = None ,
33- as_type : Optional [str ] = None ,
34- time_func : Callable [[], int ] = time_ns ,
35- _last = [0 , 0 , 0 , 0 ],
36- _last_as_of = [0 , 0 , 0 , 0 ],
37- ) -> Union [uuid .UUID , str , int , bytes ]:
38- """
39- UUID v7, following the proposed extension to RFC4122 described in
40- https://www.ietf.org/id/draft-peabody-dispatch-new-uuid-format-02.html.
41- All representations (string, byte array, int) sort chronologically,
42- with a potential time resolution of 50ns (if the system clock
43- supports this).
44-
45- Parameters
46- ----------
47-
48- ns - Optional integer with the whole number of nanoseconds
49- since Unix epoch, to set the "as of" timestamp.
50- As a special case, uuid7(ns=0) returns the zero UUID.
16+ __all__ = ("uuid7" ,)
5117
52- as_type - Optional string to return the UUID in a different format.
53- A uuid.UUID (version 7, variant 0x10) is returned unless
54- this is one of 'str', 'int', 'hex' or 'bytes'.
5518
56- time_func - Set the time function, which must return integer
57- nanoseconds since the Unix epoch, midnight on 1-Jan-1970.
58- Defaults to time.time_ns(). This is exposed because
59- time.time_ns() may have a low resolution on Windows.
19+ # Vendored implementation for Python < 3.14
20+ # From Stephen Simmons' implementation (v0.1.0, 2021-12-27)
21+ # Original: https://github.com/stevesimmons/uuid7
22+ # Adapted to match stdlib signature (no parameters)
6023
61- _last and _last_as_of - Used internally to trigger incrementing a
62- sequence counter when consecutive calls have the same time
63- values. The values [t1, t2, t3, seq] are described below.
# Module-level state for the sequence counter: the [t1, t2, t3, seq] fields of
# the most recently issued UUID.  t1/t2/t3 are the timestamp fields (t3 also
# carries the version nibble) and seq is the 14-bit counter.
_last = [0, 0, 0, 0]


def _vendored_uuid7() -> uuid.UUID:
    """
    Generate a version 7 UUID with an embedded timestamp and sequence counter.

    Successive calls return strictly increasing UUIDs.  If the clock has not
    advanced past the previously issued timestamp (same tick, or the clock
    stepped backwards), the previous timestamp is reused and the sequence
    counter is incremented.  If the counter would exceed 14 bits, the
    timestamp is advanced by one tick and the counter restarts at zero.

    The 128 bits in the UUID are allocated as follows, most significant first:
    - t1, 32 bits: upper 32 bits of the 36-bit whole-seconds count
    - t2, 16 bits: lower 4 bits of the seconds, then the upper 12 bits of
      the 24-bit binary fraction of a second
    - t3, 16 bits: uuid version (0b0111), then the lower 12 bits of the
      fraction
    - t4, 16 bits: uuid variant (0b10), then the 14-bit sequence counter
    - rand, 48 bits: randomness from os.urandom()

    The 24 fractional bits give a tick of 2**-24 s, roughly 60ns.

    NOTE(review): this layout comes from the 2021 draft that the vendored
    code was written against.  RFC 9562 specifies a 48-bit millisecond
    timestamp for UUIDv7, so these values are presumably not bit-compatible
    with the standard-library uuid7 -- confirm this is acceptable.

    NOTE(review): no lock is taken around the module-level ``_last`` state.

    Returns
    -------

    A uuid.UUID with version 7 and the RFC 4122 variant.
    """
    # 60-bit fixed-point timestamp: 36 bits of seconds and 24 bits of
    # fraction.  This equals the (t1, t2, t3) triple produced by three
    # chained divmod() calls against 16_000_000_000, packed into one int.
    ts = (time.time_ns() << 24) // 1_000_000_000

    # Rebuild the previously issued timestamp, dropping the version nibble
    # that is stored in the top 4 bits of the saved t3.
    last_ts = (_last[0] << 28) | (_last[1] << 12) | (_last[2] & 0x0FFF)

    if ts > last_ts:
        # The clock moved forward: start a fresh sequence.
        seq = 0
    else:
        # Same tick, or the clock went backwards: keep the last timestamp so
        # the new UUID still sorts after the previous one.
        ts = last_ts
        seq = _last[3] + 1
        if seq > 0x3FFF:
            # Counter exhausted: borrow the next tick rather than pinning the
            # counter, which would break ordering.
            ts += 1
            seq = 0

    t1 = ts >> 28
    t2 = (ts >> 12) & 0xFFFF
    t3 = (ts & 0x0FFF) | (7 << 12)  # Put uuid version in the top 4 bits
    _last[:] = (t1, t2, t3, seq)
    t4 = (2 << 14) | seq  # Put variant 0b10 in the top two bits

    # Six random bytes for the lower part of the uuid
    rand = int.from_bytes(os.urandom(6), "big")

    # Build the UUID from its components
    uuid_int = (t1 << 96) | (t2 << 80) | (t3 << 64) | (t4 << 48) | rand
    return uuid.UUID(int=uuid_int)
17493
175- def uuid7str (ns : Optional [int ] = None ) -> str :
176- "uuid7() as a string without creating a UUID object first."
177- return uuid7 (ns , as_type = "str" ) # type: ignore
17894
179-
180- def check_timing_precision (
181- timing_func : Optional [Callable [[], int ]] = None ,
182- ) -> str :
183- """
184- Message indicating the timing precision from various time/clock
185- functions that might be used for UUIDv7 generation.
186-
187- This tests time.time_ns(), time.perf_counter_ns()
188- and datetime.datetime.utcnow converted to ns.
189-
190- A user-supplied timing function may also be provided.
191- It must return the number of ns since the Unix Epoch
192- (midnight at 1-Jan-1970).
193-
194- Note that time.time_ns() updates every 200us under Linux
195- and potentially as infrequently as every 5ms under Windows.
196-
197- Usage:
198- >>> check_timing_precision()
199- # Under Linux
200- time.time_ns() has a timing precision of 221ns rather than 221ns (1,000 distinct samples in 0.00s)
201- time.perf_counter_ns() has a timing precision of 215ns rather than 215ns (1,000 distinct samples in 0.00s)
202- datetime.datetime.utcnow has a timing precision of 1,046ns rather than 679ns (1,000 distinct samples in 0.00s)
203- # Under Windows
204- time.time_ns() has a timing precision of 4,950,500ns rather than 709ns (705,068 samples of which 101 are distinct, in 0.50s)
205- time.perf_counter_ns() has a timing precision of 823ns rather than 823ns (1,000 samples of which 1,000 are distinct, in 0.00s)
206- datetime.datetime.utcnow has a timing precision of 5,882,365ns rather than 2,812ns (177,792 samples of which 85 are distinct, in 0.50s)
207- """
208- timing_funcs = [
209- ("time.time_ns()" , time .time_ns ),
210- ("time.perf_counter_ns()" , time .perf_counter_ns ),
211- (
212- "datetime.datetime.utcnow" ,
213- lambda : int (datetime .datetime .utcnow ().timestamp () * 1_000_000_000 ),
214- ),
215- ]
216- if timing_func is not None :
217- timing_funcs .append (("user-supplied" , timing_func ))
218-
219- lines = []
220- for desc , fn in timing_funcs :
221- started_ns = time .perf_counter_ns ()
222- values = set ()
223- ctr = 0
224- while True :
225- values .add (fn ())
226- ctr += 1
227- elapsed_ns = time .perf_counter_ns () - started_ns
228- if elapsed_ns > 500_000_000 or len (values ) >= 1000 :
229- break
230- precision_ns = elapsed_ns / len (values )
231- ideal_precision_ns = elapsed_ns / ctr
232- lines .append (
233- f"{ desc } has a timing precision of { precision_ns :0,.0f} ns \
234- rather than { ideal_precision_ns :0,.0f} ns ({ ctr :,} samples of which \
235- { len (values ):,} are distinct, in { elapsed_ns / 1_000_000_000 :0.2f} s)"
236- )
237-
238- return "\n " .join (lines )
239-
240-
241- def timestamp_ns (
242- s : Union [str , uuid .UUID , int ],
243- suppress_error = True ,
244- ) -> Optional [int ]:
245- """
246- Recover the timestamp from a UUIDv7, passed in
247- as a string, integer or a UUID object.
248-
249- If the UUID is not a version 7 UUID, either raise a ValueError
250- or return None, depending on suppress_error.
251-
252- Usage:
253- >>> uuid_to_datetime("1eb22fe4-3f0c-62b1-a88c-8dc55231702f")
254- datetime.datetime(2020, 11, 10, 2, 41, 42, 182162)
255- """
256- if isinstance (s , uuid .UUID ):
257- x = s .bytes
258- elif not s :
259- x = b"\0 " * 16
260- elif isinstance (s , int ):
261- x = int .to_bytes (s , length = 16 , byteorder = "big" )
262- else : # String form that should look like a UUID
263- int_uuid = int (str (s ).replace ("-" , "" ), base = 16 )
264- x = int .to_bytes (int_uuid , length = 16 , byteorder = "big" )
265-
266- uuid_version = x [6 ] >> 4
267- if uuid_version == 7 :
268- bits = struct .unpack (">IHHHHI" , x )
269- uuid_version = (bits [2 ] >> 12 ) & 0xF
270- # uuid_variant = (bits[3] >> 62) & 0x3
271- whole_secs = (bits [0 ] << 4 ) + (bits [1 ] >> 12 )
272- frac_binary = (
273- ((bits [1 ] & 0x0FFF ) << 26 ) + ((bits [2 ] & 0x0FFF ) << 14 ) + (bits [3 ] & 0x3FFF )
274- )
275- frac_ns , _ = divmod (frac_binary * 1_000_000_000 , 1 << 38 )
276- ns_since_epoch = whole_secs * 1_000_000_000 + frac_ns
277- return ns_since_epoch
278- elif suppress_error :
279- return None
280- else :
281- raise ValueError (
282- f"{ str (s )} is a version { uuid_version } UUID, \
283- not v7 so we cannot extract the timestamp."
284- )
285-
286-
287- def uuid_to_datetime (
288- s : Union [str , uuid .UUID , int ],
289- suppress_error = True ,
290- ) -> Optional [datetime .datetime ]:
291- ns_since_epoch = timestamp_ns (s , suppress_error = suppress_error )
292- if ns_since_epoch is None :
293- return None
294- else :
295- return datetime .datetime .fromtimestamp (
296- ns_since_epoch / 1_000_000_000 ,
297- tz = datetime .timezone .utc ,
298- )
# Bind the public ``uuid7`` name: interpreters older than 3.14 get the
# vendored generator, while 3.14 and later re-export the standard-library one.
if sys.version_info < (3, 14):
    uuid7: Callable[[], uuid.UUID] = _vendored_uuid7
else:
    from uuid import uuid7
0 commit comments