Skip to content

Commit 6f5068f

Browse files
authored
Add support for decimal logical type (#86)
* IN PROGRESS Added in types and fields definitions for Decimal. Added in tests for fields. NEED MORE TESTS AT MIN. * Changed field logic to error out on no or bad default, restricted fake to match schema, updated tests * Added comments to utils * Shifted decimal serialization functions from utils to serialization * Added schema and serialization tests * Updated docs * Linted, tested, and cleaned up * Added in # type: ignore, Checker was complaining about dataclasses_avroschema/fields.py:816: error: Incompatible types in assignment (expression has type "type", variable has type "Type[InmutableField]") dataclasses_avroschema/fields.py:816: note: "type.__call__" has type "Callable[[VarArg(Any), KwArg(Any)], Any]" and couldn't figure out why * Upping code coverage * Cleaned up code per review, added in minor validation checks
1 parent 0a819c4 commit 6f5068f

12 files changed

Lines changed: 480 additions & 16 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ coverage
7979

8080
# Workspace files are user-specific
8181
*.sublime-workspace
82+
# Removes PyCharm/JetBrains workspace files
83+
.idea/
8284

8385
# Project files should be checked into the repository, unless a significant
8486
# proportion of contributors will probably not be using Sublime Text

dataclasses_avroschema/fields.py

Lines changed: 83 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import collections
33
import dataclasses
44
import datetime
5+
import decimal
56
import json
67
import random
78
import typing
@@ -12,7 +13,7 @@
1213
from faker import Faker
1314
from pytz import utc
1415

15-
from dataclasses_avroschema import types, utils
16+
from dataclasses_avroschema import serialization, types, utils
1617

1718
fake = Faker()
1819
p = inflect.engine()
@@ -33,6 +34,7 @@
3334
TIME_MILLIS = "time-millis"
3435
TIMESTAMP_MILLIS = "timestamp-millis"
3536
UUID = "uuid"
37+
DECIMAL = "decimal"
3638
LOGICAL_DATE = {"type": INT, "logicalType": DATE}
3739
LOGICAL_TIME = {"type": INT, "logicalType": TIME_MILLIS}
3840
LOGICAL_DATETIME = {"type": LONG, "logicalType": TIMESTAMP_MILLIS}
@@ -61,20 +63,25 @@
6163

6264
PYTHON_PRIMITIVE_CONTAINERS = (list, tuple, dict)
6365

64-
PYTHON_LOGICAL_TYPES = (
65-
datetime.date,
66-
datetime.time,
67-
datetime.datetime,
68-
uuid.uuid4,
69-
uuid.UUID,
70-
)
66+
PYTHON_LOGICAL_TYPES = (datetime.date, datetime.time, datetime.datetime, uuid.uuid4, uuid.UUID, decimal.Decimal)
7167

7268
PYTHON_PRIMITIVE_TYPES = PYTHON_INMUTABLE_TYPES + PYTHON_PRIMITIVE_CONTAINERS
7369

7470
PRIMITIVE_AND_LOGICAL_TYPES = PYTHON_INMUTABLE_TYPES + PYTHON_LOGICAL_TYPES
7571

7672
PythonImnutableTypes = typing.Union[
77-
str, int, bool, float, list, tuple, dict, datetime.date, datetime.time, datetime.datetime, uuid.UUID
73+
str,
74+
int,
75+
bool,
76+
float,
77+
list,
78+
tuple,
79+
dict,
80+
datetime.date,
81+
datetime.time,
82+
datetime.datetime,
83+
uuid.UUID,
84+
decimal.Decimal,
7885
]
7986

8087

@@ -638,6 +645,71 @@ def fake(self) -> typing.Any:
638645
return self.type.fake()
639646

640647

648+
@dataclasses.dataclass
class DecimalField(BaseField):
    """Field that maps ``decimal.Decimal`` to Avro ``bytes`` with the ``decimal`` logical type.

    ``precision`` and ``scale`` are derived from ``self.default`` when the
    instance is created, so a default (either a ``decimal.Decimal`` or a
    ``types.Decimal``) is mandatory; construction raises ``ValueError`` otherwise.
    """

    # Placeholders only: set_precision_scale() overwrites both or raises.
    precision: int = -1
    scale: int = 0

    def __post_init__(self) -> None:
        self.set_precision_scale()

    def set_precision_scale(self) -> None:
        """Derive ``precision`` and ``scale`` from the default and validate them.

        Raises:
            ValueError: if no default was given, if the default is neither a
                ``decimal.Decimal`` nor a ``types.Decimal``, if a
                ``decimal.Decimal`` default is NaN or Infinity, or if the
                resulting precision/scale pair is not valid for an Avro schema.
        """
        default = self.default

        if default == types.MissingSentinel:
            raise ValueError(
                "decimal.Decimal default types must be specified to provide precision and scale,"
                " and must be either decimal.Decimal or types.Decimal"
            )

        if isinstance(default, decimal.Decimal):
            # NaN and Infinity report a string exponent from as_tuple(), which
            # cannot be turned into a scale; reject them with a clear message
            # instead of failing later with a TypeError.
            if not default.is_finite():
                raise ValueError("decimal.Decimal default must be a finite number")

            _, digits, exponent = default.as_tuple()
            # as_tuple() gives a negative exponent for fractional digits,
            # Avro expects the scale as a non-negative number.
            self.scale = -exponent
            # The context precision does not affect how many digits a Decimal
            # stores (see https://docs.python.org/3/library/decimal.html):
            # Decimal('3.00000') keeps all five zeros even with a context
            # precision of three. Avro cares about the form the number takes,
            # so the number of stored digits is used as the Avro precision.
            self.precision = len(digits)
        elif isinstance(default, types.Decimal):
            self.scale = default.scale
            self.precision = default.precision
        else:
            raise ValueError("decimal.Decimal default types must be either decimal.Decimal or types.Decimal")

        # Validation on precision and scale per Avro schema
        if self.precision <= 0:
            raise ValueError("Precision must be a positive integer greater than zero")

        if self.scale < 0 or self.precision < self.scale:
            raise ValueError("Scale must be zero or a positive integer less than or equal to the precision.")

    def get_avro_type(self) -> typing.Dict[str, typing.Any]:
        """Return the Avro type definition carrying the derived precision and scale."""
        return {"type": BYTES, "logicalType": DECIMAL, "precision": self.precision, "scale": self.scale}

    def get_default_value(self) -> typing.Union[str, dataclasses._MISSING_TYPE, None]:
        """Return the serialized default, or ``dataclasses.MISSING`` when there is none.

        A ``types.Decimal`` default is unwrapped to its own ``default``
        attribute first; that inner value may itself be missing.
        """
        default = self.default
        if isinstance(default, types.Decimal):
            default = default.default

        if default == types.MissingSentinel:
            return dataclasses.MISSING
        return serialization.decimal_to_str(default, self.precision, self.scale)

    def fake(self) -> decimal.Decimal:
        """Return a Faker-generated decimal constrained to this field's precision and scale."""
        # Digits are split so that left + right never exceeds the precision.
        return fake.pydecimal(right_digits=self.scale, left_digits=self.precision - self.scale)
711+
712+
641713
INMUTABLE_FIELDS_CLASSES = {
642714
bool: BooleanField,
643715
int: LongField,
@@ -665,6 +737,7 @@ def fake(self) -> typing.Any:
665737
uuid.uuid4: UUIDField,
666738
uuid.UUID: UUIDField,
667739
bytes: BytesField,
740+
decimal.Decimal: DecimalField,
668741
}
669742

670743
PRIMITIVE_LOGICAL_TYPES_FIELDS_CLASSES = {
@@ -742,7 +815,7 @@ def field_factory(
742815
default_factory=default_factory,
743816
)
744817
elif native_type in PYTHON_LOGICAL_TYPES:
745-
klass = LOGICAL_TYPES_FIELDS_CLASSES[native_type]
818+
klass = LOGICAL_TYPES_FIELDS_CLASSES[native_type] # type: ignore
746819
return klass(name=name, type=native_type, default=default, metadata=metadata)
747820
else:
748821
return RecordField(name=name, type=native_type, default=default, metadata=metadata)

dataclasses_avroschema/serialization.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import datetime
2+
import decimal
23
import io
34
import typing
45
import uuid
@@ -61,6 +62,41 @@ def time_to_str(value: datetime.time) -> str:
6162
return value.strftime(TIME_STR_FORMAT)
6263

6364

65+
def decimal_to_str(value: decimal.Decimal, precision: int, scale: int = 0) -> str:
    """Render a decimal as the string used for its default value in the schema.

    The decimal is first converted to bytes by ``prepare_bytes_decimal`` and
    the result is a backslash-u prefix followed by the hex digits of those
    bytes.

    NOTE(review): the Avro specification describes defaults for ``bytes`` as
    strings in which each code point 0-255 stands for one byte; here a single
    prefix is followed by the hex of all bytes -- confirm this is the intended
    encoding.

    Raises:
        ValueError: propagated from ``prepare_bytes_decimal`` when the value
            does not fit the given precision or scale.
    """
    hex_digits = prepare_bytes_decimal(value, precision, scale).hex()
    return r"\u" + hex_digits
68+
69+
70+
# This is an almost complete copy of fastavro's _logical_writers_py.prepare_bytes_decimal
# the only tweak is to pass in scale/precision directly instead of a schema
# This is needed to properly serialize a default decimal.Decimal into the avro schema
def prepare_bytes_decimal(data: decimal.Decimal, precision: int, scale: int = 0) -> bytes:
    """Convert a finite ``decimal.Decimal`` to big-endian two's-complement bytes.

    The value is rescaled to exactly ``scale`` fractional digits and the
    resulting unscaled integer is encoded as signed bytes.

    Args:
        data: the decimal to encode.
        precision: maximum number of digits ``data`` may store.
        scale: number of fractional digits of the target representation.

    Returns:
        The signed, big-endian byte representation of the unscaled value.

    Raises:
        ValueError: if ``data`` is NaN or Infinity, stores more digits than
            ``precision``, or has more fractional digits than ``scale``.
    """
    # NaN and Infinity report a string exponent from as_tuple(); without this
    # guard they would surface as an opaque TypeError in ``exp + scale``.
    if not data.is_finite():
        raise ValueError("Only finite decimals can be converted to bytes")

    sign, digits, exp = data.as_tuple()

    if len(digits) > precision:
        raise ValueError("The decimal precision is bigger than allowed by schema")

    # Number of zeros to append so the value has exactly ``scale`` fractional digits.
    delta = exp + scale

    if delta < 0:
        raise ValueError("Scale provided in schema does not match the decimal")

    unscaled_datum = 0
    for digit in digits:
        unscaled_datum = (unscaled_datum * 10) + digit

    unscaled_datum = 10 ** delta * unscaled_datum

    # Reserve one extra bit for the sign, rounded up to whole bytes.
    bytes_req = (unscaled_datum.bit_length() + 8) // 8

    if sign:
        unscaled_datum = -unscaled_datum

    return unscaled_datum.to_bytes(bytes_req, byteorder="big", signed=True)
98+
99+
64100
def to_json(data: typing.Dict[str, typing.Any]) -> typing.Dict:
65101
json_data = {}
66102

@@ -73,7 +109,7 @@ def to_json(data: typing.Dict[str, typing.Any]) -> typing.Dict:
73109
value = date_to_str(value)
74110
elif isinstance(value, datetime.time):
75111
value = time_to_str(value)
76-
elif isinstance(value, uuid.UUID):
112+
elif isinstance(value, (uuid.UUID, decimal.Decimal)):
77113
value = str(value)
78114

79115
json_data[field] = value

dataclasses_avroschema/types.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,25 @@ def __repr__(self) -> str:
4949
return f"{self.symbols}"
5050

5151

52-
CUSTOM_TYPES = ("Fixed", "Enum")
52+
@dataclasses.dataclass
class Decimal(typing.Generic[T]):
    """
    Describes an Avro Decimal type

    precision (int): Number precision. Required
    scale (int): Number scale. Default 0
    default (typing.Any): Default value of the field. MissingSentinel when not provided
    aliases (typing.Optional[typing.List]): Aliases. Default None
    """

    precision: int
    scale: int = 0
    default: typing.Any = dataclasses.field(default=MissingSentinel)
    _dataclasses_custom_type: str = "Decimal"

    # Decimal serializes to bytes, which doesn't support namespace
    aliases: typing.Optional[typing.List] = None

    def __repr__(self) -> str:
        precision, scale = self.precision, self.scale
        return f"Decimal precision: {precision} scale:{scale}"
71+
72+
73+
CUSTOM_TYPES = ("Fixed", "Enum", "Decimal")

docs/fields_specification.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,16 @@ Language implementations must ignore unknown logical types when reading, and sho
8181

8282
* UUID: Represents a uuid as a string
8383

84+
* Decimal: Represents a decimal.Decimal as bytes
85+
8486
| Avro Type | Logical Type |Python Type |
8587
|-----------|--------------|-------------|
8688
| int | date | datetime.date |
8789
| int | time-millis | datetime.time |
8890
| long | timestamp-millis | datetime.datetime |
8991
| string | uuid | uuid.uuid4 |
9092
| string | uuid | uuid.UUID |
93+
| bytes | decimal | decimal.Decimal |
9194

9295
### Avro Field and Python Types Summary
9396

@@ -115,6 +118,7 @@ Python Type | Avro Type | Logical Type |
115118
| datetime.time | int | time-millis |
116119
| datetime.datetime | long | timestamp-millis |
117120
| uuid.uuid4 | string | uuid |
121+
| decimal.Decimal | bytes | decimal |
118122

119123
## Adding Custom Field-level Attributes
120124

docs/logical_types.md

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ The following list represent the avro logical types mapped to python types:
99
| long | timestamp-millis | datetime.datetime |
1010
| string | uuid | uuid.uuid4 |
1111
| string | uuid | uuid.UUID |
12+
| bytes | decimal | decimal.Decimal |
1213

1314
### Date
1415

@@ -205,3 +206,76 @@ UUIDLogicalTypes.avro_schema()
205206
"doc": "UUID logical types"
206207
}'
207208
```
209+
210+
### Decimal
211+
212+
The code below shows an example of how to use decimals. There are a few important things to note:
213+
* A default IS REQUIRED in order to set scale and precision on the Avro schema
214+
* It is strongly recommended to set these explicitly using `types.Decimal(scale=, precision=)`
215+
* They can be set implicitly by using a default `decimal.Decimal`
216+
* If set implicitly, scale and precision will be derived from the default as follows:
217+
```python
218+
default: decimal.Decimal = decimal.Decimal('3.14')
219+
sign, digits, exp = default.as_tuple()
220+
precision = len(digits)
221+
scale = exp * -1 # Avro schema defines scale as a positive, as_tuple provides negative
222+
```
223+
* This CAN and WILL have strange consequences if you are not careful, ESPECIALLY if constructing `decimal.Decimal` with a float. For example:
224+
```python
225+
string_definition: decimal.Decimal = decimal.Decimal('3.14')
226+
# scale = 2, precision = 3
227+
float_definition: decimal.Decimal = decimal.Decimal(3.14)
228+
# scale = 51, precision = 52
229+
```
230+
231+
```python
232+
import decimal
233+
234+
from dataclasses_avroschema import AvroModel, types
235+
236+
237+
class DecimalLogicalTypes(AvroModel):
238+
"Decimal logical types"
239+
explicit: decimal.Decimal = types.Decimal(scale=2, precision=3)
240+
explicit_with_default: decimal.Decimal = types.Decimal(scale=2, precision=3, default=decimal.Decimal('3.14'))
241+
implicit: decimal.Decimal = decimal.Decimal('3.14') # sets scale = 2, precision = 3, derived from provided default
242+
# will_error: decimal.Decimal # THIS WILL ERROR
243+
DecimalLogicalTypes.avro_schema()
244+
245+
'{
246+
"type": "record",
247+
"name": "DecimalLogicalTypes",
248+
"fields": [
249+
{
250+
"name": "explicit",
251+
"type": {
252+
"type": "bytes",
253+
"logicalType": "decimal",
254+
"precision": 3,
255+
"scale": 2
256+
}
257+
},
258+
{
259+
"name": "explicit_with_default",
260+
"type": {
261+
"type": "bytes",
262+
"logicalType": "decimal",
263+
"precision": 3,
264+
"scale": 2
265+
},
266+
"default": "\\u013a"
267+
},
268+
{
269+
"name": "implicit",
270+
"type": {
271+
"type": "bytes",
272+
"logicalType": "decimal",
273+
"precision": 3,
274+
"scale": 2
275+
},
276+
"default": "\\u013a"
277+
}
278+
],
279+
"doc": "Decimal logical types"
280+
}'
281+
```

tests/fake/test_fake.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import dataclasses
22
import datetime
3+
import decimal
34
import typing
45
import uuid
56

6-
from dataclasses_avroschema import AvroModel
7+
from dataclasses_avroschema import AvroModel, types
78

89

910
def test_fake_primitive_types(user_dataclass):
@@ -131,3 +132,17 @@ class User(AvroModel):
131132
teamates: typing.Dict[str, typing.Type["User"]] = None
132133

133134
assert isinstance(User.fake(), User)
135+
136+
137+
def test_decimals():
    """
    Faking a model with Decimal logical types yields an instance of that model
    """

    class User(AvroModel):
        name: str
        age: int
        # precision and scale derived implicitly from the default
        test_score_1: decimal.Decimal = decimal.Decimal("100.00")
        # precision and scale given explicitly, without a default value
        test_score_2: decimal.Decimal = types.Decimal(scale=5, precision=11)

    fake_user = User.fake()

    assert isinstance(fake_user, User)

0 commit comments

Comments
 (0)