Skip to content

[performance] Optimize load/dump function to remove for loop #131

Open
@rnag

Description

@rnag
  • Dataclass Wizard version: 0.23.0
  • Python version: 3.12
  • Operating System: Mac OS X

Description

Hopefully this is not a duplicate issue; if it is, I'll take it down. This is a long-term goal of mine: I want to optimize the load/dump functions by removing the per-call `for` loop over the dataclass fields. Below are performance comparisons that I put together using a demo script.

What I Did

Run the following code:

import json
from dataclasses import dataclass, fields
from datetime import datetime
from enum import Enum
from timeit import timeit

from dataclass_wizard.utils.type_conv import as_bool, as_str, as_int


class E(Enum):
    """Sample enum whose members carry human-readable string values."""
    my_value_1 = 'My Value 1'
    my_value_2 = 'My Value 2'


def as_e(o: E) -> str:
    """Return the value stored on the enum member *o*.

    Used as the converter for ``E``-typed fields in ``type_to_meth``.
    """
    return o.value


# Dispatch table mapping a field's annotated type to the callable that converts
# that field's value for the output dict. The Container methods index it with
# `f.type`, so the keys are the type objects themselves.
type_to_meth = {
    str: as_str,
    E: as_e,
    int: as_int,
    # Unbound method: invoked with the datetime instance as its only argument.
    datetime: datetime.isoformat,
    bool: as_bool,
}


@dataclass
class Container:
    """Demo dataclass comparing three ways of building a JSON-ready dict.

    Every ``json*`` method returns ``{field name: converted value}``, where the
    converter for a field is looked up in ``type_to_meth`` by the field's
    annotated type. The methods differ only in how much of that work is
    repeated on each call.
    """

    date: datetime
    abc: str = 'abc'
    xyz: int = 123
    my_bool: bool = False
    my_enum: E = E.my_value_2

    def json(self):
        """Baseline: call ``fields()`` and look up every converter on each call."""
        return {f.name: type_to_meth[f.type](getattr(self, f.name)) for f in fields(self)}

    def json_with_cached_fields(self):
        """Return the same dict as :meth:`json`, calling ``fields()`` only once.

        The first call builds a replacement function whose default arguments
        capture ``fields(self)`` and ``type_to_meth``, installs it on
        ``Container`` under this method's name, and delegates to it. Later
        calls therefore reach the replacement directly.
        """

        def json_with_cached_fields(self, _fields=fields(self), _type_to_meth=type_to_meth):
            return {f.name: _type_to_meth[f.type](getattr(self, f.name)) for f in _fields}

        # Self-replace on the class so this bootstrap code runs only once.
        Container.json_with_cached_fields = json_with_cached_fields
        return json_with_cached_fields(self)

    def json_with_no_for_loop(self):
        """Return the same dict as :meth:`json` via a generated, loop-free function.

        The first call generates source for a function containing one dict
        entry per field (``'name': converter(self.name)``), compiles it with
        ``exec``, installs the result on ``Container`` under this method's
        name, and delegates to it. Later calls run the generated function
        directly, with no iteration over the fields.

        NOTE: the generated function is specialised to the fields of the
        instance that triggered generation and is installed on ``Container``
        itself.
        """
        # Bind each converter under a synthetic alias instead of spelling its
        # __qualname__ into the source: qualnames such as 'datetime.isoformat'
        # or '<lambda>' are not reliably valid, resolvable expressions.
        converters = {}
        entries = []
        for index, f in enumerate(fields(self)):
            alias = f'_conv_{index}'
            converters[alias] = type_to_meth[f.type]
            entries.append(f'        {f.name!r}: {alias}(self.{f.name}),')

        # Join outside of any f-string: a backslash or a nested same-type quote
        # inside a replacement field is a SyntaxError before Python 3.12.
        source = '\n'.join([
            'def json_with_no_for_loop(self):',
            '    return {',
            *entries,
            '    }',
        ])

        # The source is built solely from this dataclass's own field names,
        # never from external input, so exec() is not fed untrusted text here.
        ns = {}
        exec(source, converters, ns)
        generated = ns['json_with_no_for_loop']

        # Self-replace on the class so code generation happens only once.
        Container.json_with_no_for_loop = generated
        return generated(self)


# Single instance shared by all three timed statements.
c = Container(date=datetime.today(), xyz=321)

# Number of calls per timed statement.
n = 1_000_000
# Each line prints the total time reported by timeit for n calls, rounded to
# 3 decimals. For the two self-replacing methods, the first timed call also
# performs their one-time setup.
print('json():                     ', round(timeit('c.json()', globals=globals(), number=n), 3))
print('json_with_cached_fields():  ', round(timeit('c.json_with_cached_fields()', globals=globals(), number=n), 3))
print('json_with_no_for_loop():    ', round(timeit('c.json_with_no_for_loop()', globals=globals(), number=n), 3))
print()

# Sanity check: all three implementations must produce the same dict.
assert c.json() == c.json_with_cached_fields() == c.json_with_no_for_loop()

# Show the instance and its serialised form.
print(c)
print(json.dumps(c.json_with_no_for_loop(), indent=2))

Output:

json():                      2.191
json_with_cached_fields():   1.44
json_with_no_for_loop():     1.167

Container(date=datetime.datetime(2024, 11, 3, 18, 4, 28, 182372), abc='abc', xyz=321, my_bool=False, my_enum=<E.my_value_2: 'My Value 2'>)
{
  "date": "2024-11-03T18:04:28.182372",
  "abc": "abc",
  "xyz": 321,
  "my_bool": false,
  "my_enum": "My Value 2"
}

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions