Skip to content

took other PRs and added class-API and context-manager #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 49 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,61 @@
# daz : Denormals are zeros
A tool for changing the CPU flags that control how denormal numbers are handled.

[![pypi](https://img.shields.io/pypi/v/daz.svg)](https://pypi.python.org/pypi/daz)
[![GitHub license](https://img.shields.io/github/license/chainer/daz.svg)](https://github.com/chainer/daz)
[![travis](https://img.shields.io/travis/chainer/daz/master.svg)](https://travis-ci.org/chainer/daz)

# daz : Denormals are zeros
A tool for changing the CPU flags that control how denormal numbers are handled.

* **DAZ** (Denormals-Are-Zero) treats denormal inputs as zero
* **FTZ** (Flush-To-Zero) writes zero for denormal outputs


# Application

On x86-64 CPUs running 64-bit programs, the CPU's SSE unit performs the floating-point operations.
Calculations involving denormal (a.k.a. subnormal) numbers incur performance penalties.

If your use case doesn't require the highest accuracy for very small numbers,
these can be treated as - or rounded to - zero.
This is achieved by setting the corresponding CPU flags.
With the flags set, such calculations are no longer slowed down by a large factor!

See [Wikipedia](https://en.wikipedia.org/wiki/Subnormal_number) for more information on denormal numbers.

In Python, [NumPy](https://numpy.org/) functions in particular show a measurable benefit.


# Usage

basic functional use:
```python
import daz
daz.set_ftz()
daz.set_daz()
daz.unset_ftz()
daz.unset_daz()
daz.get_ftz()
daz.get_daz()
```

alternative 1:
```python
from daz import DAZ
# prev_daz: bool = DAZ.set_daz(daz: bool | None = True)
# prev_ftz: bool = DAZ.set_ftz(ftz: bool | None = True)
prev_daz = DAZ.set_daz(True)
prev_ftz = DAZ.set_ftz()
```

alternative 2:
```python
from daz import DAZ

# DAZ(daz: bool = True, ftz: bool = True)
with DAZ():
    # daz and ftz set True
    pass

with DAZ(False, True):
    # daz unset, but ftz set True
    pass
```
9 changes: 4 additions & 5 deletions daz/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from daz._core import set_daz # NOQA
from daz._core import set_ftz # NOQA
from daz._core import unset_daz # NOQA
from daz._core import unset_ftz # NOQA
from daz._version import __version__ # NOQA
from daz._core import get_daz, set_daz, unset_daz
from daz._core import get_ftz, set_ftz, unset_ftz
from daz._version import __version__
from .daz import DAZ
33 changes: 33 additions & 0 deletions daz/_core.c
Original file line number Diff line number Diff line change
@@ -1,45 +1,78 @@
#include <Python.h>

#if defined(__SSE__)
#include <xmmintrin.h>
#endif

/*
 * Enable DAZ (Denormals-Are-Zero) by setting bit 6 of the SSE MXCSR register.
 *
 * The function is registered with METH_NOARGS and invoked through a
 * PyCFunction pointer, so it must have the two-parameter PyCFunction
 * signature; calling a `(void)` function through that pointer type is
 * undefined behaviour. Both parameters are unused.
 *
 * When compiled without __SSE__ the register is not touched.
 *
 * Returns a new reference to None.
 */
static PyObject* set_daz(PyObject* self, PyObject* unused)
{
    (void)self;
    (void)unused;
#if defined(__SSE__)
    unsigned int mxcsr = _mm_getcsr();
    mxcsr |= (1u << 6);  /* bit 6: DAZ */
    _mm_setcsr(mxcsr);
#endif
    Py_RETURN_NONE;
}

/*
 * Enable FTZ (Flush-To-Zero) by setting bit 15 of the SSE MXCSR register.
 *
 * The function is registered with METH_NOARGS and invoked through a
 * PyCFunction pointer, so it must have the two-parameter PyCFunction
 * signature; calling a `(void)` function through that pointer type is
 * undefined behaviour. Both parameters are unused.
 *
 * When compiled without __SSE__ the register is not touched.
 *
 * Returns a new reference to None.
 */
static PyObject* set_ftz(PyObject* self, PyObject* unused)
{
    (void)self;
    (void)unused;
#if defined(__SSE__)
    unsigned int mxcsr = _mm_getcsr();
    mxcsr |= (1u << 15);  /* bit 15: FTZ */
    _mm_setcsr(mxcsr);
#endif
    Py_RETURN_NONE;
}

/*
 * Report whether DAZ (Denormals-Are-Zero, MXCSR bit 6) is currently set.
 *
 * The function is registered with METH_NOARGS and invoked through a
 * PyCFunction pointer, so it must have the two-parameter PyCFunction
 * signature; calling a `(void)` function through that pointer type is
 * undefined behaviour. Both parameters are unused.
 *
 * Returns a new reference to True when the bit is set, otherwise False.
 * When compiled without __SSE__ the result is always False.
 */
static PyObject* get_daz(PyObject* self, PyObject* unused)
{
    (void)self;
    (void)unused;
#if defined(__SSE__)
    if (_mm_getcsr() & (1u << 6)) {  /* bit 6: DAZ */
        Py_RETURN_TRUE;
    }
#endif
    Py_RETURN_FALSE;
}

/*
 * Report whether FTZ (Flush-To-Zero, MXCSR bit 15) is currently set.
 *
 * The function is registered with METH_NOARGS and invoked through a
 * PyCFunction pointer, so it must have the two-parameter PyCFunction
 * signature; calling a `(void)` function through that pointer type is
 * undefined behaviour. Both parameters are unused.
 *
 * Returns a new reference to True when the bit is set, otherwise False.
 * When compiled without __SSE__ the result is always False.
 */
static PyObject* get_ftz(PyObject* self, PyObject* unused)
{
    (void)self;
    (void)unused;
#if defined(__SSE__)
    if (_mm_getcsr() & (1u << 15)) {  /* bit 15: FTZ */
        Py_RETURN_TRUE;
    }
#endif
    Py_RETURN_FALSE;
}

/*
 * Disable DAZ (Denormals-Are-Zero) by clearing bit 6 of the SSE MXCSR
 * register; all other bits are written back unchanged.
 *
 * The function is registered with METH_NOARGS and invoked through a
 * PyCFunction pointer, so it must have the two-parameter PyCFunction
 * signature; calling a `(void)` function through that pointer type is
 * undefined behaviour. Both parameters are unused.
 *
 * When compiled without __SSE__ the register is not touched.
 *
 * Returns a new reference to None.
 */
static PyObject* unset_daz(PyObject* self, PyObject* unused)
{
    (void)self;
    (void)unused;
#if defined(__SSE__)
    unsigned int mxcsr = _mm_getcsr();
    mxcsr &= ~(1u << 6);  /* bit 6: DAZ */
    _mm_setcsr(mxcsr);
#endif
    Py_RETURN_NONE;
}

/*
 * Disable FTZ (Flush-To-Zero) by clearing bit 15 of the SSE MXCSR
 * register; all other bits are written back unchanged.
 *
 * The function is registered with METH_NOARGS and invoked through a
 * PyCFunction pointer, so it must have the two-parameter PyCFunction
 * signature; calling a `(void)` function through that pointer type is
 * undefined behaviour. Both parameters are unused.
 *
 * When compiled without __SSE__ the register is not touched.
 *
 * Returns a new reference to None.
 */
static PyObject* unset_ftz(PyObject* self, PyObject* unused)
{
    (void)self;
    (void)unused;
#if defined(__SSE__)
    unsigned int mxcsr = _mm_getcsr();
    mxcsr &= ~(1u << 15);  /* bit 15: FTZ */
    _mm_setcsr(mxcsr);
#endif
    Py_RETURN_NONE;
}

static PyMethodDef methods[] = {
{"set_ftz", (PyCFunction)set_ftz, METH_NOARGS, 0},
{"set_daz", (PyCFunction)set_daz, METH_NOARGS, 0},
{"get_ftz", (PyCFunction)get_ftz, METH_NOARGS, 0},
{"get_daz", (PyCFunction)get_daz, METH_NOARGS, 0},
{"unset_ftz", (PyCFunction)unset_ftz, METH_NOARGS, 0},
{"unset_daz", (PyCFunction)unset_daz, METH_NOARGS, 0},
{NULL, NULL, 0, NULL}
Expand Down
2 changes: 1 addition & 1 deletion daz/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.0.1'
__version__ = "0.0.2"
40 changes: 40 additions & 0 deletions daz/daz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@

import daz._core as daz_core

class DAZ:
    """Context manager and static helpers for the DAZ and FTZ CPU flags.

    As a context manager, the requested flag states are applied on entry and
    the states that were active at entry are restored on exit::

        with DAZ(daz=True, ftz=False):
            ...

    Args:
        daz: State the Denormals-Are-Zero flag gets inside the ``with`` block.
        ftz: State the Flush-To-Zero flag gets inside the ``with`` block.
    """

    def __init__(self, daz: bool = True, ftz: bool = True) -> None:
        self.daz: bool = daz
        self.ftz: bool = ftz
        # Overwritten by __enter__ with the flag states found at entry so that
        # __exit__ can restore them.
        self._prev_daz: bool = False
        self._prev_ftz: bool = False

    @staticmethod
    def set_daz(daz: bool | None = True) -> bool:
        """Set or clear the 'Denormals-Are-Zero' flag.

        Args:
            daz: A truthy value sets the flag, a falsy value clears it, and
                ``None`` leaves it untouched (query only).

        Returns:
            The state of the flag before this call.
        """
        prev = daz_core.get_daz()
        if daz is None:
            return prev
        # Truthiness rather than `is True`: bool-like values such as 1 or
        # numpy.bool_ must enable the flag, not silently clear it.
        if daz:
            daz_core.set_daz()
        else:
            daz_core.unset_daz()
        return prev

    @staticmethod
    def set_ftz(ftz: bool | None = True) -> bool:
        """Set or clear the 'Flush-To-Zero' flag.

        Args:
            ftz: A truthy value sets the flag, a falsy value clears it, and
                ``None`` leaves it untouched (query only).

        Returns:
            The state of the flag before this call.
        """
        prev = daz_core.get_ftz()
        if ftz is None:
            return prev
        # Truthiness rather than `is True`: bool-like values such as 1 or
        # numpy.bool_ must enable the flag, not silently clear it.
        if ftz:
            daz_core.set_ftz()
        else:
            daz_core.unset_ftz()
        return prev

    def __enter__(self) -> tuple[bool, bool]:
        """Apply the configured flags and remember the previous states.

        Returns:
            The ``(daz, ftz)`` values this instance was configured with.
        """
        self._prev_daz = DAZ.set_daz(self.daz)
        self._prev_ftz = DAZ.set_ftz(self.ftz)
        return self.daz, self.ftz

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Restore the flag states captured by ``__enter__``.

        Returns ``None``, so an exception raised inside the ``with`` block is
        not suppressed.
        """
        DAZ.set_daz(self._prev_daz)
        DAZ.set_ftz(self._prev_ftz)
15 changes: 15 additions & 0 deletions daz/daz.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Signatures of the six flag functions; each takes no arguments.
# NOTE(review): this stub file is named daz.pyi, so it describes the module
# daz/daz.py, which as shown defines only the DAZ class. The functions below
# are implemented in daz/_core.c - confirm whether these declarations belong
# in a `_core.pyi` instead.

# Enable the Flush-To-Zero flag.
def set_ftz() -> None: ...
# Enable the Denormals-Are-Zero flag.
def set_daz() -> None: ...
# Disable the Flush-To-Zero flag.
def unset_ftz() -> None: ...
# Disable the Denormals-Are-Zero flag.
def unset_daz() -> None: ...
# Report whether the Flush-To-Zero flag is currently set.
def get_ftz() -> bool: ...
# Report whether the Denormals-Are-Zero flag is currently set.
def get_daz() -> bool: ...

class DAZ:
def __init__(self, daz: bool = True, ftz: bool = True) -> None: ...
@staticmethod
def set_daz(daz: bool | None = True) -> bool: ...
@staticmethod
def set_ftz(ftz: bool | None = True) -> bool: ...
def __enter__(self) -> tuple[bool, bool]: ...
def __exit__(self, exc_type, exc_val, exc_tb) -> None: ...
Empty file added daz/py.typed
Empty file.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def reduce_requirements(reqs):
packages=[
'daz',
],
package_data={"daz": ["py.typed", "daz.pyi"]},
ext_modules=[setuptools.Extension('daz._core', ['daz/_core.c'])],
zip_safe=False,
extras_require=extras_require,
Expand Down
6 changes: 6 additions & 0 deletions tests/test_daz.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,16 @@ def setUp(self):
def check_normal(self):
    """Assert default behaviour: scaling round-trips and both flags are off."""
    # self.denormal scaled up must equal self.normal ...
    scaled_up = self.denormal * self.scale
    assert self.normal == scaled_up
    # ... and self.normal scaled down must equal self.denormal.
    scaled_down = self.normal / self.scale
    assert scaled_down == self.denormal
    # Neither CPU flag may be reported as set.
    daz_enabled = daz.get_daz()
    assert not daz_enabled
    ftz_enabled = daz.get_ftz()
    assert not ftz_enabled

def test_normal(self):
    # Runs the shared check_normal assertions; this test itself does not
    # set or unset any flag.
    self.check_normal()

def test_daz(self):
daz.set_daz()
assert daz.get_daz()
assert not daz.get_ftz()
assert self.normal / self.scale == 0
assert self.denormal * self.scale == 0
assert self.denormal == 0
Expand All @@ -30,6 +34,8 @@ def test_daz(self):

def test_ftz(self):
daz.set_ftz()
assert daz.get_ftz()
assert not daz.get_daz()
assert self.normal / self.scale == 0
assert self.denormal * self.scale == self.normal
assert self.denormal != 0
Expand Down