-
Notifications
You must be signed in to change notification settings - Fork 291
Switching to using memfd for input data #990
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f3dac23
3824ea2
6144b60
c88ef56
cbb0f87
8f4ba33
52948c7
77bf381
709c6db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| # Based off https://github.com/django/django/blob/main/django/utils/functional.py, licensed under 3-clause BSD. | ||
| from functools import total_ordering | ||
|
|
||
| from dmoj.cptbox._cptbox import BufferProxy | ||
|
|
||
| _SENTINEL = object() | ||
|
|
||
|
|
||
@total_ordering
class LazyBytes(BufferProxy):
    """
    Stand-in for a bytes value that is produced by a callable on first use.

    The callable is not invoked until something needs the value; its result
    is then stored and reused by every later access.
    """

    def __init__(self, func):
        # Start out unevaluated: ``func`` only runs when the value is needed.
        self.__value = _SENTINEL
        self.__func = func

    def __get_value(self):
        # Evaluate on first access and remember the result.
        cached = self.__value
        if cached is _SENTINEL:
            cached = self.__value = self.__func()
        return cached

    @classmethod
    def _create_promise(cls, method_name):
        """Build a method that forwards ``method_name`` to the evaluated value."""

        def wrapper(self, *args, **kw):
            # Force evaluation, then look the method up on the real object
            # and call it with the arguments we were given.
            target = getattr(self.__get_value(), method_name)
            return target(*args, **kw)

        return wrapper

    def __cast(self):
        # Every explicit conversion below goes through a real ``bytes`` object.
        return bytes(self.__get_value())

    def _get_real_buffer(self):
        # NOTE(review): presumably the hook BufferProxy calls to obtain the
        # underlying buffer -- confirm against dmoj.cptbox._cptbox.
        return self.__cast()

    def __bytes__(self):
        return self.__cast()

    def __repr__(self):
        return repr(self.__cast())

    def __str__(self):
        return str(self.__cast())

    def __eq__(self, other):
        # Unwrap another lazy value so that two real bytes objects are compared.
        rhs = other.__cast() if isinstance(other, LazyBytes) else other
        return self.__cast() == rhs

    def __lt__(self, other):
        # total_ordering derives the remaining comparisons from this and __eq__.
        rhs = other.__cast() if isinstance(other, LazyBytes) else other
        return self.__cast() < rhs

    def __hash__(self):
        return hash(self.__cast())

    def __mod__(self, rhs):
        return self.__cast() % rhs

    def __add__(self, other):
        return self.__cast() + other

    def __radd__(self, other):
        return other + self.__cast()

    def __deepcopy__(self, memo):
        # Treated as effectively immutable, so a deep copy hands back the
        # very same object (and records that in ``memo``).
        memo[id(self)] = self
        return self
|
|
||
|
|
||
# Install a forwarding wrapper on LazyBytes for every attribute name defined
# by bytes or any class in its MRO, unless LazyBytes already exposes that name
# (directly or through inheritance).
for type_ in bytes.mro():
    for method_name in vars(type_):
        if not hasattr(LazyBytes, method_name):
            # The same kind of wrapper is used for every name; it looks up
            # the real implementation only when it is called.
            setattr(LazyBytes, method_name, LazyBytes._create_promise(method_name))
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,11 +1,116 @@ | ||
| import io | ||
| import mmap | ||
| import os | ||
| from abc import ABCMeta, abstractmethod | ||
| from tempfile import NamedTemporaryFile, TemporaryFile | ||
| from typing import Optional | ||
|
|
||
| from dmoj.cptbox._cptbox import memory_fd_create, memory_fd_seal | ||
| from dmoj.cptbox._cptbox import memfd_create, memfd_seal | ||
|
|
||
|
|
||
class MemoryIO(io.FileIO):
    """Read/write file object over a descriptor from ``memory_fd_create()``."""

    def __init__(self) -> None:
        fd = memory_fd_create()
        super().__init__(fd, 'r+')
| def _make_fd_readonly(fd): | ||
Xyene marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| new_fd = os.open(f'/proc/self/fd/{fd}', os.O_RDONLY) | ||
| try: | ||
| os.dup2(new_fd, fd) | ||
| finally: | ||
| os.close(new_fd) | ||
|
|
||
|
|
||
class MmapableIO(io.FileIO, metaclass=ABCMeta):
    """
    Read/write file object whose contents can be handed out by path or as bytes.

    Subclasses provide the file descriptor and implement
    ``usable_with_name()``, ``seal()`` and ``to_path()``.
    """

    def __init__(self, fd: int, *, prefill: Optional[bytes] = None, seal: bool = False) -> None:
        """
        Wrap ``fd`` in ``'r+'`` mode, optionally writing initial data and sealing.

        ``prefill`` is written in full when it is non-empty; ``seal()`` is
        called afterwards when ``seal`` is true. Raises ``OSError`` if the
        prefill data cannot be written completely.
        """
        super().__init__(fd, 'r+')

        if prefill:
            # FileIO is a raw stream: one write() call may accept only part of
            # the buffer, so keep writing until nothing is left.
            view = memoryview(prefill).cast('B')
            offset = 0
            while offset < len(view):
                written = self.write(view[offset:])
                if not written:
                    # No progress was made; fail instead of looping forever.
                    raise OSError('could not write prefill data in full')
                offset += written
        if seal:
            self.seal()

    @classmethod
    @abstractmethod
    def usable_with_name(cls) -> bool:
        """Return whether this backend works here and its path is usable."""
        ...

    @abstractmethod
    def seal(self) -> None:
        """Finish the writing phase; what that involves is backend-specific."""
        ...

    @abstractmethod
    def to_path(self) -> str:
        """Return a filesystem path through which the contents can be opened."""
        ...

    def to_bytes(self) -> bytes:
        """Return the entire contents of the file as ``bytes``."""
        # NOTE(review): this makes a full in-memory copy. Reviewers flagged
        # that it doubles the memory needed for very large inputs; the author
        # expects it to be called rarely (compatibility with old checkers).
        fd = self.fileno()
        # mmap refuses to map a zero-length file, so handle that case up front
        # instead of matching on the text of the resulting ValueError.
        if os.fstat(fd).st_size == 0:
            return b''
        with mmap.mmap(fd, 0, access=mmap.ACCESS_READ) as f:
            return bytes(f)
|
|
||
|
|
||
class NamedFileIO(MmapableIO):
    """Backend that keeps its data in a named temporary file, removed on close."""

    # Path of the backing temporary file.
    _name: str

    def __init__(self, *, prefill: Optional[bytes] = None, seal: bool = False) -> None:
        # delete=False: the path has to outlive the NamedTemporaryFile wrapper,
        # so close() is responsible for removing it.
        with NamedTemporaryFile(delete=False) as f:
            self._name = f.name
            super().__init__(os.dup(f.fileno()), prefill=prefill, seal=seal)

    def seal(self) -> None:
        """Rewind to the start of the file."""
        self.seek(0, os.SEEK_SET)

    def close(self) -> None:
        """
        Close the descriptor and delete the backing file.

        Calling this again after the object is closed does nothing, matching
        the usual file-object contract.
        """
        if self.closed:
            # The file was already removed; a second unlink would raise.
            return
        try:
            super().close()
        finally:
            # Remove the file even if closing the descriptor failed, because
            # later calls return early and would never get here.
            os.unlink(self._name)

    def to_path(self) -> str:
        """Return the path of the backing temporary file."""
        return self._name

    @classmethod
    def usable_with_name(cls):
        """Always usable: the data lives in a file with a real path."""
        return True
|
|
||
|
|
||
class UnnamedFileIO(MmapableIO):
    """Backend over an anonymous temporary file, addressed through ``/proc``."""

    def __init__(self, *, prefill: Optional[bytes] = None, seal=False) -> None:
        with TemporaryFile() as scratch:
            # Keep a duplicate of our own: the TemporaryFile wrapper closes
            # its descriptor when this block exits.
            own_fd = os.dup(scratch.fileno())
            super().__init__(own_fd, prefill=prefill, seal=seal)

    def seal(self) -> None:
        """Rewind to the start, then swap the descriptor for a read-only one."""
        self.seek(0, os.SEEK_SET)
        _make_fd_readonly(self.fileno())

    def to_path(self) -> str:
        """Return this process's ``/proc`` entry for the descriptor."""
        return f'/proc/{os.getpid()}/fd/{self.fileno()}'

    @classmethod
    def usable_with_name(cls):
        """Return whether ``to_path()`` of a fresh instance names an existing path."""
        with cls() as probe:
            return os.path.exists(probe.to_path())
|
|
||
|
|
||
class MemfdIO(MmapableIO):
    """Backend over a descriptor obtained from ``memfd_create()``."""

    def __init__(self, *, prefill: Optional[bytes] = None, seal: bool = False) -> None:
        super().__init__(memfd_create(), prefill=prefill, seal=seal)

    def seal(self) -> None:
        """Seal the memfd, then swap the descriptor for a read-only one."""
        fd = self.fileno()
        # Seal exactly once, through the same memfd_* API that created the
        # descriptor.
        memfd_seal(fd)
        _make_fd_readonly(fd)

    def to_path(self) -> str:
        """Return this process's ``/proc`` entry for the descriptor."""
        return f'/proc/{os.getpid()}/fd/{self.fileno()}'

    @classmethod
    def usable_with_name(cls):
        """
        Return whether a memfd can be created and reached through ``to_path()``.

        An ``OSError`` while probing counts as "not usable".
        """
        try:
            with cls() as f:
                return os.path.exists(f.to_path())
        except OSError:
            # NOTE(review): presumably raised by memfd_create() where memfd is
            # unsupported -- confirm against dmoj.cptbox._cptbox.
            return False
|
|
||
|
|
||
# Pick the first backend that works on this system, in order of preference:
# memfd, then an unlinked temporary file (UnnamedFileIO). On FreeBSD and some
# other systems /proc/[pid]/fd doesn't exist, so to_path() will not work for
# those two; NamedFileIO is the fallback in that case.
MemoryIO = next(
    backend
    for backend in (MemfdIO, UnnamedFileIO, NamedFileIO)
    if backend.usable_with_name()
)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, is this function called on FreeBSD anymore? Are you creating the tempfile in Python instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's keep this around for now, since I'd rather this function work on all platforms, as the detection logic for the FreeBSD case is now different. If FreeBSD implements `/proc/[pid]/fd` some day, this will magically work.