diff --git a/pygbx/gbx.py b/pygbx/gbx.py index f9ae431..4014fa5 100644 --- a/pygbx/gbx.py +++ b/pygbx/gbx.py @@ -1,7 +1,7 @@ import logging from enum import IntEnum -import lzo +from pygbx.lzo import LZO import zlib import pygbx.headers as headers @@ -23,7 +23,7 @@ class GbxType(IntEnum): REPLAY_RECORD_OLD = 0x02407E000 GAME_GHOST = 0x0303F005 CTN_GHOST = 0x03092000 - CTN_GHOST_OLD = 0x2401B000 + CTN_GHOST_OLD = 0x2401B000 CTN_COLLECTOR = 0x0301A000 CTN_OBJECT_INFO = 0x0301C000 CTN_DECORATION = 0x03038000 @@ -90,6 +90,8 @@ def __init__(self, obj): self.__current_waypoint = None self.__replay_header_info = {} + self.lzo = LZO() + self.root_parser.skip(3) if self.version >= 4: self.root_parser.skip(1) @@ -127,10 +129,13 @@ def __init__(self, obj): self.root_parser.push_info() self.positions['data_size'] = self.root_parser.pop_info() - data_size = self.root_parser.read_uint32() - compressed_data_size = self.root_parser.read_uint32() - cdata = self.root_parser.read(compressed_data_size) - self.data = bytearray(lzo.decompress(cdata, False, data_size)) + uncompressed_size = self.root_parser.read_uint32() + compressed_size = self.root_parser.read_uint32() + compressed_data = self.root_parser.read(compressed_size) + decompressed = self.lzo.decompress(compressed_data, uncompressed_size) + if not decompressed: + raise GbxLoadError('data decompression has failed') + self.data = bytearray(decompressed)  # keep the mutable bytearray type that self.data had before this change bp = ByteReader(self.data) self._read_node(self.class_id, -1, bp) diff --git a/pygbx/lzo.py b/pygbx/lzo.py new file mode 100644 index 0000000..77607ac --- /dev/null +++ b/pygbx/lzo.py @@ -0,0 +1,160 @@ +import ctypes +from os import path, name as osname +from ctypes import CDLL, c_uint32, byref, c_char_p, POINTER, sizeof, c_void_p +from logging import error + + +class LZO: + """This class contains standalone methods of the LZO library. It calls an external library file and will run in C. + + Usage + Create an instance of this class and use it with obj.decompress(data, uncompressed_size) or obj.compress(data). 
+ It returns the resulting data on success, or False on failure. The instance is reusable. + + Availability + This library should work on both 32 bit and 64 bit Windows and Linux. If you encounter problems, + please submit an issue. + + Extra info + When decompressing, the uncompressed_size argument is known before the data is uncompressed. + It is written inside of GBX data and has to be retrieved from there. + + Internal LZO functions that are used + lzo1x_999_compress + lzo1x_decompress_safe + + Other internal functions that are called from the above + lzo1x_999_compress_internal + lzo1x_999_compress_level + + lzo1x_999_compress + This is the best function for compressing data in terms of file size, but also one of the slowest. However, the + LZO FAQ itself says it should be used when generating pre-compressed data (meaning stuff like + Replay/Challenge files). The decompression speed is not affected by whatever compression function was used. + + lzo1x_decompress_safe + Extremely fast decompression algorithm. It was designed for run time applications, such as the + game Trackmania. Its name has the suffix _safe because it can never crash (from LZO FAQ). However, there is + no guarantee that the returned data has its integrity preserved. LZO offers crc32 (and adler32) to check the + integrity, but since GBX data doesn't seem to have the checksum written anywhere, there was also no real + point in integrating the lzo_crc32 into this library. If you know where the checksum might be hidden, please + write us back. + + Speed + The lzo1x_decompress_safe function is extremely fast. Benchmarking 100,000 iterations of decompressing a + TMNF replay file (36538 bytes in size) took approximately 1.08 seconds. The uncompressed size was 38595 bytes. + + The lzo1x_999_compress function is slow. 
Benchmarking 1000 iterations of uncompressed data from above with + a size of 38595 bytes took approximately 2.7 seconds (which still is only ~0.003 seconds for a GBX Replay) + + Comparison to compression of TMNF + A random set of 14000 replays was analyzed in terms of their compressed to decompressed ratio for the + internal function the game uses vs the lzo1x_999_compress function this library uses. + On average, the compression factor of replay files in TMNF is about 94.33%, whereas compressing that + data with lzo1x_999_compress resulted in a compression rate of about 93.60%. + + Tip: If you want, you can uncompress the data in your GBX data and re-compress it with this + lzo1x_999_compress method to save a little bit of space; the result is recognized and accepted by the game + """ + + def __init__(self): + """Loads library upon object creation once""" + + # Pointer size is 8 bytes on a 64 bit interpreter and 4 bytes on 32 bit, so shifting right by 3 yields 1 or 0 + self.is64 = sizeof(c_void_p) >> 3 + + # Check for architecture (Windows/Linux supported) + if osname == 'nt': + self.__lib_ext = '.dll' + elif osname == 'posix': + self.__lib_ext = '.so' + else: + raise Exception(f'Your system cannot load the LZO libs. 
Required: Windows/Linux, given: {osname}') + + self.__lzo1x_lib_path = path.join(path.dirname(path.abspath(__file__)), 'lzo', 'libs', + f'lzo1x_{"64" if self.is64 else "32"}{self.__lib_ext}') + + try: + self.__lzo1x_lib = CDLL(self.__lzo1x_lib_path) + except Exception as e: + raise Exception(f'LZO library could not be loaded: {e}') from e + + # Specify arguments and response types + self.__lzo1x_lib.lzo1x_decompress_safe.restype = c_uint32 + self.__lzo1x_lib.lzo1x_decompress_safe.argtypes = [c_char_p, c_uint32, c_char_p, + POINTER(c_uint32)] + + self.__lzo1x_lib.lzo1x_999_compress.restype = c_uint32 + self.__lzo1x_lib.lzo1x_999_compress.argtypes = [c_char_p, c_uint32, c_char_p, + POINTER(c_uint32), ctypes.c_void_p] + + def decompress(self, data, uncompressed_size): + return self.__lzo1x_decompress_safe(data, uncompressed_size) + + def compress(self, data): + return self.__lzo1x_999_compress(data) + + def __lzo1x_decompress_safe(self, data, uncompressed_size): + if not isinstance(data, bytes): + try: + data = bytes(data) + except Exception as e: + error(f'Could not turn data into type bytes: {e}') + return False + if not isinstance(uncompressed_size, int): + error(f'uncompressed_size must be of data type int. {type(uncompressed_size)} was given') + return False + + # decompressed data goes here; it must be a writable ctypes buffer because the C function writes into it + out_buffer = ctypes.create_string_buffer(uncompressed_size) + + # C unsigned int compressed_size + compressed_size = c_uint32(len(data)) + + # Pointer to uncompressed_size. The function takes in a pointer to uncompressed_size and uses it internally + # to store some internal temporary value which then holds the uncompressed_size and is used for something else. + # Afterwards the internal function sets our outside uncompressed_size to 0, and as the decompression process + # progresses, it writes into it the number of bytes that were written (hence the name bytes_written for the + # pointer). 
After the internal function returns, it will have set our outside uncompressed_size to the bytes + # that were actually written, so uncompressed_size should become the same value again, if no error has occurred + bytes_written = c_uint32(uncompressed_size) + + try: + if self.__lzo1x_lib.lzo1x_decompress_safe(data, compressed_size, out_buffer, byref(bytes_written)): + return False + + # check if the bytes that were written match out_buffer size we have originally allocated + if bytes_written.value != len(out_buffer): + return False + else: + return out_buffer.raw + except Exception as e: + error(e) + return False + + def __lzo1x_999_compress(self, data): + if not isinstance(data, bytes): + try: + data = bytes(data) + except Exception as e: + error(f'Could not turn data into bytes data type: {e}') + return False + + # Compressed data ends up here. According to LZO FAQ, the size of this buffer is calculated with this formula: + # out_size = in_size + (in_size / 16) + 64 + 3 + # These are worst case scenario expansions (~106% of in_size) + out_buffer = ctypes.create_string_buffer(len(data) + (int(len(data) / 16)) + 67) + # Scratch memory for the compressor. Like out_buffer it is written to from C, so it must not be a bytes object + work_memory = ctypes.create_string_buffer(524288) + uncompressed_size = c_uint32(len(data)) + bytes_written = c_uint32(0) + + try: + if self.__lzo1x_lib.lzo1x_999_compress( + data, uncompressed_size, out_buffer, byref(bytes_written), work_memory) != 0: + return False + + return out_buffer.raw[0:bytes_written.value] + except Exception as e: + error(e) + return False diff --git a/pygbx/lzo/libs/lzo1x_32.dll b/pygbx/lzo/libs/lzo1x_32.dll new file mode 100644 index 0000000..117c94d Binary files /dev/null and b/pygbx/lzo/libs/lzo1x_32.dll differ diff --git a/pygbx/lzo/libs/lzo1x_32.so b/pygbx/lzo/libs/lzo1x_32.so new file mode 100644 index 0000000..41bbbdd Binary files /dev/null and b/pygbx/lzo/libs/lzo1x_32.so differ diff --git a/pygbx/lzo/libs/lzo1x_64.dll b/pygbx/lzo/libs/lzo1x_64.dll new file mode 100644 index 0000000..837664a Binary files /dev/null and b/pygbx/lzo/libs/lzo1x_64.dll differ diff --git 
a/pygbx/lzo/libs/lzo1x_64.so b/pygbx/lzo/libs/lzo1x_64.so new file mode 100644 index 0000000..3b31fbb Binary files /dev/null and b/pygbx/lzo/libs/lzo1x_64.so differ diff --git a/setup.py b/setup.py index 9a7f776..b5a5ab3 100644 --- a/setup.py +++ b/setup.py @@ -10,9 +10,6 @@ url = 'https://github.com/donadigo/pygbx', download_url = 'https://github.com/donadigo/pygbx/archive/0.1.zip', keywords = ['GBX', 'parser', 'TrackMania'], - install_requires=[ - 'python-lzo', - ], classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', @@ -22,5 +19,9 @@ 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', ], -) \ No newline at end of file + package_dir={'pygbx': 'pygbx'}, + package_data={'pygbx': ['lzo/libs/lzo1x_*']}, +)