|
| 1 | +<?php |
| 2 | + |
| 3 | +namespace Box\Spout\Reader\Helper\XLSX\SharedStringsCaching; |
| 4 | + |
| 5 | +/** |
| 6 | + * Class CachingStrategyFactory |
| 7 | + * |
| 8 | + * @package Box\Spout\Reader\Helper\XLSX\SharedStringsCaching |
| 9 | + */ |
class CachingStrategyFactory
{
    /**
     * The memory amount needed to store a string was obtained empirically from this data:
     *
     *        ------------------------------------
     *        | Number of chars⁺ | Memory needed |
     *        ------------------------------------
     *        |           3,000  |         1 MB  |
     *        |          15,000  |         2 MB  |
     *        |          30,000  |         5 MB  |
     *        |          75,000  |        11 MB  |
     *        |         150,000  |        21 MB  |
     *        |         300,000  |        43 MB  |
     *        |         750,000  |       105 MB  |
     *        |       1,500,000  |       210 MB  |
     *        |       2,250,000  |       315 MB  |
     *        |       3,000,000  |       420 MB  |
     *        |       4,500,000  |       630 MB  |
     *        ------------------------------------
     *
     *        ⁺ All characters were 1 byte long
     *
     * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
     * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
     * Also, there are on average about 20 characters per cell (this is entirely empirical data...).
     *
     * This means that in order to store one shared string in memory, the memory amount needed is:
     *   => 20 * 600 ≈ 12KB
     */
    const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

    /**
     * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
     * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
     * and the string will be quickly retrieved.
     * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
     * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works
     * best when the indexes of the shared strings are sorted in the sheet data.
     * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
     */
    const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

    /** @var CachingStrategyFactory|null Singleton instance */
    protected static $instance = null;

    /**
     * Private constructor for singleton
     */
    private function __construct()
    {
    }

    /**
     * Returns the singleton instance of the factory, creating it on first use.
     *
     * @return CachingStrategyFactory
     */
    public static function getInstance()
    {
        if (self::$instance === null) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Returns the best caching strategy, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (null when unknown)
     * @param string|null $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @return CachingStrategyInterface The best caching strategy
     */
    public function getBestCachingStrategy($sharedStringsUniqueCount, $tempFolder = null)
    {
        if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
            return new InMemoryStrategy($sharedStringsUniqueCount);
        }

        return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE);
    }

    /**
     * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
     * and the amount of memory available.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (null when unknown)
     * @return bool
     */
    protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
    {
        // Without a count no memory estimate can be made (null would otherwise be
        // treated as "0 KB needed"), so in-memory caching is not considered safe.
        if ($sharedStringsUniqueCount === null) {
            return false;
        }

        $memoryAvailable = $this->getMemoryLimitInKB();

        if ($memoryAvailable === -1) {
            // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe
            return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
        }

        $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;

        return ($memoryAvailable > $memoryNeeded);
    }

    /**
     * Returns the PHP "memory_limit" in Kilobytes.
     *
     * Accepted forms (case-insensitive, surrounding whitespace ignored): a plain number of
     * bytes ("268435456"), or a number followed by a unit letter b/k/m/g/t with an optional
     * trailing "b" ("128M", "128MB", "1g").
     *
     * @return int|float The limit in KB, or -1 if the limit is unlimited or cannot be parsed
     */
    protected function getMemoryLimitInKB()
    {
        // ini_get() may return false; the cast turns that into '' which is then reported as unknown (-1).
        $memoryLimitFormatted = strtolower(trim((string) $this->getMemoryLimitFromIni()));

        // No memory limit
        if ($memoryLimitFormatted === '-1') {
            return -1;
        }

        // Anchored on both sides so that malformed values (e.g. "1.5g") are reported as
        // unknown instead of being partially matched and over-estimated.
        if (!preg_match('/^(\d+)([bkmgt])?b?$/', $memoryLimitFormatted, $matches)) {
            return -1;
        }

        $amount = intval($matches[1]);

        // A value without any unit letter is a number of bytes.
        // The optional group is absent from $matches when it did not participate in the match.
        $unit = (isset($matches[2]) && $matches[2] !== '') ? $matches[2] : 'b';

        switch ($unit) {
            case 'k':
                return $amount;
            case 'm':
                return ($amount * 1024);
            case 'g':
                return ($amount * 1024 * 1024);
            case 't':
                return ($amount * 1024 * 1024 * 1024);
            default:
                // 'b': bytes
                return ($amount / 1024);
        }
    }

    /**
     * Returns the raw "memory_limit" value as reported by ini_get().
     * Kept as a separate method so that subclasses can override where the value comes from.
     *
     * @return string|false
     */
    protected function getMemoryLimitFromIni()
    {
        return ini_get('memory_limit');
    }
}
0 commit comments