MDL-65763 lib: Upgrade MaxMind DB reader lib to 1.4.1
[moodle.git] / lib / maxmind / MaxMind / Db / Reader / Decoder.php
CommitLineData
eacc36a2
DP
1<?php
2
3namespace MaxMind\Db\Reader;
4
b55260fc
MG
// @codingStandardsIgnoreLine
// Byte-length threshold used by Decoder: values of at most this many bytes are
// combined with plain integer arithmetic, longer ones go through gmp/bcmath.
// We subtract 1 from the log to protect against precision loss.
\define(
    __NAMESPACE__ . '\_MM_MAX_INT_BYTES',
    (log(PHP_INT_MAX, 2) - 1) / 8
);
eacc36a2
DP
8
/**
 * Decodes typed values out of a stream, one field at a time.
 *
 * Every field starts with a control byte. Its top three bits select the type
 * (0 means "extended": the real type is stored in the following byte) and its
 * low five bits give the payload size, except for pointers, where they
 * describe the pointer itself. All reads go through Util::read().
 */
class Decoder
{
    private $fileStream;
    private $pointerBase;
    private $pointerBaseByteSize;
    // This is only used for unit testing
    private $pointerTestHack;
    private $switchByteOrder;

    const _EXTENDED = 0;
    const _POINTER = 1;
    const _UTF8_STRING = 2;
    const _DOUBLE = 3;
    const _BYTES = 4;
    const _UINT16 = 5;
    const _UINT32 = 6;
    const _MAP = 7;
    const _INT32 = 8;
    const _UINT64 = 9;
    const _UINT128 = 10;
    const _ARRAY = 11;
    const _CONTAINER = 12;
    const _END_MARKER = 13;
    const _BOOLEAN = 14;
    const _FLOAT = 15;

    /**
     * @param resource $fileStream      stream handed to Util::read() for every read
     * @param int      $pointerBase     value added to every decoded pointer
     * @param bool     $pointerTestHack when true, decode() returns the pointer target
     *                                  instead of following it (unit testing only)
     */
    public function __construct(
        $fileStream,
        $pointerBase = 0,
        $pointerTestHack = false
    ) {
        $this->fileStream = $fileStream;
        $this->pointerBase = $pointerBase;

        // Approximate number of bytes needed to hold the base; used by
        // decodePointer() to decide whether base + offset fits a native int.
        $this->pointerBaseByteSize = $pointerBase > 0 ? log($pointerBase, 2) / 8 : 0;
        $this->pointerTestHack = $pointerTestHack;

        $this->switchByteOrder = $this->isPlatformLittleEndian();
    }

    /**
     * Decodes the field whose control byte is at $offset.
     *
     * @param int $offset position of the control byte
     *
     * @throws InvalidDatabaseException if an extended type resolves to a type
     *                                  number below 8, or the type is not handled
     *
     * @return array [value, offset just past the field]. For a pointer the
     *               offset is the one just past the pointer bytes, not past
     *               the data it points to. In pointer test mode a pointer
     *               field yields [pointer target] only.
     */
    public function decode($offset)
    {
        list(, $ctrlByte) = unpack(
            'C',
            Util::read($this->fileStream, $offset, 1)
        );
        ++$offset;

        $type = $ctrlByte >> 5;

        // Pointers are a special case, we don't read the next $size bytes, we
        // use the size to determine the length of the pointer and then follow
        // it.
        if ($type === self::_POINTER) {
            list($pointer, $offset) = $this->decodePointer($ctrlByte, $offset);

            // for unit testing
            if ($this->pointerTestHack) {
                return [$pointer];
            }

            list($result) = $this->decode($pointer);

            return [$result, $offset];
        }

        if ($type === self::_EXTENDED) {
            list(, $nextByte) = unpack(
                'C',
                Util::read($this->fileStream, $offset, 1)
            );

            // Extended types are stored as (type number - 7).
            $type = $nextByte + 7;

            if ($type < 8) {
                throw new InvalidDatabaseException(
                    'Something went horribly wrong in the decoder. An extended type '
                    . 'resolved to a type number < 8 ('
                    . $type
                    . ')'
                );
            }

            ++$offset;
        }

        list($size, $offset) = $this->sizeFromCtrlByte($ctrlByte, $offset);

        return $this->decodeByType($type, $offset, $size);
    }

    /**
     * Dispatches to the decoder for $type.
     *
     * Maps, arrays and booleans interpret $size themselves; for every other
     * type $size bytes are read at $offset and converted.
     *
     * @param int $type   one of the type constants of this class
     * @param int $offset position of the payload
     * @param int $size   size taken from the control byte
     *
     * @throws InvalidDatabaseException on an unhandled type or a wrong payload size
     *
     * @return array [value, offset just past the field]
     */
    private function decodeByType($type, $offset, $size)
    {
        switch ($type) {
            case self::_MAP:
                return $this->decodeMap($size, $offset);
            case self::_ARRAY:
                return $this->decodeArray($size, $offset);
            case self::_BOOLEAN:
                // The value lives in the size bits; there is no payload to skip.
                return [$this->decodeBoolean($size), $offset];
        }

        $newOffset = $offset + $size;
        $bytes = Util::read($this->fileStream, $offset, $size);
        switch ($type) {
            case self::_BYTES:
            case self::_UTF8_STRING:
                return [$bytes, $newOffset];
            case self::_DOUBLE:
                $this->verifySize(8, $size);

                return [$this->decodeDouble($bytes), $newOffset];
            case self::_FLOAT:
                $this->verifySize(4, $size);

                return [$this->decodeFloat($bytes), $newOffset];
            case self::_INT32:
                return [$this->decodeInt32($bytes, $size), $newOffset];
            case self::_UINT16:
            case self::_UINT32:
            case self::_UINT64:
            case self::_UINT128:
                return [$this->decodeUint($bytes, $size), $newOffset];
            default:
                throw new InvalidDatabaseException(
                    'Unknown or unexpected type: ' . $type
                );
        }
    }

    /**
     * Ensures a fixed-width field has exactly the expected size.
     *
     * @param int $expected required size in bytes
     * @param int $actual   size read from the control byte
     *
     * @throws InvalidDatabaseException if the two differ
     */
    private function verifySize($expected, $actual)
    {
        if ($expected !== $actual) {
            throw new InvalidDatabaseException(
                "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
            );
        }
    }

    /**
     * Decodes $size consecutive fields into a list.
     *
     * @param int $size   number of elements
     * @param int $offset position of the first element
     *
     * @return array [list of values, offset just past the last element]
     */
    private function decodeArray($size, $offset)
    {
        $array = [];

        for ($i = 0; $i < $size; ++$i) {
            list($value, $offset) = $this->decode($offset);
            $array[] = $value;
        }

        return [$array, $offset];
    }

    /**
     * A boolean is carried entirely by the size: 0 is false, anything else true.
     *
     * @param int $size size taken from the control byte
     *
     * @return bool
     */
    private function decodeBoolean($size)
    {
        return $size !== 0;
    }

    /**
     * Converts 8 big-endian bytes to a float.
     *
     * @param string $bits raw payload
     *
     * @return float
     */
    private function decodeDouble($bits)
    {
        // This assumes IEEE 754 doubles, but most (all?) modern platforms
        // use them.
        //
        // We are not using the "E" format as that was only added in
        // 7.0.15 and 7.1.1. As such, we must switch byte order on
        // little endian machines.
        list(, $double) = unpack('d', $this->maybeSwitchByteOrder($bits));

        return $double;
    }

    /**
     * Converts 4 big-endian bytes to a float.
     *
     * @param string $bits raw payload
     *
     * @return float
     */
    private function decodeFloat($bits)
    {
        // This assumes IEEE 754 floats, but most (all?) modern platforms
        // use them.
        //
        // We are not using the "G" format as that was only added in
        // 7.0.15 and 7.1.1. As such, we must switch byte order on
        // little endian machines.
        list(, $float) = unpack('f', $this->maybeSwitchByteOrder($bits));

        return $float;
    }

    /**
     * Converts up to 4 big-endian bytes to a signed 32-bit integer.
     *
     * @param string $bytes raw payload
     * @param int    $size  payload length in bytes
     *
     * @throws InvalidDatabaseException if $size is greater than 4
     *
     * @return int
     */
    private function decodeInt32($bytes, $size)
    {
        switch ($size) {
            case 0:
                return 0;
            case 1:
            case 2:
            case 3:
                // Left-pad short encodings to the 4 bytes unpack('l') needs.
                $bytes = str_pad($bytes, 4, "\x00", STR_PAD_LEFT);
                break;
            case 4:
                break;
            default:
                throw new InvalidDatabaseException(
                    "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
                );
        }

        list(, $int) = unpack('l', $this->maybeSwitchByteOrder($bytes));

        return $int;
    }

    /**
     * Decodes $size key/value pairs into an associative array.
     *
     * @param int $size   number of pairs
     * @param int $offset position of the first key
     *
     * @return array [map, offset just past the last value]
     */
    private function decodeMap($size, $offset)
    {
        $map = [];

        for ($i = 0; $i < $size; ++$i) {
            list($key, $offset) = $this->decode($offset);
            list($value, $offset) = $this->decode($offset);
            $map[$key] = $value;
        }

        return [$map, $offset];
    }

    /**
     * Reads a pointer and resolves it to an absolute position.
     *
     * Bits 3-4 of the control byte give the number of pointer bytes (1-4).
     * For 1-3 bytes the low three control-byte bits are the most significant
     * bits of the value; for 4 bytes only the following bytes are used.
     *
     * @param int $ctrlByte control byte of the pointer field
     * @param int $offset   position just past the control byte
     *
     * @throws \RuntimeException if the target does not fit a native integer
     *                           and neither gmp nor bcmath is loaded
     *
     * @return array [pointer target (int, or numeric string when computed with
     *               gmp/bcmath), offset just past the pointer bytes]
     */
    private function decodePointer($ctrlByte, $offset)
    {
        $pointerSize = (($ctrlByte >> 3) & 0x3) + 1;

        $buffer = Util::read($this->fileStream, $offset, $pointerSize);
        $offset = $offset + $pointerSize;

        switch ($pointerSize) {
            case 1:
                $packed = (pack('C', $ctrlByte & 0x7)) . $buffer;
                list(, $pointer) = unpack('n', $packed);
                $pointer += $this->pointerBase;
                break;
            case 2:
                $packed = "\x00" . (pack('C', $ctrlByte & 0x7)) . $buffer;
                list(, $pointer) = unpack('N', $packed);
                // 2048 = 2^11, the number of values the 1-byte form (3 + 8 bits) holds.
                $pointer += $this->pointerBase + 2048;
                break;
            case 3:
                $packed = (pack('C', $ctrlByte & 0x7)) . $buffer;

                // It is safe to use 'N' here, even on 32 bit machines as the
                // first bit is 0.
                list(, $pointer) = unpack('N', $packed);
                // 526336 = 2048 + 2^19, i.e. past the 1- and 2-byte ranges.
                $pointer += $this->pointerBase + 526336;
                break;
            case 4:
                // We cannot use unpack here as we might overflow on 32 bit
                // machines
                $pointerOffset = $this->decodeUint($buffer, $pointerSize);

                $byteLength = $pointerSize + $this->pointerBaseByteSize;

                if ($byteLength <= _MM_MAX_INT_BYTES) {
                    $pointer = $pointerOffset + $this->pointerBase;
                } elseif (\extension_loaded('gmp')) {
                    $pointer = gmp_strval(gmp_add($pointerOffset, $this->pointerBase));
                } elseif (\extension_loaded('bcmath')) {
                    $pointer = bcadd($pointerOffset, $this->pointerBase);
                } else {
                    throw new \RuntimeException(
                        'The gmp or bcmath extension must be installed to read this database.'
                    );
                }
        }

        return [$pointer, $offset];
    }

    /**
     * Converts big-endian bytes to an unsigned integer.
     *
     * @param string $bytes      raw payload
     * @param int    $byteLength number of bytes of $bytes to consume
     *
     * @throws \RuntimeException if the value needs more than _MM_MAX_INT_BYTES
     *                           bytes and neither gmp nor bcmath is loaded
     *
     * @return int|string a native int when $byteLength is at most
     *                    _MM_MAX_INT_BYTES, otherwise a decimal string
     */
    private function decodeUint($bytes, $byteLength)
    {
        if ($byteLength === 0) {
            return 0;
        }

        $integer = 0;

        // The arithmetic backend depends only on the total length, so it is
        // chosen once rather than re-checked for every byte.
        // We only use gmp or bcmath if the final value is too big
        if ($byteLength <= _MM_MAX_INT_BYTES) {
            for ($i = 0; $i < $byteLength; ++$i) {
                $integer = ($integer << 8) + \ord($bytes[$i]);
            }

            return $integer;
        }

        if (\extension_loaded('gmp')) {
            for ($i = 0; $i < $byteLength; ++$i) {
                $integer = gmp_strval(gmp_add(gmp_mul($integer, 256), \ord($bytes[$i])));
            }

            return $integer;
        }

        if (\extension_loaded('bcmath')) {
            for ($i = 0; $i < $byteLength; ++$i) {
                // bcmath takes numeric strings; cast so this also works when
                // scalar types are enforced strictly.
                $integer = bcadd(bcmul((string) $integer, '256'), (string) \ord($bytes[$i]));
            }

            return $integer;
        }

        throw new \RuntimeException(
            'The gmp or bcmath extension must be installed to read this database.'
        );
    }

    /**
     * Works out the payload size announced by a control byte.
     *
     * The low five bits hold the size directly when below 29. The values 29,
     * 30 and 31 mean the size continues in the next 1, 2 or 3 bytes and is
     * 29, 285 or 65821 plus that big-endian number respectively.
     *
     * @param int $ctrlByte control byte of the field
     * @param int $offset   position just past the control (and extended type) byte
     *
     * @return array [size, offset just past any extra size bytes]
     */
    private function sizeFromCtrlByte($ctrlByte, $offset)
    {
        $size = $ctrlByte & 0x1f;

        if ($size < 29) {
            return [$size, $offset];
        }

        $bytesToRead = $size - 28;
        $bytes = Util::read($this->fileStream, $offset, $bytesToRead);

        if ($size === 29) {
            $size = 29 + \ord($bytes);
        } elseif ($size === 30) {
            list(, $adjust) = unpack('n', $bytes);
            $size = 285 + $adjust;
        } else {
            // $size is 31 here. The "\x00" prefix already limits the value to
            // 24 bits, so all three bytes are used as-is; masking it further
            // would truncate sizes of 2^20 and above.
            list(, $adjust) = unpack('N', "\x00" . $bytes);
            $size = 65821 + $adjust;
        }

        return [$size, $offset + $bytesToRead];
    }

    /**
     * Reverses $bytes on little-endian hosts so that machine-order unpack()
     * formats can read big-endian data.
     *
     * @param string $bytes raw payload
     *
     * @return string
     */
    private function maybeSwitchByteOrder($bytes)
    {
        return $this->switchByteOrder ? strrev($bytes) : $bytes;
    }

    /**
     * Detects host byte order by packing a short in machine order and reading
     * it back as little-endian.
     *
     * @return bool true when the host is little-endian
     */
    private function isPlatformLittleEndian()
    {
        $testint = 0x00FF;
        $packed = pack('S', $testint);

        return $testint === current(unpack('v', $packed));
    }
}