/**
 * Reader for the MaxMind DB binary format. IP addresses are looked up with
 * the get() method.
 */
class Reader
{
    // Added to the search tree size to form the pointer base handed to the
    // decoder used for data records (see the constructor).
    private static $DATA_SECTION_SEPARATOR_SIZE = 16;
    // Byte sequence searched for, from the end of the file, to locate the
    // metadata section.
    private static $METADATA_START_MARKER = "\xAB\xCD\xEFMaxMind.com";
    private static $METADATA_START_MARKER_LENGTH = 14;
    // Only this many trailing bytes of the file are searched for the marker.
    private static $METADATA_MAX_SIZE = 131072; // 128 * 1024 = 128KB

    private $decoder;
    private $fileHandle;
    private $fileSize;
    // Cached result of ipV4StartNode(); unset until first computed.
    private $ipV4Start;
    private $metadata;

    /**
     * Constructs a Reader for the MaxMind DB format. The file passed to it must
     * be a valid MaxMind DB file such as a GeoIp2 database file.
     *
     * @param string $database
     *                         the MaxMind DB file to use
     *
     * @throws \InvalidArgumentException for invalid database path or unknown arguments
     * @throws \UnexpectedValueException if the size of the file cannot be determined
     * @throws InvalidDatabaseException
     *                                   if the database is invalid or there is an error reading
     *                                   from it
     */
    public function __construct($database)
    {
        if (func_num_args() !== 1) {
            throw new \InvalidArgumentException(
                'The constructor takes exactly one argument.'
            );
        }

        if (!is_readable($database)) {
            throw new \InvalidArgumentException(
                "The file \"$database\" does not exist or is not readable."
            );
        }
        $this->fileHandle = @fopen($database, 'rb');
        if ($this->fileHandle === false) {
            throw new \InvalidArgumentException(
                "Error opening \"$database\"."
            );
        }
        $this->fileSize = @filesize($database);
        if ($this->fileSize === false) {
            throw new \UnexpectedValueException(
                "Error determining the size of \"$database\"."
            );
        }

        // The metadata is decoded with its own decoder whose pointer base is
        // the start of the metadata section.
        $start = $this->findMetadataStart($database);
        $metadataDecoder = new Decoder($this->fileHandle, $start);
        list($metadataArray) = $metadataDecoder->decode($start);
        $this->metadata = new Metadata($metadataArray);

        // Data records are decoded relative to the end of the search tree
        // plus the separator.
        $this->decoder = new Decoder(
            $this->fileHandle,
            $this->metadata->searchTreeSize + self::$DATA_SECTION_SEPARATOR_SIZE
        );
    }

    /**
     * Looks up the address in the MaxMind DB.
     *
     * @param string $ipAddress
     *                          the IP address to look up
     *
     * @throws \BadMethodCallException   if this method is called on a closed database
     * @throws \InvalidArgumentException if something other than a single IP address is passed to the method
     * @throws InvalidDatabaseException
     *                                   if the database is invalid or there is an error reading
     *                                   from it
     *
     * @return mixed the decoded record for the IP address, or null when the
     *               search tree holds no record for it
     */
    public function get($ipAddress)
    {
        if (func_num_args() !== 1) {
            throw new \InvalidArgumentException(
                'Method takes exactly one argument.'
            );
        }

        if (!is_resource($this->fileHandle)) {
            throw new \BadMethodCallException(
                'Attempt to read from a closed MaxMind DB.'
            );
        }

        if (!filter_var($ipAddress, FILTER_VALIDATE_IP)) {
            throw new \InvalidArgumentException(
                "The value \"$ipAddress\" is not a valid IP address."
            );
        }

        // The address has already been validated, so the presence of a colon
        // is enough to identify it as IPv6. Compare against false explicitly
        // rather than relying on the truthiness of a string position.
        if ($this->metadata->ipVersion === 4 && strpos($ipAddress, ':') !== false) {
            throw new \InvalidArgumentException(
                "Error looking up $ipAddress. You attempted to look up an"
                . ' IPv6 address in an IPv4-only database.'
            );
        }
        $pointer = $this->findAddressInTree($ipAddress);
        if ($pointer === 0) {
            return null;
        }

        return $this->resolveDataPointer($pointer);
    }

    /**
     * Walks the search tree bit by bit for the given address.
     *
     * @param string $ipAddress a validated IP address
     *
     * @throws InvalidDatabaseException if the walk ends on a regular node
     *
     * @return int 0 when the record is empty, otherwise the data pointer
     *             (a value greater than the node count)
     */
    private function findAddressInTree($ipAddress)
    {
        // XXX - could simplify. Done as a byte array to ease porting
        // array_merge() re-indexes unpack()'s 1-based result to 0-based.
        $rawAddress = array_merge(unpack('C*', inet_pton($ipAddress)));

        $bitCount = count($rawAddress) * 8;

        $node = $this->startNode($bitCount);

        for ($i = 0; $i < $bitCount; $i++) {
            // A value at or beyond the node count is not a node any more:
            // it is either the "empty" marker or a data pointer.
            if ($node >= $this->metadata->nodeCount) {
                break;
            }
            // Pick bit $i of the address, most significant bit first.
            $tempBit = 0xFF & $rawAddress[$i >> 3];
            $bit = 1 & ($tempBit >> (7 - ($i % 8)));

            $node = $this->readNode($node, $bit);
        }
        if ($node === $this->metadata->nodeCount) {
            // Record is empty
            return 0;
        } elseif ($node > $this->metadata->nodeCount) {
            // Record is a data pointer
            return $node;
        }
        throw new InvalidDatabaseException('Something bad happened');
    }

    /**
     * Returns the node at which a lookup of an address of $length bits starts.
     *
     * @param int $length number of bits in the address being looked up
     *
     * @return int the starting node number
     */
    private function startNode($length)
    {
        // Check if we are looking up an IPv4 address in an IPv6 tree. If this
        // is the case, we can skip over the first 96 nodes.
        if ($this->metadata->ipVersion === 6 && $length === 32) {
            return $this->ipV4StartNode();
        }
        // The first node of the tree is always node 0, at the beginning of the
        // value
        return 0;
    }

    /**
     * Returns (and caches) the node reached by following the 0 branch up to
     * 96 times from the root of an IPv6 tree.
     *
     * @return int the node number IPv4 lookups start from
     */
    private function ipV4StartNode()
    {
        // This is a defensive check. There is no reason to call this when you
        // have an IPv4 tree.
        if ($this->metadata->ipVersion === 4) {
            return 0;
        }

        if ($this->ipV4Start) {
            return $this->ipV4Start;
        }
        $node = 0;

        for ($i = 0; $i < 96 && $node < $this->metadata->nodeCount; $i++) {
            $node = $this->readNode($node, 0);
        }
        $this->ipV4Start = $node;

        return $node;
    }

    /**
     * Reads one of the two records stored in a search tree node.
     *
     * @param int $nodeNumber the node to read
     * @param int $index      0 for the left record, 1 for the right record
     *
     * @throws InvalidDatabaseException if the record size is not 24, 28 or 32
     *
     * @return int the record value
     */
    private function readNode($nodeNumber, $index)
    {
        $baseOffset = $nodeNumber * $this->metadata->nodeByteSize;

        // XXX - probably could condense this.
        switch ($this->metadata->recordSize) {
            case 24:
                $bytes = Util::read($this->fileHandle, $baseOffset + $index * 3, 3);
                list(, $node) = unpack('N', "\x00" . $bytes);

                return $node;
            case 28:
                // The fourth byte of the node is shared: its high nibble
                // supplies the top bits of the left record, its low nibble
                // the top bits of the right record.
                $middleByte = Util::read($this->fileHandle, $baseOffset + 3, 1);
                list(, $middle) = unpack('C', $middleByte);
                if ($index === 0) {
                    $middle = (0xF0 & $middle) >> 4;
                } else {
                    $middle = 0x0F & $middle;
                }
                $bytes = Util::read($this->fileHandle, $baseOffset + $index * 4, 3);
                list(, $node) = unpack('N', chr($middle) . $bytes);

                return $node;
            case 32:
                $bytes = Util::read($this->fileHandle, $baseOffset + $index * 4, 4);
                list(, $node) = unpack('N', $bytes);

                return $node;
            default:
                throw new InvalidDatabaseException(
                    'Unknown record size: '
                    . $this->metadata->recordSize
                );
        }
    }

    /**
     * Converts a data pointer found in the search tree into a file offset and
     * decodes the record stored there.
     *
     * @param int $pointer the pointer returned by findAddressInTree()
     *
     * @throws InvalidDatabaseException if the pointer resolves outside the file
     *
     * @return mixed the decoded record
     */
    private function resolveDataPointer($pointer)
    {
        $resolved = $pointer - $this->metadata->nodeCount
            + $this->metadata->searchTreeSize;
        // Valid offsets run from 0 to fileSize - 1, so an offset equal to the
        // file size is already past the last byte.
        if ($resolved >= $this->fileSize) {
            throw new InvalidDatabaseException(
                "The MaxMind DB file's search tree is corrupt"
            );
        }

        list($data) = $this->decoder->decode($resolved);

        return $data;
    }

    /**
     * Finds the offset of the first byte after the last metadata start marker
     * in the file.
     *
     * This is an extremely naive but reasonably readable implementation. There
     * are much faster algorithms (e.g., Boyer-Moore) for this if speed is ever
     * an issue, but I suspect it won't be.
     *
     * @param string $filename the database path, used only in the error message
     *
     * @throws InvalidDatabaseException if the marker is not found
     *
     * @return int the offset at which the metadata begins
     */
    private function findMetadataStart($filename)
    {
        $handle = $this->fileHandle;
        $fstat = fstat($handle);
        $fileSize = $fstat['size'];
        $marker = self::$METADATA_START_MARKER;
        $markerLength = self::$METADATA_START_MARKER_LENGTH;
        $metadataMaxLengthExcludingMarker
            = min(self::$METADATA_MAX_SIZE, $fileSize) - $markerLength;

        // $i is the distance from the end of the file to the candidate end of
        // the marker; the marker is compared backwards, byte by byte.
        for ($i = 0; $i <= $metadataMaxLengthExcludingMarker; $i++) {
            for ($j = 0; $j < $markerLength; $j++) {
                fseek($handle, $fileSize - $i - $j - 1);
                $matchBit = fgetc($handle);
                if ($matchBit !== $marker[$markerLength - $j - 1]) {
                    continue 2;
                }
            }

            return $fileSize - $i;
        }
        throw new InvalidDatabaseException(
            "Error opening database file ($filename). " .
            'Is this a valid MaxMind DB file?'
        );
    }

    /**
     * @throws \InvalidArgumentException if arguments are passed to the method
     * @throws \BadMethodCallException   if the database has been closed
     *
     * @return Metadata object for the database
     */
    public function metadata()
    {
        if (func_num_args()) {
            throw new \InvalidArgumentException(
                'Method takes no arguments.'
            );
        }

        // Not technically required, but this makes it consistent with
        // C extension and it allows us to change our implementation later.
        if (!is_resource($this->fileHandle)) {
            throw new \BadMethodCallException(
                'Attempt to read from a closed MaxMind DB.'
            );
        }

        return $this->metadata;
    }

    /**
     * Closes the MaxMind DB and returns resources to the system.
     *
     * @throws \BadMethodCallException if the database has already been closed
     */
    public function close()
    {
        if (!is_resource($this->fileHandle)) {
            throw new \BadMethodCallException(
                'Attempt to close a closed MaxMind DB.'
            );
        }
        fclose($this->fileHandle);
    }
}
/**
 * Decodes a value of an already-determined type.
 *
 * @param string $type   type name taken from the decoder's type table
 * @param int    $offset offset of the first payload byte
 * @param int    $size   size taken from the control byte(s)
 *
 * @throws InvalidDatabaseException for an unknown type or a bad float/double size
 *
 * @return array two elements: the decoded value and the offset following it
 */
private function decodeByType($type, $offset, $size)
{
    // For these types $size is an element count or the value itself, not a
    // byte length, so no payload is read here.
    switch ($type) {
        case 'map':
            return $this->decodeMap($size, $offset);
        case 'array':
            return $this->decodeArray($size, $offset);
        case 'boolean':
            return [$this->decodeBoolean($size), $offset];
    }

    $newOffset = $offset + $size;
    $bytes = Util::read($this->fileStream, $offset, $size);
    switch ($type) {
        case 'utf8_string':
            return [$this->decodeString($bytes), $newOffset];
        case 'double':
            $this->verifySize(8, $size);

            return [$this->decodeDouble($bytes), $newOffset];
        case 'float':
            $this->verifySize(4, $size);

            return [$this->decodeFloat($bytes), $newOffset];
        case 'bytes':
            return [$bytes, $newOffset];
        case 'uint16':
        case 'uint32':
            return [$this->decodeUint($bytes), $newOffset];
        case 'int32':
            return [$this->decodeInt32($bytes), $newOffset];
        case 'uint64':
        case 'uint128':
            return [$this->decodeBigUint($bytes, $size), $newOffset];
        default:
            throw new InvalidDatabaseException(
                'Unknown or unexpected type: ' . $type
            );
    }
}

/**
 * Ensures a fixed-width value has the expected byte length.
 *
 * @param int $expected required size in bytes
 * @param int $actual   size read from the database
 *
 * @throws InvalidDatabaseException if the sizes differ
 */
private function verifySize($expected, $actual)
{
    if ($expected !== $actual) {
        throw new InvalidDatabaseException(
            "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
        );
    }
}

/**
 * Decodes $size consecutive values starting at $offset.
 *
 * @param int $size   number of elements
 * @param int $offset offset of the first element
 *
 * @return array two elements: the list of values and the offset following it
 */
private function decodeArray($size, $offset)
{
    $array = [];

    for ($i = 0; $i < $size; $i++) {
        list($value, $offset) = $this->decode($offset);
        $array[] = $value;
    }

    return [$array, $offset];
}

/**
 * A boolean carries its value in the size field: zero is false, anything
 * else is true.
 *
 * @param int $size size taken from the control byte
 *
 * @return bool
 */
private function decodeBoolean($size)
{
    return $size !== 0;
}

/**
 * @param string $bits 8 bytes as stored in the database
 *
 * @return float
 */
private function decodeDouble($bits)
{
    // XXX - Assumes IEEE 754 double on platform
    list(, $double) = unpack('d', $this->maybeSwitchByteOrder($bits));

    return $double;
}

/**
 * @param string $bits 4 bytes as stored in the database
 *
 * @return float
 */
private function decodeFloat($bits)
{
    // XXX - Assumes IEEE 754 floats on platform
    list(, $float) = unpack('f', $this->maybeSwitchByteOrder($bits));

    return $float;
}

/**
 * @param string $bytes up to 4 bytes as stored in the database
 *
 * @return int
 */
private function decodeInt32($bytes)
{
    // unpack('l') reads exactly four bytes in machine byte order, so pad
    // short values and reverse them on little-endian platforms.
    $bytes = $this->zeroPadLeft($bytes, 4);
    list(, $int) = unpack('l', $this->maybeSwitchByteOrder($bytes));

    return $int;
}

/**
 * Decodes $size key/value pairs starting at $offset.
 *
 * @param int $size   number of entries
 * @param int $offset offset of the first key
 *
 * @return array two elements: the map and the offset following it
 */
private function decodeMap($size, $offset)
{
    $map = [];

    for ($i = 0; $i < $size; $i++) {
        list($key, $offset) = $this->decode($offset);
        list($value, $offset) = $this->decode($offset);
        $map[$key] = $value;
    }

    return [$map, $offset];
}

// Constant added to a decoded pointer value, keyed by the pointer size in
// bytes (see decodePointer()).
private $pointerValueOffset = [
    1 => 0,
    2 => 2048,
    3 => 526336,
    4 => 0,
];

/**
 * Decodes a pointer whose control byte has already been read.
 *
 * @param int $ctrlByte the control byte of the pointer field
 * @param int $offset   offset of the first byte after the control byte
 *
 * @return array two elements: the absolute offset pointed to and the offset
 *               following the pointer field
 */
private function decodePointer($ctrlByte, $offset)
{
    $pointerSize = (($ctrlByte >> 3) & 0x3) + 1;

    $buffer = Util::read($this->fileStream, $offset, $pointerSize);
    $offset = $offset + $pointerSize;

    // For sizes 1-3 the low three bits of the control byte are the most
    // significant bits of the value; a 4-byte pointer uses the buffer alone.
    $packed = $pointerSize === 4
        ? $buffer
        : (pack('C', $ctrlByte & 0x7)) . $buffer;

    $unpacked = $this->decodeUint($packed);
    $pointer = $unpacked + $this->pointerBase
        + $this->pointerValueOffset[$pointerSize];

    return [$pointer, $offset];
}

/**
 * Decodes up to four big-endian bytes as an unsigned integer.
 *
 * @param string $bytes zero to four bytes
 *
 * @return int
 */
private function decodeUint($bytes)
{
    list(, $int) = unpack('N', $this->zeroPadLeft($bytes, 4));

    return $int;
}

/**
 * Decodes a big-endian unsigned integer of arbitrary byte length.
 *
 * @param string $bytes      the raw bytes
 * @param int    $byteLength number of bytes in $bytes
 *
 * @throws \RuntimeException if the value needs gmp or bcmath and neither
 *                           extension is loaded
 *
 * @return int|string a native integer when the byte length fits, otherwise
 *                    the value produced by gmp/bcmath as a decimal string
 */
private function decodeBigUint($bytes, $byteLength)
{
    $maxUintBytes = log(PHP_INT_MAX, 2) / 8;

    if ($byteLength === 0) {
        return 0;
    }

    // Work in 32-bit groups; the cast keeps the unpack() repeat count an
    // integer rather than the float ceil() returns.
    $numberOfLongs = (int) ceil($byteLength / 4);
    $paddedLength = $numberOfLongs * 4;
    $paddedBytes = $this->zeroPadLeft($bytes, $paddedLength);
    $unpacked = array_merge(unpack("N$numberOfLongs", $paddedBytes));

    $integer = 0;

    // 2^32
    $twoTo32 = '4294967296';

    foreach ($unpacked as $part) {
        // We only use gmp or bcmath if the final value is too big
        if ($byteLength <= $maxUintBytes) {
            $integer = ($integer << 32) + $part;
        } elseif (extension_loaded('gmp')) {
            $integer = gmp_strval(gmp_add(gmp_mul($integer, $twoTo32), $part));
        } elseif (extension_loaded('bcmath')) {
            $integer = bcadd(bcmul($integer, $twoTo32), $part);
        } else {
            throw new \RuntimeException(
                'The gmp or bcmath extension must be installed to read this database.'
            );
        }
    }

    return $integer;
}

/**
 * @param string $bytes the raw string bytes
 *
 * @return string the same bytes, unchanged
 */
private function decodeString($bytes)
{
    // XXX - NOOP. As far as I know, the end user has to explicitly set the
    // encoding in PHP. Strings are just bytes.
    return $bytes;
}

/**
 * Determines the payload size of a field from its control byte, reading the
 * extra size bytes that follow when the 5-bit size field is 29, 30 or 31.
 *
 * @param int $ctrlByte the control byte of the field
 * @param int $offset   offset of the first byte after the control byte
 *                      (and after the extended type byte, if any)
 *
 * @return array two elements: the size and the offset following any extra
 *               size bytes
 */
private function sizeFromCtrlByte($ctrlByte, $offset)
{
    $size = $ctrlByte & 0x1f;
    $bytesToRead = $size < 29 ? 0 : $size - 28;
    $bytes = Util::read($this->fileStream, $offset, $bytesToRead);
    $decoded = $this->decodeUint($bytes);

    if ($size === 29) {
        $size = 29 + $decoded;
    } elseif ($size === 30) {
        $size = 285 + $decoded;
    } elseif ($size > 30) {
        // Exactly three bytes were read, so $decoded is already a 24-bit
        // value. The previous mask (0x0FFFFFFF >> 8) kept only 20 bits and
        // truncated sizes of 65821 + 2^20 and above.
        $size = 65821 + $decoded;
    }

    return [$size, $offset + $bytesToRead];
}

/**
 * Left-pads $content with NUL bytes up to $desiredLength.
 *
 * @param string $content
 * @param int    $desiredLength
 *
 * @return string
 */
private function zeroPadLeft($content, $desiredLength)
{
    return str_pad($content, $desiredLength, "\x00", STR_PAD_LEFT);
}

/**
 * Reverses $bytes when the byte-order switch flag is set.
 *
 * @param string $bytes
 *
 * @return string
 */
private function maybeSwitchByteOrder($bytes)
{
    return $this->switchByteOrder ? strrev($bytes) : $bytes;
}

/**
 * Packs a 16-bit value in machine byte order and checks whether reading it
 * back as little-endian yields the same value.
 *
 * @return bool true on a little-endian platform
 */
private function isPlatformLittleEndian()
{
    $testint = 0x00FF;
    $packed = pack('S', $testint);

    return $testint === current(unpack('v', $packed));
}