From a29399a268a4182c895aaf6da9d9b7584fdff290 Mon Sep 17 00:00:00 2001 From: david-fairbanks42 Date: Mon, 14 Apr 2025 08:20:15 -0400 Subject: [PATCH] Complete rework --- .gitignore | 4 - composer.json | 10 +- composer.lock | 309 +++++++++++++++++++++--------------- {config => data}/.gitignore | 0 main.php | 24 --- src/BaseSource.php | 99 ++++++++++++ src/BotDetect.php | 84 ++++++++++ src/GeoPoint.php | 4 +- src/LogSource.php | 238 --------------------------- src/S3Source.php | 144 +++++++++++++++++ src/UpdateIpDb.php | 68 ++++++++ update.php | 19 +++ worker.php | 27 ++++ 13 files changed, 631 insertions(+), 399 deletions(-) rename {config => data}/.gitignore (100%) delete mode 100755 main.php create mode 100644 src/BaseSource.php create mode 100644 src/BotDetect.php delete mode 100644 src/LogSource.php create mode 100644 src/S3Source.php create mode 100644 src/UpdateIpDb.php create mode 100755 update.php create mode 100755 worker.php diff --git a/.gitignore b/.gitignore index 9dab66a..676c9a0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,3 @@ .env vendor .idea - -IP2LOCATION-LITE-DB5.BIN -LICENSE_LITE.TXT -README_LITE.TXT diff --git a/composer.json b/composer.json index f908ea1..53b7a9f 100644 --- a/composer.json +++ b/composer.json @@ -1,19 +1,21 @@ { - "name": "maker-dave/elastic-push", - "description": "Pull remote log files from S3 and inject new lines into local elasticsearch or analysis", + "name": "maker-dave/opensearch-ingest", + "description": "Pull remote log files from S3 and inject new lines into local OpenSearch or analysis", "type": "project", "version": "1.0", "require": { "php": "^8.1", "ext-curl": "*", + "ext-zip": "*", "aws/aws-sdk-php": "^3.314", "vlucas/phpdotenv": "^5.6", - "ip2location/ip2location-php": "^8.3" + "ip2location/ip2location-php": "^8.3", + "jaybizzle/crawler-detect": "^1.3" }, "license": "private", "autoload": { "psr-4": { - "MakerDave\\ElasticPush\\": "src/" + "MakerDave\\OpenSearchIngest\\": "src/" } }, "authors": [ diff --git a/composer.lock b/composer.lock index 0498200..e76c3b7 100644 --- a/composer.lock +++ b/composer.lock @@ -4,20 +4,20 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "c356f4d6e2b0283facee8041cbc68f4c", + "content-hash": "930e662e8eddf6241fcf8a61ce63bff7", "packages": [ { "name": "aws/aws-crt-php", - "version": "v1.2.5", + "version": "v1.2.7", "source": { "type": "git", "url": "https://github.com/awslabs/aws-crt-php.git", - "reference": "0ea1f04ec5aa9f049f97e012d1ed63b76834a31b" + "reference": "d71d9906c7bb63a28295447ba12e74723bd3730e" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/awslabs/aws-crt-php/zipball/0ea1f04ec5aa9f049f97e012d1ed63b76834a31b", - "reference": "0ea1f04ec5aa9f049f97e012d1ed63b76834a31b", + "url": "https://api.github.com/repos/awslabs/aws-crt-php/zipball/d71d9906c7bb63a28295447ba12e74723bd3730e", + "reference": "d71d9906c7bb63a28295447ba12e74723bd3730e", "shasum": "" }, "require": { @@ -56,22 +56,22 @@ ], "support": { "issues": "https://github.com/awslabs/aws-crt-php/issues", - "source": "https://github.com/awslabs/aws-crt-php/tree/v1.2.5" + "source": "https://github.com/awslabs/aws-crt-php/tree/v1.2.7" }, - "time": "2024-04-19T21:30:56+00:00" + "time": "2024-10-18T22:15:13+00:00" }, { "name": "aws/aws-sdk-php", - "version": "3.314.1", + "version": "3.342.26", "source": { "type": "git", "url": "https://github.com/aws/aws-sdk-php.git", - "reference": "85207a1f342443f40711ac8fe246120821370f11" + "reference": "99f6f8521117534990e608e040b083015465c49c" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/aws/aws-sdk-php/zipball/85207a1f342443f40711ac8fe246120821370f11", - "reference": "85207a1f342443f40711ac8fe246120821370f11", + "url": "https://api.github.com/repos/aws/aws-sdk-php/zipball/99f6f8521117534990e608e040b083015465c49c", + "reference": "99f6f8521117534990e608e040b083015465c49c", "shasum": "" }, "require": { @@ -79,31 +79,30 @@ "ext-json": "*", "ext-pcre": "*", "ext-simplexml": "*", - "guzzlehttp/guzzle": "^6.5.8 || ^7.4.5", - "guzzlehttp/promises": "^1.4.0 || ^2.0", - "guzzlehttp/psr7": "^1.9.1 || ^2.4.5", - "mtdowling/jmespath.php": "^2.6", - "php": ">=7.2.5", - "psr/http-message": "^1.0 || ^2.0" + "guzzlehttp/guzzle": "^7.4.5", + "guzzlehttp/promises": "^2.0", + "guzzlehttp/psr7": "^2.4.5", + "mtdowling/jmespath.php": "^2.8.0", + "php": ">=8.1", + "psr/http-message": "^2.0" }, "require-dev": { "andrewsville/php-token-reflection": "^1.4", "aws/aws-php-sns-message-validator": "~1.0", "behat/behat": "~3.0", - "composer/composer": "^1.10.22", + "composer/composer": "^2.7.8", "dms/phpunit-arraysubset-asserts": "^0.4.0", "doctrine/cache": "~1.4", "ext-dom": "*", "ext-openssl": "*", "ext-pcntl": "*", "ext-sockets": "*", - "nette/neon": "^2.3", - "paragonie/random_compat": ">= 2", "phpunit/phpunit": "^5.6.3 || ^8.5 || ^9.5", - "psr/cache": "^1.0", - "psr/simple-cache": "^1.0", - "sebastian/comparator": "^1.2.3 || ^4.0", - "yoast/phpunit-polyfills": "^1.0" + "psr/cache": "^2.0 || ^3.0", + "psr/simple-cache": "^2.0 || ^3.0", + "sebastian/comparator": "^1.2.3 || ^4.0 || ^5.0", + "symfony/filesystem": "^v6.4.0 || ^v7.1.0", + "yoast/phpunit-polyfills": "^2.0" }, "suggest": { "aws/aws-php-sns-message-validator": "To validate incoming SNS notifications", @@ -124,7 +123,10 @@ ], "psr-4": { "Aws\\": "src/" - } + }, + "exclude-from-classmap": [ + "src/data/" + ] }, "notification-url": "https://packagist.org/downloads/", "license": [ @@ -149,32 +151,32 @@ "sdk" ], "support": { - "forum": "https://forums.aws.amazon.com/forum.jspa?forumID=80", + "forum": "https://github.com/aws/aws-sdk-php/discussions", "issues": "https://github.com/aws/aws-sdk-php/issues", - "source": "https://github.com/aws/aws-sdk-php/tree/3.314.1" + "source": "https://github.com/aws/aws-sdk-php/tree/3.342.26" }, - "time": "2024-06-13T18:07:00+00:00" + "time": "2025-04-11T18:10:41+00:00" }, { "name": "graham-campbell/result-type", - "version": "v1.1.2", + "version": "v1.1.3", "source": { "type": "git", "url": "https://github.com/GrahamCampbell/Result-Type.git", - "reference": "fbd48bce38f73f8a4ec8583362e732e4095e5862" + "reference": "3ba905c11371512af9d9bdd27d99b782216b6945" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/GrahamCampbell/Result-Type/zipball/fbd48bce38f73f8a4ec8583362e732e4095e5862", - "reference": "fbd48bce38f73f8a4ec8583362e732e4095e5862", + "url": "https://api.github.com/repos/GrahamCampbell/Result-Type/zipball/3ba905c11371512af9d9bdd27d99b782216b6945", + "reference": "3ba905c11371512af9d9bdd27d99b782216b6945", "shasum": "" }, "require": { "php": "^7.2.5 || ^8.0", - "phpoption/phpoption": "^1.9.2" + "phpoption/phpoption": "^1.9.3" }, "require-dev": { - "phpunit/phpunit": "^8.5.34 || ^9.6.13 || ^10.4.2" + "phpunit/phpunit": "^8.5.39 || ^9.6.20 || ^10.5.28" }, "type": "library", "autoload": { @@ -203,7 +205,7 @@ ], "support": { "issues": "https://github.com/GrahamCampbell/Result-Type/issues", - "source": "https://github.com/GrahamCampbell/Result-Type/tree/v1.1.2" + "source": "https://github.com/GrahamCampbell/Result-Type/tree/v1.1.3" }, "funding": [ { @@ -215,26 +217,26 @@ "type": "tidelift" } ], - "time": "2023-11-12T22:16:48+00:00" + "time": "2024-07-20T21:45:45+00:00" }, { "name": "guzzlehttp/guzzle", - "version": "7.8.1", + "version": "7.9.3", "source": { "type": "git", "url": "https://github.com/guzzle/guzzle.git", - "reference": "41042bc7ab002487b876a0683fc8dce04ddce104" + "reference": "7b2f29fe81dc4da0ca0ea7d42107a0845946ea77" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/guzzle/guzzle/zipball/41042bc7ab002487b876a0683fc8dce04ddce104", - "reference": "41042bc7ab002487b876a0683fc8dce04ddce104", + "url": "https://api.github.com/repos/guzzle/guzzle/zipball/7b2f29fe81dc4da0ca0ea7d42107a0845946ea77", + "reference": "7b2f29fe81dc4da0ca0ea7d42107a0845946ea77", "shasum": "" }, "require": { "ext-json": "*", - "guzzlehttp/promises": "^1.5.3 || ^2.0.1", - "guzzlehttp/psr7": "^1.9.1 || ^2.5.1", + "guzzlehttp/promises": "^1.5.3 || ^2.0.3", + "guzzlehttp/psr7": "^2.7.0", "php": "^7.2.5 || ^8.0", "psr/http-client": "^1.0", "symfony/deprecation-contracts": "^2.2 || ^3.0" @@ -245,9 +247,9 @@ "require-dev": { "bamarni/composer-bin-plugin": "^1.8.2", "ext-curl": "*", - "php-http/client-integration-tests": "dev-master#2c025848417c1135031fdf9c728ee53d0a7ceaee as 3.0.999", + "guzzle/client-integration-tests": "3.0.2", "php-http/message-factory": "^1.1", - "phpunit/phpunit": "^8.5.36 || ^9.6.15", + "phpunit/phpunit": "^8.5.39 || ^9.6.20", "psr/log": "^1.1 || ^2.0 || ^3.0" }, "suggest": { @@ -325,7 +327,7 @@ ], "support": { "issues": "https://github.com/guzzle/guzzle/issues", - "source": "https://github.com/guzzle/guzzle/tree/7.8.1" + "source": "https://github.com/guzzle/guzzle/tree/7.9.3" }, "funding": [ { @@ -341,20 +343,20 @@ "type": "tidelift" } ], - "time": "2023-12-03T20:35:24+00:00" + "time": "2025-03-27T13:37:11+00:00" }, { "name": "guzzlehttp/promises", - "version": "2.0.2", + "version": "2.2.0", "source": { "type": "git", "url": "https://github.com/guzzle/promises.git", - "reference": "bbff78d96034045e58e13dedd6ad91b5d1253223" + "reference": "7c69f28996b0a6920945dd20b3857e499d9ca96c" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/guzzle/promises/zipball/bbff78d96034045e58e13dedd6ad91b5d1253223", - "reference": "bbff78d96034045e58e13dedd6ad91b5d1253223", + "url": "https://api.github.com/repos/guzzle/promises/zipball/7c69f28996b0a6920945dd20b3857e499d9ca96c", + "reference": "7c69f28996b0a6920945dd20b3857e499d9ca96c", "shasum": "" }, "require": { @@ -362,7 +364,7 @@ }, "require-dev": { "bamarni/composer-bin-plugin": "^1.8.2", - "phpunit/phpunit": "^8.5.36 || ^9.6.15" + "phpunit/phpunit": "^8.5.39 || ^9.6.20" }, "type": "library", "extra": { @@ -408,7 +410,7 @@ ], "support": { "issues": "https://github.com/guzzle/promises/issues", - "source": "https://github.com/guzzle/promises/tree/2.0.2" + "source": "https://github.com/guzzle/promises/tree/2.2.0" }, "funding": [ { @@ -424,20 +426,20 @@ "type": "tidelift" } ], - "time": "2023-12-03T20:19:20+00:00" + "time": "2025-03-27T13:27:01+00:00" }, { "name": "guzzlehttp/psr7", - "version": "2.6.2", + "version": "2.7.1", "source": { "type": "git", "url": "https://github.com/guzzle/psr7.git", - "reference": "45b30f99ac27b5ca93cb4831afe16285f57b8221" + "reference": "c2270caaabe631b3b44c85f99e5a04bbb8060d16" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/guzzle/psr7/zipball/45b30f99ac27b5ca93cb4831afe16285f57b8221", - "reference": "45b30f99ac27b5ca93cb4831afe16285f57b8221", + "url": "https://api.github.com/repos/guzzle/psr7/zipball/c2270caaabe631b3b44c85f99e5a04bbb8060d16", + "reference": "c2270caaabe631b3b44c85f99e5a04bbb8060d16", "shasum": "" }, "require": { @@ -452,8 +454,8 @@ }, "require-dev": { "bamarni/composer-bin-plugin": "^1.8.2", - "http-interop/http-factory-tests": "^0.9", - "phpunit/phpunit": "^8.5.36 || ^9.6.15" + "http-interop/http-factory-tests": "0.9.0", + "phpunit/phpunit": "^8.5.39 || ^9.6.20" }, "suggest": { "laminas/laminas-httphandlerrunner": "Emit PSR-7 responses" @@ -524,7 +526,7 @@ ], "support": { "issues": "https://github.com/guzzle/psr7/issues", - "source": "https://github.com/guzzle/psr7/tree/2.6.2" + "source": "https://github.com/guzzle/psr7/tree/2.7.1" }, "funding": [ { @@ -540,7 +542,7 @@ "type": "tidelift" } ], - "time": "2023-12-03T20:05:35+00:00" + "time": "2025-03-27T12:30:47+00:00" }, { "name": "ip2location/ip2location-php", @@ -587,17 +589,69 @@ "time": "2020-11-23T04:30:39+00:00" }, { - "name": "mtdowling/jmespath.php", - "version": "2.7.0", + "name": "jaybizzle/crawler-detect", + "version": "v1.3.4", "source": { "type": "git", - "url": "https://github.com/jmespath/jmespath.php.git", - "reference": "bbb69a935c2cbb0c03d7f481a238027430f6440b" + "url": "https://github.com/JayBizzle/Crawler-Detect.git", + "reference": "d3b7ff28994e1b0de764ab7412fa269a79634ff3" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/jmespath/jmespath.php/zipball/bbb69a935c2cbb0c03d7f481a238027430f6440b", - "reference": "bbb69a935c2cbb0c03d7f481a238027430f6440b", + "url": "https://api.github.com/repos/JayBizzle/Crawler-Detect/zipball/d3b7ff28994e1b0de764ab7412fa269a79634ff3", + "reference": "d3b7ff28994e1b0de764ab7412fa269a79634ff3", + "shasum": "" + }, + "require": { + "php": ">=7.1.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8|^5.5|^6.5|^9.4" + }, + "type": "library", + "autoload": { + "psr-4": { + "Jaybizzle\\CrawlerDetect\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Mark Beech", + "email": "m@rkbee.ch", + "role": "Developer" + } + ], + "description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent", + "homepage": "https://github.com/JayBizzle/Crawler-Detect/", + "keywords": [ + "crawler", + "crawler detect", + "crawler detector", + "crawlerdetect", + "php crawler detect" + ], + "support": { + "issues": "https://github.com/JayBizzle/Crawler-Detect/issues", + "source": "https://github.com/JayBizzle/Crawler-Detect/tree/v1.3.4" + }, + "time": "2025-03-05T23:12:10+00:00" + }, + { + "name": "mtdowling/jmespath.php", + "version": "2.8.0", + "source": { + "type": "git", + "url": "https://github.com/jmespath/jmespath.php.git", + "reference": "a2a865e05d5f420b50cc2f85bb78d565db12a6bc" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/jmespath/jmespath.php/zipball/a2a865e05d5f420b50cc2f85bb78d565db12a6bc", + "reference": "a2a865e05d5f420b50cc2f85bb78d565db12a6bc", "shasum": "" }, "require": { @@ -614,7 +668,7 @@ "type": "library", "extra": { "branch-alias": { - "dev-master": "2.7-dev" + "dev-master": "2.8-dev" } }, "autoload": { @@ -648,22 +702,22 @@ ], "support": { "issues": "https://github.com/jmespath/jmespath.php/issues", - "source": "https://github.com/jmespath/jmespath.php/tree/2.7.0" + "source": "https://github.com/jmespath/jmespath.php/tree/2.8.0" }, - "time": "2023-08-25T10:54:48+00:00" + "time": "2024-09-04T18:46:31+00:00" }, { "name": "phpoption/phpoption", - "version": "1.9.2", + "version": "1.9.3", "source": { "type": "git", "url": "https://github.com/schmittjoh/php-option.git", - "reference": "80735db690fe4fc5c76dfa7f9b770634285fa820" + "reference": "e3fac8b24f56113f7cb96af14958c0dd16330f54" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/schmittjoh/php-option/zipball/80735db690fe4fc5c76dfa7f9b770634285fa820", - "reference": "80735db690fe4fc5c76dfa7f9b770634285fa820", + "url": "https://api.github.com/repos/schmittjoh/php-option/zipball/e3fac8b24f56113f7cb96af14958c0dd16330f54", + "reference": "e3fac8b24f56113f7cb96af14958c0dd16330f54", "shasum": "" }, "require": { @@ -671,13 +725,13 @@ }, "require-dev": { "bamarni/composer-bin-plugin": "^1.8.2", - "phpunit/phpunit": "^8.5.34 || ^9.6.13 || ^10.4.2" + "phpunit/phpunit": "^8.5.39 || ^9.6.20 || ^10.5.28" }, "type": "library", "extra": { "bamarni-bin": { "bin-links": true, - "forward-command": true + "forward-command": false }, "branch-alias": { "dev-master": "1.9-dev" @@ -713,7 +767,7 @@ ], "support": { "issues": "https://github.com/schmittjoh/php-option/issues", - "source": "https://github.com/schmittjoh/php-option/tree/1.9.2" + "source": "https://github.com/schmittjoh/php-option/tree/1.9.3" }, "funding": [ { @@ -725,7 +779,7 @@ "type": "tidelift" } ], - "time": "2023-11-12T21:59:55+00:00" + "time": "2024-07-20T21:41:07+00:00" }, { "name": "psr/http-client", @@ -933,16 +987,16 @@ }, { "name": "symfony/deprecation-contracts", - "version": "v3.5.0", + "version": "v3.5.1", "source": { "type": "git", "url": "https://github.com/symfony/deprecation-contracts.git", - "reference": "0e0d29ce1f20deffb4ab1b016a7257c4f1e789a1" + "reference": "74c71c939a79f7d5bf3c1ce9f5ea37ba0114c6f6" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/deprecation-contracts/zipball/0e0d29ce1f20deffb4ab1b016a7257c4f1e789a1", - "reference": "0e0d29ce1f20deffb4ab1b016a7257c4f1e789a1", + "url": "https://api.github.com/repos/symfony/deprecation-contracts/zipball/74c71c939a79f7d5bf3c1ce9f5ea37ba0114c6f6", + "reference": "74c71c939a79f7d5bf3c1ce9f5ea37ba0114c6f6", "shasum": "" }, "require": { @@ -950,12 +1004,12 @@ }, "type": "library", "extra": { + "thanks": { + "url": "https://github.com/symfony/contracts", + "name": "symfony/contracts" + }, "branch-alias": { "dev-main": "3.5-dev" - }, - "thanks": { - "name": "symfony/contracts", - "url": "https://github.com/symfony/contracts" } }, "autoload": { @@ -980,7 +1034,7 @@ "description": "A generic function and convention to trigger deprecation notices", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/deprecation-contracts/tree/v3.5.0" + "source": "https://github.com/symfony/deprecation-contracts/tree/v3.5.1" }, "funding": [ { @@ -996,24 +1050,24 @@ "type": "tidelift" } ], - "time": "2024-04-18T09:32:20+00:00" + "time": "2024-09-25T14:20:29+00:00" }, { "name": "symfony/polyfill-ctype", - "version": "v1.29.0", + "version": "v1.31.0", "source": { "type": "git", "url": "https://github.com/symfony/polyfill-ctype.git", - "reference": "ef4d7e442ca910c4764bce785146269b30cb5fc4" + "reference": "a3cc8b044a6ea513310cbd48ef7333b384945638" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/polyfill-ctype/zipball/ef4d7e442ca910c4764bce785146269b30cb5fc4", - "reference": "ef4d7e442ca910c4764bce785146269b30cb5fc4", + "url": "https://api.github.com/repos/symfony/polyfill-ctype/zipball/a3cc8b044a6ea513310cbd48ef7333b384945638", + "reference": "a3cc8b044a6ea513310cbd48ef7333b384945638", "shasum": "" }, "require": { - "php": ">=7.1" + "php": ">=7.2" }, "provide": { "ext-ctype": "*" @@ -1024,8 +1078,8 @@ "type": "library", "extra": { "thanks": { - "name": "symfony/polyfill", - "url": "https://github.com/symfony/polyfill" + "url": "https://github.com/symfony/polyfill", + "name": "symfony/polyfill" } }, "autoload": { @@ -1059,7 +1113,7 @@ "portable" ], "support": { - "source": "https://github.com/symfony/polyfill-ctype/tree/v1.29.0" + "source": "https://github.com/symfony/polyfill-ctype/tree/v1.31.0" }, "funding": [ { @@ -1075,24 +1129,24 @@ "type": "tidelift" } ], - "time": "2024-01-29T20:11:03+00:00" + "time": "2024-09-09T11:45:10+00:00" }, { "name": "symfony/polyfill-mbstring", - "version": "v1.29.0", + "version": "v1.31.0", "source": { "type": "git", "url": "https://github.com/symfony/polyfill-mbstring.git", - "reference": "9773676c8a1bb1f8d4340a62efe641cf76eda7ec" + "reference": "85181ba99b2345b0ef10ce42ecac37612d9fd341" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/polyfill-mbstring/zipball/9773676c8a1bb1f8d4340a62efe641cf76eda7ec", - "reference": "9773676c8a1bb1f8d4340a62efe641cf76eda7ec", + "url": "https://api.github.com/repos/symfony/polyfill-mbstring/zipball/85181ba99b2345b0ef10ce42ecac37612d9fd341", + "reference": "85181ba99b2345b0ef10ce42ecac37612d9fd341", "shasum": "" }, "require": { - "php": ">=7.1" + "php": ">=7.2" }, "provide": { "ext-mbstring": "*" @@ -1103,8 +1157,8 @@ "type": "library", "extra": { "thanks": { - "name": "symfony/polyfill", - "url": "https://github.com/symfony/polyfill" + "url": "https://github.com/symfony/polyfill", + "name": "symfony/polyfill" } }, "autoload": { @@ -1139,7 +1193,7 @@ "shim" ], "support": { - "source": "https://github.com/symfony/polyfill-mbstring/tree/v1.29.0" + "source": "https://github.com/symfony/polyfill-mbstring/tree/v1.31.0" }, "funding": [ { @@ -1155,30 +1209,30 @@ "type": "tidelift" } ], - "time": "2024-01-29T20:11:03+00:00" + "time": "2024-09-09T11:45:10+00:00" }, { "name": "symfony/polyfill-php80", - "version": "v1.29.0", + "version": "v1.31.0", "source": { "type": "git", "url": "https://github.com/symfony/polyfill-php80.git", - "reference": "87b68208d5c1188808dd7839ee1e6c8ec3b02f1b" + "reference": "60328e362d4c2c802a54fcbf04f9d3fb892b4cf8" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/polyfill-php80/zipball/87b68208d5c1188808dd7839ee1e6c8ec3b02f1b", - "reference": "87b68208d5c1188808dd7839ee1e6c8ec3b02f1b", + "url": "https://api.github.com/repos/symfony/polyfill-php80/zipball/60328e362d4c2c802a54fcbf04f9d3fb892b4cf8", + "reference": "60328e362d4c2c802a54fcbf04f9d3fb892b4cf8", "shasum": "" }, "require": { - "php": ">=7.1" + "php": ">=7.2" }, "type": "library", "extra": { "thanks": { - "name": "symfony/polyfill", - "url": "https://github.com/symfony/polyfill" + "url": "https://github.com/symfony/polyfill", + "name": "symfony/polyfill" } }, "autoload": { @@ -1219,7 +1273,7 @@ "shim" ], "support": { - "source": "https://github.com/symfony/polyfill-php80/tree/v1.29.0" + "source": "https://github.com/symfony/polyfill-php80/tree/v1.31.0" }, "funding": [ { @@ -1235,27 +1289,27 @@ "type": "tidelift" } ], - "time": "2024-01-29T20:11:03+00:00" + "time": "2024-09-09T11:45:10+00:00" }, { "name": "vlucas/phpdotenv", - "version": "v5.6.0", + "version": "v5.6.1", "source": { "type": "git", "url": "https://github.com/vlucas/phpdotenv.git", - "reference": "2cf9fb6054c2bb1d59d1f3817706ecdb9d2934c4" + "reference": "a59a13791077fe3d44f90e7133eb68e7d22eaff2" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/vlucas/phpdotenv/zipball/2cf9fb6054c2bb1d59d1f3817706ecdb9d2934c4", - "reference": "2cf9fb6054c2bb1d59d1f3817706ecdb9d2934c4", + "url": "https://api.github.com/repos/vlucas/phpdotenv/zipball/a59a13791077fe3d44f90e7133eb68e7d22eaff2", + "reference": "a59a13791077fe3d44f90e7133eb68e7d22eaff2", "shasum": "" }, "require": { "ext-pcre": "*", - "graham-campbell/result-type": "^1.1.2", + "graham-campbell/result-type": "^1.1.3", "php": "^7.2.5 || ^8.0", - "phpoption/phpoption": "^1.9.2", + "phpoption/phpoption": "^1.9.3", "symfony/polyfill-ctype": "^1.24", "symfony/polyfill-mbstring": "^1.24", "symfony/polyfill-php80": "^1.24" @@ -1272,7 +1326,7 @@ "extra": { "bamarni-bin": { "bin-links": true, - "forward-command": true + "forward-command": false }, "branch-alias": { "dev-master": "5.6-dev" @@ -1307,7 +1361,7 @@ ], "support": { "issues": "https://github.com/vlucas/phpdotenv/issues", - "source": "https://github.com/vlucas/phpdotenv/tree/v5.6.0" + "source": "https://github.com/vlucas/phpdotenv/tree/v5.6.1" }, "funding": [ { @@ -1319,19 +1373,20 @@ "type": "tidelift" } ], - "time": "2023-11-12T22:43:29+00:00" + "time": "2024-07-20T21:52:34+00:00" } ], "packages-dev": [], "aliases": [], "minimum-stability": "stable", - "stability-flags": [], + "stability-flags": {}, "prefer-stable": false, "prefer-lowest": false, "platform": { "php": "^8.1", - "ext-curl": "*" + "ext-curl": "*", + "ext-zip": "*" }, - "platform-dev": [], + "platform-dev": {}, "plugin-api-version": "2.6.0" } diff --git a/config/.gitignore b/data/.gitignore similarity index 100% rename from config/.gitignore rename to data/.gitignore diff --git a/main.php b/main.php deleted file mode 100755 index e1b266a..0000000 --- a/main.php +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env php -safeLoad(); - -foreach (new DirectoryIterator(__DIR__ . '/config') as $file) { - if ($file->isDot() || $file->isDir()) { - continue; - } - if ($file->getExtension() !== 'json') { - continue; - } - - $worker = new LogSource($file->getPathname()); - $worker->work(); -} diff --git a/src/BaseSource.php b/src/BaseSource.php new file mode 100644 index 0000000..0ec72f3 --- /dev/null +++ b/src/BaseSource.php @@ -0,0 +1,99 @@ + + * @version 1.0 + */ +abstract class BaseSource +{ + protected const BULK_SIZE = 50; + + protected array $events; + protected int $sendCount = 0; + + protected function addEvent(array $data, string $ingestIndex, DateTime $date = null, string $id = null): void + { + if ($date === null) { + $date = date_create($data['time']); + $date->setTimezone(new DateTimeZone('UTC')); + } + if ($id === null) { + $id = hash('sha1', json_encode($data)); + } + + $data['@timestamp'] = date_format($date, 'c'); + $data['time'] = date_format($date, 'c'); + + if (! empty($data['remoteIp'])) { + $location = GeoPoint::fromIp($data['remoteIp']); + if ($location) { + $data['location'] = $location; + } + } + if (! empty($data['userAgent'])) { + $data['isBot'] = BotDetect::fromAgentString($data['userAgent']); + } + + $this->events[] = ['index' => $ingestIndex, 'hash' => $id, 'date' => $data['time'], 'line' => json_encode($data)]; + } + + protected function send(): void + { + $this->sendOpenSearch(); + } + + protected function sendOpenSearch(): void + { + $body = ''; + foreach ($this->events as $item) { + $body .= json_encode(['create' => ['_index' => $item['index'], '_id' => $item['hash']]]) . "\n"; + $body .= $item['line'] . "\n"; + } + $this->events = []; + + $curl = curl_init(); + curl_setopt_array($curl, [ + CURLOPT_URL => $_ENV['OPENSEARCH_HOST'] . '/_bulk', + CURLOPT_HTTPHEADER => [ + 'Content-Type: application/json', + 'Authorization: Basic ' . base64_encode($_ENV['OPENSEARCH_USER'] . ':' . $_ENV['OPENSEARCH_PASS']), + ], + //CURLOPT_USERNAME => $_ENV['OPENSEARCH_USER'], + //CURLOPT_USERPWD => $_ENV['OPENSEARCH_PASS'], + CURLOPT_RETURNTRANSFER => true, + CURLOPT_SSL_VERIFYPEER => false, + CURLOPT_SSL_VERIFYHOST => false, + CURLOPT_USERAGENT => 'MakerDave Log Sender 1.0', + CURLOPT_POST => true, + CURLOPT_POSTFIELDS => $body, + //CURLOPT_VERBOSE => true, + ]); + + $resp = curl_exec($curl); + $code = curl_getinfo($curl, CURLINFO_HTTP_CODE); + curl_close($curl); + + $response = json_decode($resp, true); + if (json_last_error() !== JSON_ERROR_NONE) { + echo "Error decoding response ($code):\n$resp\n"; + exit(0); + } + if ($response['errors']) { + echo "Errors: " . $resp . "\n"; + } + } +} \ No newline at end of file diff --git a/src/BotDetect.php b/src/BotDetect.php new file mode 100644 index 0000000..62448f3 --- /dev/null +++ b/src/BotDetect.php @@ -0,0 +1,84 @@ + + * @version 1.0 + */ +class BotDetect +{ + protected static CrawlerDetect $crawlerDetect; + protected static array $cache = []; + + public static function fromAgentString(string $ua): bool + { + if (isset(self::$cache[$ua])) { + return self::$cache[$ua]; + } + + foreach ([ + 'Googlebot', + 'bingbot', + 'MJ12bot', + 'Barkrowler', + 'Bytespider', + 'DotBot', + 'DataForSeoBot', + 'SemrushBot', + 'BLEXBot', + 'AhrefsBot', + 'CensysInspect', + 'PetalBot', + 'Applebot', + 'Pinterestbot', + 'oBot', + 'SearchBot', + 'GPTBot', + 'PerplexityBot', + 'Amazonbot', + 'DuckDuckGo-Favicons-Bot', + 'SeznamBot', + 'zoominfobot', + 'webprosbot', + 'InternetMeasurement', + 'compatible; BW/', + 'Pandalytics', + 'Photon/', + ' curl/', + ' libcurl/', + 'facebookexternalhit', + 'Amazon-Route53-Health-Check-Service', + ] as $str) { + if (str_contains($ua, $str)) { + self::$cache[$ua] = true; + + return true; + } + } + + self::ensureCrawlerDetect(); + self::$cache[$ua] = self::$crawlerDetect->isCrawler($ua); + + return self::$cache[$ua]; + } + + protected static function ensureCrawlerDetect(): void + { + if (isset(self::$crawlerDetect)) { + return; + } + + self::$crawlerDetect = new CrawlerDetect(); + } +} \ No newline at end of file diff --git a/src/GeoPoint.php b/src/GeoPoint.php index 3adee5f..abcb9f7 100644 --- a/src/GeoPoint.php +++ b/src/GeoPoint.php @@ -6,7 +6,7 @@ * @license Proprietary */ -namespace MakerDave\ElasticPush; +namespace MakerDave\OpenSearchIngest; use IP2Location\Database as Ip2LocationDb; @@ -58,7 +58,7 @@ class GeoPoint } try { - self::$ip2Location = new Ip2LocationDb(__DIR__ . '/../IP2LOCATION-LITE-DB5.BIN'); + self::$ip2Location = new Ip2LocationDb(__DIR__ . '/../data/IP2LOCATION-LITE-DB5.BIN'); } catch (\Exception $e) { } diff --git a/src/LogSource.php b/src/LogSource.php deleted file mode 100644 index 56d25ef..0000000 --- a/src/LogSource.php +++ /dev/null @@ -1,238 +0,0 @@ - - * @version 1.0 - */ -class LogSource -{ - protected const BULK_SIZE = 50; - - protected object $config; - protected array $files; - protected DateTime|null $lastDate = null; - protected string|null $lastHash = null; - protected bool $lastFound = false; - protected array $events; - protected int $skipCount = 0; - protected int $sendCount = 0; - - protected S3Client $client; - - /** - * @param string $configPath - * @throws Exception - */ - public function __construct( - protected string $configPath - ) { - $config = json_decode(file_get_contents($configPath)); - if (!is_object($config)) { - throw new Exception('Unable to parse log configuration file: ' . $configPath); - } - $this->config = $config; - - if (isset($this->config->file) && is_string($this->config->file)) { - $this->files = [$this->config->file]; - } elseif (isset($this->config->file) && is_array($this->config->file)) { - $this->files = $this->config->file; - } elseif (isset($this->config->files) && is_string($this->config->files)) { - $this->files = [$this->config->files]; - } else { - $this->files = $this->config->files; - } - - if (count($this->files) > 1) { - $this->files = array_unique($this->files); - rsort($this->files); - } - - if ($this->config->date !== null) { - $lastDate = date_create($this->config->date); - if ($lastDate === false) { - throw new Exception('Unable to parse last date: ' . $configPath); - } - $lastDate->setTimezone(new DateTimeZone('UTC')); - $this->lastDate = $lastDate; - } - if ($this->config->hash !== null) { - $this->lastHash = $this->config->hash; - } else { - $this->lastFound = true; - } - - $this->client = new S3Client([ - 'profile' => 'david-fairbanks42', - 'version' => 'latest', - 'region' => 'us-east-1', - ]); - } - - public function work(): void - { - foreach ($this->files as $file) { - if ($this->getFile($file) === false) { - continue; - } - $this->processFile($file); - unlink($_ENV['TEMP_LOG_STORE_DIR'] . DIRECTORY_SEPARATOR . $file); - } - - if (! empty($this->events)) { - $this->send(); - } - - echo sprintf( - "%s\n\tSkipped %d log entries\n\tSent %d log entries\n", - basename($this->configPath), - $this->skipCount, - $this->sendCount - ); - - if ($this->config->hash !== null) { - file_put_contents($this->configPath, json_encode($this->config)); - } - } - - protected function getFile(string $file): bool - { - if (! file_exists($_ENV['TEMP_LOG_STORE_DIR'])) { - mkdir($_ENV['TEMP_LOG_STORE_DIR']); - } - if (! is_dir($_ENV['TEMP_LOG_STORE_DIR'])) { - throw new Exception('Target directory ' . $_ENV['TEMP_LOG_STORE_DIR'] . ' exists but is not a directory'); - } - - try { - $this->client->getObject([ - 'Bucket' => 'fairbanks-publishing-cloudtrail', - 'Key' => 'app-logs/' . $file, - 'SaveAs' => $_ENV['TEMP_LOG_STORE_DIR'] . DIRECTORY_SEPARATOR . $file, - ]); - return true; - } catch (AwsException $e) { - echo sprintf('ERROR: Unable get log file %s: %s', $file, $e->getMessage()); - - return false; - } - } - - protected function processFile(string $file): void - { - $fh = fopen($_ENV['TEMP_LOG_STORE_DIR'] . DIRECTORY_SEPARATOR . $file, 'r'); - - $lineNumber = 0; - while (!feof($fh)) { - $lineNumber ++; - $line = trim(fgets($fh)); - if (empty($line)) { - continue; - } - - $data = json_decode($line, true); - if (! is_array($data)) { - continue; - } - $date = date_create($data['time']); - if ($date === false) { - continue; - } - $date->setTimezone(new DateTimeZone('UTC')); - - if ($this->lastDate !== null && $date < $this->lastDate) { - $this->skipCount ++; - continue; - } - - $hash = hash('sha1', '(' . $lineNumber . ') ' . $line); - if ($this->lastFound === false && $this->lastDate !== null && $date == $this->lastDate && $hash == $this->lastHash) { - $this->lastFound = true; - $this->skipCount ++; - continue; - } - - if ($this->lastFound === false && $this->lastDate !== null && $date > $this->lastDate) { - $this->lastFound = true; - } - - if ($this->lastFound === false) { - continue; - } - - $data['@timestamp'] = date_format($date, 'c'); - $data['time'] = date_format($date, 'c'); - - if (! empty($data['remoteIp'])) { - $location = GeoPoint::fromIp($data['remoteIp']); - if ($location) { - $data['location'] = $location; - } - } - - $this->events[] = ['hash' => $hash, 'date' => $data['time'], 'line' => json_encode($data)]; - $this->sendCount ++; - - $this->config->date = $data['time']; - $this->config->hash = $hash; - - if (count($this->events) == self::BULK_SIZE) { - $this->send(); - $this->events = []; - } - } - fclose($fh); - } - - protected function send(): void - { - $body = ''; - foreach ($this->events as $item) { - $body .= json_encode(['create' => ['_index' => $this->config->index, '_id' => $item['hash']]]) . "\n"; - $body .= $item['line'] . "\n"; - } - - $curl = curl_init(); - curl_setopt_array($curl, [ - CURLOPT_URL => $_ENV['ELASTICSEARCH_HOST'] . '/_bulk', - CURLOPT_HTTPHEADER => [ - 'Content-Type: application/json', - //'Authorization: ApiKey ' . $_ENV['ELASTICSEARCH_API_KEY'], - ], - CURLOPT_RETURNTRANSFER => true, - CURLOPT_SSL_VERIFYPEER => false, - CURLOPT_SSL_VERIFYHOST => false, - CURLOPT_USERAGENT => 'MakerDave Log Sender 1.0', - CURLOPT_POST => true, - CURLOPT_POSTFIELDS => $body, - //CURLOPT_VERBOSE => true, - ]); - - $resp = curl_exec($curl); - curl_close($curl); - - $response = json_decode($resp, true); - if (json_last_error() !== JSON_ERROR_NONE) { - echo "Error decoding response:\n$resp\n"; - exit(0); - } - if ($response['errors']) { - echo $resp . "\n"; - } - } -} \ No newline at end of file diff --git a/src/S3Source.php b/src/S3Source.php new file mode 100644 index 0000000..b38b873 --- /dev/null +++ b/src/S3Source.php @@ -0,0 +1,144 @@ + + * @version 1.0 + */ +class S3Source extends BaseSource +{ + protected const MIN_FILE_AGE_SECONDS = 30; + + protected S3Client $client; + + public function __construct() + { + $this->client = new S3Client([ + 'credentials' => [ + 'key' => $_ENV['AWS_ACCESS_KEY'], + 'secret' => $_ENV['AWS_SECRET_KEY'], + ], + 'version' => 'latest', + 'region' => $_ENV['AWS_DEFAULT_REGION'], + ]); + } + + public function process(): void + { + $list = $this->client->listObjectsV2([ + 'Bucket' => $_ENV['S3_BUCKET'], + 'Prefix' => $_ENV['S3_PREFIX'], + ]); + + $now = new DateTime(); + $now->setTimezone(new DateTimeZone('UTC')); + + $count = 0; + if (! empty($list['Contents'])) { + foreach ($list['Contents'] as $object) { + if ($this->isFileTooNew($object['LastModified'], $now)) { + echo "Skipping for age: {$object['Key']}\n"; + continue; + } + echo "Working: {$object['Key']}\n"; + $ingestIndex = $this->getIngestIndex($object['Key']); + $contents = $this->getContents($object['Key']); + $this->workContents($contents, $ingestIndex, $object['Key']); + $this->deleteFile($object['Key']); + $count++; + } + } + + if (! empty($this->events)) { + $this->send(); + } + + echo sprintf( + "Sent %d log entries from %d files (v %s)\n", + $this->sendCount, + $count, + $_ENV['BUILD_VERSION'] + ); + } + + protected function isFileTooNew($lastModified, DateTime $now): bool + { + $diff = $now->getTimestamp() - $lastModified->getTimestamp(); + + return $diff < self::MIN_FILE_AGE_SECONDS; + } + + protected function getContents(string $key): string + { + $response = $this->client->getObject([ + 'Bucket' => $_ENV['S3_BUCKET'], + 'Key' => $key, + ]); + + return $response['Body']->getContents(); + } + + protected function getIngestIndex(string $key): string + { + // [Key] => fluent-bit-logs/sys/store-20240703-1328-wjkeWc37 + [, $tag,] = explode('/', $key, 3); + + return match($tag) { + 'sys' => 'system-health', + 'access' => 'access-logs', + default => throw new Exception('Invalid ingest index: ' . $key), + }; + } + + protected function workContents(string $contents, string $ingestIndex, string $key): void + { + foreach (explode("\n", $contents) as $index => $line) { + $line = trim($line); + if (empty($line)) { + continue; + } + $data = json_decode($line, true); + if (json_last_error() !== JSON_ERROR_NONE) { + echo "Error decoding line $index of $key\n"; + echo "\t$line\n"; + continue; + } + if (! is_array($data) || empty($data)) { + echo "Invalid line $index of $key\n"; + echo "\t$line\n"; + continue; + } + + unset($data['date']); // date field is created by fluent-bit + + $this->addEvent($data, $ingestIndex); + $this->sendCount ++; + + if (count($this->events) == self::BULK_SIZE) { + $this->send(); + } + } + } + + protected function deleteFile(string $key): void + { + $this->client->deleteObject([ + 'Bucket' => $_ENV['S3_BUCKET'], + 'Key' => $key, + ]); + } +} \ No newline at end of file diff --git a/src/UpdateIpDb.php b/src/UpdateIpDb.php new file mode 100644 index 0000000..6f940e4 --- /dev/null +++ b/src/UpdateIpDb.php @@ -0,0 +1,68 @@ + + * @version 1.0 + */ +class UpdateIpDb +{ + public function update(string $dir) + { + $url = sprintf( + 'https://www.ip2location.com/download/?token=%s&file=%s', + $_ENV['IP2LOCATION_TOKEN'], + 'DB5LITEBIN' + ); + $name = 'IP2LOCATION-LITE-DB5.BIN.ZIP'; + $zipPath = $dir . DIRECTORY_SEPARATOR . 'data' . DIRECTORY_SEPARATOR . $name; + $extractPath = $dir . DIRECTORY_SEPARATOR . 'data' . DIRECTORY_SEPARATOR . 'ip2location-update'; + + if (file_exists($zipPath)) { + unlink($zipPath); + } + + $fh = fopen($url, 'r'); + $size = file_put_contents($zipPath, $fh); + fclose($fh); + + if ($size == 0) { + throw new Exception('Error downloading update'); + } + + $zip = new ZipArchive(); + $r = $zip->open($zipPath); + if ($r === true) { + $zip->extractTo($extractPath); + $zip->close(); + } else { + throw new Exception('Error opening zip archive'); + } + + foreach (new DirectoryIterator($extractPath) as $file) { + if ($file->isFile() && ! $file->isDot()) { + if (file_exists($dir . DIRECTORY_SEPARATOR . 'data' . DIRECTORY_SEPARATOR . $file->getFilename())) { + unlink($dir . DIRECTORY_SEPARATOR . 'data' . DIRECTORY_SEPARATOR . $file->getFilename()); + } + + rename($file->getPathname(), $dir . DIRECTORY_SEPARATOR . 'data' . DIRECTORY_SEPARATOR . $file->getFilename()); + } + } + + rmdir($extractPath); + unlink($zipPath); + } +} \ No newline at end of file diff --git a/update.php b/update.php new file mode 100755 index 0000000..e55c1b0 --- /dev/null +++ b/update.php @@ -0,0 +1,19 @@ +#!/usr/bin/env php +safeLoad(); + +$updater = new UpdateIpDb(); +$updater->update(__DIR__); diff --git a/worker.php b/worker.php new file mode 100755 index 0000000..341d9e2 --- /dev/null +++ b/worker.php @@ -0,0 +1,27 @@ +#!/usr/bin/env php +safeLoad(); + +$source = new S3Source(); +$source->process();