Browse Source

FEATURE: Screenshot whole website into sub folder

feature/kickstart
Daniel Siepmann 4 years ago
parent
commit
5d2e8a934d
Signed by: Daniel Siepmann
GPG Key ID: 33D6629915560EF4
  1. 3
      .gitignore
  2. 11
      comparison
  3. 23
      composer.json
  4. 101
      src/Command/CreateBaseCommand.php
  5. 60
      src/Model/UrlListDto.php
  6. 207
      src/Service/ScreenshotCrawlerService.php

3
.gitignore vendored

@ -0,0 +1,3 @@
/vendor/
/composer.lock
/output/

11
comparison

@ -0,0 +1,11 @@
#!/usr/bin/env php
<?php
require __DIR__ . '/vendor/autoload.php';
use Codappix\WebsiteComparison\Command\CreateBaseCommand;
use Symfony\Component\Console\Application;
// Build the console application, register the only available command and
// hand control over to Symfony Console, which parses the CLI arguments.
$console = new Application();
$createBaseCommand = new CreateBaseCommand();
$console->add($createBaseCommand);
$console->run();

23
composer.json

@ -0,0 +1,23 @@
{
"name": "codappix/website-comparison",
"description": "Compares a Website visually by comparing Screenshots.",
"type": "project",
"license": "GPL-2.0-or-later",
"authors": [
{
"name": "Daniel Siepmann",
"email": "coding@daniel-siepmann.de"
}
],
"autoload": {
"psr-4": {
"Codappix\\WebsiteComparison\\": "src/"
}
},
"require": {
"facebook/webdriver": "^1.6",
"symfony/console": "^4.1",
"symfony/process": "^4.1",
"guzzlehttp/psr7": "^1.4"
}
}

101
src/Command/CreateBaseCommand.php

@ -0,0 +1,101 @@
<?php
namespace Codappix\WebsiteComparison\Command;
/*
* Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
use Codappix\WebsiteComparison\Service\ScreenshotCrawlerService;
use Facebook\WebDriver\Chrome\ChromeDriver;
use Facebook\WebDriver\Chrome\ChromeDriverService;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
/**
 * Console command "comparison:createbase".
 *
 * Crawls the website given as base url and stores a screenshot of every
 * visited page, to be used as the base for future comparison.
 */
class CreateBaseCommand extends Command
{
    /**
     * Not read or written by this class itself; kept for subclasses.
     *
     * @var Process
     */
    protected $chromeProcess;

    /**
     * Registers name, help texts, the two options and the required argument.
     */
    protected function configure()
    {
        $this
            ->setName('comparison:createbase')
            ->setDescription('Creates the base for comparison.')
            ->setHelp('Crawls and screenshots the original website, as a base for future comparison.')
            // VALUE_REQUIRED: the options themselves stay optional, but when
            // given they need a value. With VALUE_OPTIONAL a bare
            // "--screenshotDir" resolved to null and failed with a TypeError
            // inside the crawler instead of a readable console error.
            ->addOption(
                'screenshotDir',
                null,
                InputOption::VALUE_REQUIRED,
                'Define the sub directory to use for storing created Screenshots.',
                'output'
            )
            ->addOption(
                'screenshotWidth',
                null,
                InputOption::VALUE_REQUIRED,
                'The width for screen resolution and screenshots.',
                3840
            )
            ->addArgument(
                'baseUrl',
                InputArgument::REQUIRED,
                'E.g. https://typo3.org/ the base url of the website to crawl.'
            )
        ;
    }

    /**
     * Validates the input, starts the browser and runs the crawler.
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     * @return int Exit code, 0 on success.
     * @throws \InvalidArgumentException If screenshotWidth is not a positive integer.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // Validate before starting the browser, so nothing has to be cleaned
        // up when the input is unusable. Values from the CLI arrive as strings.
        $screenshotWidth = filter_var($input->getOption('screenshotWidth'), FILTER_VALIDATE_INT);
        if ($screenshotWidth === false || $screenshotWidth < 1) {
            throw new \InvalidArgumentException('The option "screenshotWidth" must be a positive integer.');
        }

        $driver = $this->getDriver();
        try {
            $screenshotCrawler = new ScreenshotCrawlerService(
                $output,
                $driver,
                (string) $input->getArgument('baseUrl'),
                (string) $input->getOption('screenshotDir'),
                $screenshotWidth
            );
            $screenshotCrawler->crawl();
        } finally {
            // Always end the browser session, even if crawling failed,
            // otherwise the started browser keeps running after the command.
            $driver->quit();
        }

        return 0;
    }

    /**
     * Starts a ChromeDriver session using a chromedriver service on port 9515.
     *
     * The path to the chromedriver executable is hard coded.
     * NOTE(review): "--headless" is passed to the chromedriver executable
     * here, not to the browser — confirm it has the intended effect.
     */
    protected function getDriver(): ChromeDriver
    {
        $chromeDriverService = new ChromeDriverService(
            '/usr/lib/chromium-browser/chromedriver',
            9515,
            [
                '--port=9515',
                '--headless',
            ]
        );

        return ChromeDriver::start(null, $chromeDriverService);
    }
}

60
src/Model/UrlListDto.php

@ -0,0 +1,60 @@
<?php
namespace Codappix\WebsiteComparison\Model;
/*
* Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
/**
 * Keeps track of urls that still have to be processed and urls that are done.
 *
 * Every url is only queued once: urls already queued or already finished are
 * ignored by addUrl().
 */
class UrlListDto
{
    /**
     * @var string[] Urls that were marked as finished.
     */
    protected $finishedUrls = [];

    /**
     * @var string[] Urls waiting to be processed, in insertion order.
     */
    protected $upcomingUrls = [];

    /**
     * Queues the url, unless it is already queued or finished.
     */
    public function addUrl(string $link)
    {
        if ($this->isUrlKnown($link)) {
            return;
        }

        $this->upcomingUrls[] = $link;
    }

    /**
     * Returns the oldest queued url without removing it.
     *
     * @return string The next url, or an empty string if nothing is queued.
     */
    public function getNextUrl(): string
    {
        // reset() signals an empty array with false (not null), so the
        // previous "?? ''" never applied and only worked by type coercion.
        $nextUrl = reset($this->upcomingUrls);

        return $nextUrl === false ? '' : $nextUrl;
    }

    /**
     * Removes the url from the queue and remembers it as finished.
     */
    public function markUrlAsFinished(string $link)
    {
        $upcomingEntry = array_search($link, $this->upcomingUrls, true);
        // Only unset on a real hit: array_search() returns false for unknown
        // urls, and false used as array key would remove the entry at index 0.
        if ($upcomingEntry !== false) {
            unset($this->upcomingUrls[$upcomingEntry]);
        }

        if (!in_array($link, $this->finishedUrls, true)) {
            $this->finishedUrls[] = $link;
        }
    }

    /**
     * Whether the url is either queued or already finished.
     */
    public function isUrlKnown(string $link): bool
    {
        // Strict comparison, to not treat numeric looking strings as equal.
        return in_array($link, $this->finishedUrls, true)
            || in_array($link, $this->upcomingUrls, true);
    }
}

207
src/Service/ScreenshotCrawlerService.php

@ -0,0 +1,207 @@
<?php
namespace Codappix\WebsiteComparison\Service;
/*
* Copyright (C) 2018 Daniel Siepmann <coding@daniel-siepmann.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
use Codappix\WebsiteComparison\Model\UrlListDto;
use Facebook\WebDriver\Remote\RemoteWebDriver;
use Facebook\WebDriver\Remote\RemoteWebElement;
use Facebook\WebDriver\WebDriverBy;
use GuzzleHttp\Psr7\Uri;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Process\Process;
/**
 * Crawls a website, starting at the base url, and creates a screenshot for
 * every visited page.
 *
 * Pages are opened through the given web driver to determine their height and
 * to collect further links. The screenshot itself is created by a separate
 * headless "chromium-browser" process.
 */
class ScreenshotCrawlerService
{
    /**
     * @var OutputInterface
     */
    protected $output;

    /**
     * @var RemoteWebDriver
     */
    protected $driver;

    /**
     * @var string Base url, always with a single trailing slash.
     */
    protected $baseUrl = '';

    /**
     * @var string Absolute directory for screenshots, with trailing separator.
     */
    protected $screenshotDir = '';

    /**
     * @var int
     */
    protected $screenshotWidth = 3840;

    /**
     * @param OutputInterface $output Used to report created screenshots and failures.
     * @param RemoteWebDriver $driver Used to open pages and to inspect them.
     * @param string $baseUrl Url where crawling starts; its host defines internal links.
     * @param string $screenshotDir Directory, relative to the folder three levels above this file.
     * @param int $screenshotWidth Window width passed to the screenshot process.
     */
    public function __construct(
        OutputInterface $output,
        RemoteWebDriver $driver,
        string $baseUrl,
        string $screenshotDir = 'output/',
        int $screenshotWidth = 3840
    ) {
        $this->output = $output;
        $this->driver = $driver;
        $this->baseUrl = rtrim($baseUrl, '/') . '/';
        $this->screenshotDir = implode(DIRECTORY_SEPARATOR, [
            dirname(dirname(dirname(__FILE__))),
            rtrim($screenshotDir, '/')
        ]) . DIRECTORY_SEPARATOR;
        $this->screenshotWidth = $screenshotWidth;
    }

    /**
     * Visits the base url and all internal links found on the way, creating
     * one screenshot per visited url.
     *
     * @throws \Exception If a screenshot folder could not be created.
     */
    public function crawl()
    {
        $this->createScreenshotDirIfNecessary();

        $linkList = new UrlListDto();
        $linkList->addUrl($this->baseUrl);

        // An empty string marks the end of the queue.
        while (($url = $linkList->getNextUrl()) !== '') {
            $this->driver->get($url);

            // NOTE(review): the height is measured in the driver's window,
            // which is not resized to screenshotWidth here — confirm that the
            // measured height matches the layout at screenshot width.
            $screenshotHeight = $this->driver->findElement(WebDriverBy::cssSelector('body'))
                ->getSize()
                ->getHeight();

            // Use the current url of the driver, which might differ from the
            // requested one.
            $this->createScreenshot($this->driver->getCurrentURL(), $screenshotHeight);
            $linkList->markUrlAsFinished($url);

            $furtherLinks = $this->fetchFurtherLinks(
                $this->driver->findElements(WebDriverBy::cssSelector('a'))
            );
            foreach ($furtherLinks as $furtherLink) {
                $linkList->addUrl($furtherLink);
            }
        }
    }

    /**
     * Creates the screenshot folder, or the given sub folder within.
     *
     * @param string $subPath Optional path below the screenshot folder.
     * @throws \Exception If folder could not be created.
     */
    protected function createScreenshotDirIfNecessary(string $subPath = '')
    {
        $dir = $this->screenshotDir;
        if ($subPath !== '') {
            // screenshotDir already ends with a separator.
            $dir .= trim($subPath, DIRECTORY_SEPARATOR);
        }

        // Check the folder that was requested, not only the screenshot root.
        // The trailing is_dir() covers a folder created concurrently.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            throw new \Exception('Could not create screenshot dir: "' . $dir . '".', 1535528875);
        }
    }

    /**
     * Creates the screenshot for the url by running headless chromium.
     *
     * A failing process is reported as error on the output and does not stop
     * the crawl.
     *
     * @param string $url Url to screenshot.
     * @param int $height Window height passed to the screenshot process.
     * @throws \Exception If the target folder could not be created.
     */
    protected function createScreenshot(string $url, int $height)
    {
        $screenshotTarget = $this->getScreenshotTarget($url);
        $this->createScreenshotDirIfNecessary(dirname($screenshotTarget));

        $screenshotProcess = new Process([
            'chromium-browser',
            '--headless',
            '--disable-gpu',
            '--window-size=' . $this->screenshotWidth . ',' . $height,
            '--screenshot=' . $this->screenshotDir . $screenshotTarget,
            $url
        ]);
        $screenshotProcess->run();

        if (!$screenshotProcess->isSuccessful()) {
            // Do not claim success below if chromium did not finish properly.
            $this->output->writeln(sprintf(
                '<error>Could not create screenshot "%s" for url "%s" (exit code %d).</error>',
                $this->screenshotDir . $screenshotTarget,
                $url,
                (int) $screenshotProcess->getExitCode()
            ));
            return;
        }

        if ($this->output->isVerbose()) {
            $this->output->writeln(sprintf(
                '<info>Created screenshot "%s" for url "%s".</info>',
                $this->screenshotDir . $screenshotTarget,
                $url
            ));
        }
    }

    /**
     * Builds the file path of the screenshot, relative to the screenshot dir.
     *
     * The path consists of scheme, host, path and query of the url, skipping
     * empty parts, followed by ".png".
     *
     * @return string
     */
    protected function getScreenshotTarget(string $url)
    {
        $uri = new Uri($url);

        return implode(
            DIRECTORY_SEPARATOR,
            array_filter(
                [
                    $uri->getScheme(),
                    $uri->getHost(),
                    trim($uri->getPath(), '/'),
                    $uri->getQuery(),
                ],
                function (string $string) {
                    return trim($string, ' /') !== '';
                }
            )
        ) . '.png';
    }

    /**
     * Returns the links of all elements that point to crawlable internal pages.
     *
     * Elements without usable link are skipped.
     *
     * @param RemoteWebElement[] $webElements
     * @return string[]
     */
    protected function fetchFurtherLinks(array $webElements): array
    {
        $links = [];

        foreach ($webElements as $webElement) {
            try {
                $links[] = $this->fetchLinkFromElement($webElement);
            } catch (\Exception $e) {
                // No href, not parsable, not http(s) or external: skip element.
                continue;
            }
        }

        return $links;
    }

    /**
     * Returns the href of the element, without fragment.
     *
     * @throws \Exception If the element has no usable href, or the link is
     *                    either not a http(s) link or points to another host.
     */
    protected function fetchLinkFromElement(RemoteWebElement $element): string
    {
        $href = $element->getAttribute('href');
        if (!is_string($href)) {
            throw new \Exception('Did not get a Uri for element.', 1535530859);
        }

        // Fragments only address a position within the same document, keeping
        // them would crawl and screenshot the same page multiple times.
        $uri = (new Uri($href))->withFragment('');

        // An empty url must never be queued, as the empty string marks the
        // end of the queue and would stop the crawl.
        if ((string) $uri === '') {
            throw new \Exception('Did not get a Uri for element.', 1535530859);
        }

        // E.g. "mailto:" or "javascript:" links have no host and would
        // otherwise be treated like internal links.
        if (!$this->hasCrawlableScheme($uri)) {
            throw new \Exception('Was not a http(s) link.', 1535639057);
        }

        if ($this->isInternalLink($uri)) {
            return (string) $uri;
        }

        throw new \Exception('Was external link.', 1535639056);
    }

    /**
     * Whether the uri has no host at all, or the host of the base url.
     */
    protected function isInternalLink(Uri $uri): bool
    {
        $validHosts = [
            '',
            (new Uri($this->baseUrl))->getHost(),
        ];

        return in_array($uri->getHost(), $validHosts, true);
    }

    /**
     * Whether the uri has no scheme at all, or uses http or https.
     */
    private function hasCrawlableScheme(Uri $uri): bool
    {
        return in_array($uri->getScheme(), ['', 'http', 'https'], true);
    }
}
Loading…
Cancel
Save