Asked by: melkawakibi
Introduction
I'm working on a project for scanning website on vulnerabilities threats. Therefore I need to program a Spider to index all the pages.
I'm using a combination of two libraries to program the Spider.
1) Symfony\Component\BrowserKit\Client //is a abstract class
2) mmerian\phpcrawl\PHPCrawler //is a concrete class with override function
In order to use them it is required to extends both of them because one is abstract and the other has an override function I need to make it practical.
PHP doesn't allow multiple inheritance, is there a way around this issue?
Spider.php
<?php
namespace App\Core;
use PHPCrawler; //I need to inherit this object
use PHPCrawlerDocumentInfo;
use Symfony\Component\BrowserKit\Client as BaseClient;
class Spider extends BaseClient
{
private $url;
private $phpCrawler;
public function __construct($url){
parent::__construct();
//I have instantiated the object instead of inheriting it.
$this->phpCrawler = new PHPCrawler;
$this->url = $url;
}
public function setup(){
$this->phpCrawler->setURL($this->url);
$this->phpCrawler->addContentTypeReceiveRule("#text/html#");
$this->phpCrawler->addURLFilterRule("#\.(jpg|jpeg|gif|png|css)$# i");
}
public function start(){
$this->setup();
echo 'Starting spider' . PHP_EOL;
$this->phpCrawler->go();
$report = $this->phpCrawler->getProcessReport();
echo "Summary:". PHP_EOL;
echo "Links followed: ".$report->links_followed . PHP_EOL;
echo "Documents received: ".$report->files_received . PHP_EOL;
echo "Bytes received: ".$report->bytes_received." bytes". PHP_EOL;
echo "Process runtime: ".$report->process_runtime." sec" . PHP_EOL;
if(!empty($this->phpCrawler->links_found)){
echo 'not empty';
}
}
//Override - This doesn't work because it is not inherit
public function handleDocumentInfo(PHPCrawlerDocumentInfo $pageInfo){
$this->parseHTMLDocument($pageInfo->url, $pageInfo->content);
}
public function parseHTMLDocument($url, $content){
$crawler = $this->request('GET', $url);
$crawler->filter('a')->each(function (Crawler $node, $i){
echo $node->attr('href');
});
}
//This is a abstract function
public function doRequest($request){}
}
No comments:
Post a Comment