Para quem gosta de utilizar webspiders e capturar sites em massa e, de alguma forma, precisa separar determinados tipos de conteúdo, eis um separador de Joomlas com multithread para acelerar o work! (:
Em breve postando um "mini_bot" para separar diversos ao mesmo tempo ^^
OutPut:
Code:
Em breve postando um "mini_bot" para separar diversos ao mesmo tempo ^^
OutPut:
n4sss@blue-wind:~/lov3/Coding/php/joomla$ php Jseparator.php
Joomla_separator by n4sss with multithread
php Jseparator.php site_list valids threads
Joomla_separator by n4sss with multithread
php Jseparator.php site_list valids threads
Código PHP:
<?php
/*
* Cli Joomla Separator by n4sss
* Multithread option
*
* Contact: nstruct@jabber.org
* */
// Remove PHP's execution time limit (0 means "no limit") for this run.
set_time_limit(0);
// Switch off all PHP error reporting for the remainder of the script.
error_reporting(0);
/**
 * Fetch every URL in $list in concurrent batches and record the ones whose
 * response body looks like a Joomla site.
 *
 * A response is treated as Joomla when its body contains "/templates/" or
 * "/modules/" (case-insensitive). Each checked URL is printed with a running
 * counter; each match is additionally printed with a "joomla: " prefix and
 * appended (one URL per line) to the file named by $valids.
 *
 * @param array      $list    URLs to probe.
 * @param string     $valids  Path of the file that matching URLs are appended to.
 * @param int|string $threads Number of URLs fetched concurrently per batch;
 *                            values below 1 are treated as 1.
 *
 * @return void
 */
function separate($list, $valids, $threads)
{
    // array_chunk() rejects sizes below 1, so clamp whatever the caller passed
    // (the CLI hands this over as a string).
    $batchSize = max(1, (int) $threads);
    $multi = curl_multi_init();
    $inc = 1;

    foreach (array_chunk($list, $batchSize) as $batch) {
        // Start every batch with a fresh handle list. Reusing the previous
        // batch's array made a shorter final batch re-read stale handles and
        // write blank lines to the output file.
        $handles = [];
        foreach ($batch as $i => $target) {
            $handle = curl_init();
            curl_setopt($handle, CURLOPT_URL, $target);
            curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($handle, CURLOPT_TIMEOUT, 5);
            curl_multi_add_handle($multi, $handle);
            $handles[$i] = $handle;
        }

        // Drive all transfers of this batch to completion, blocking on socket
        // activity instead of spinning the CPU.
        do {
            $status = curl_multi_exec($multi, $running);
            if ($status !== CURLM_OK && $status !== CURLM_CALL_MULTI_PERFORM) {
                // Unrecoverable multi-stack error: stop waiting and process
                // whatever was received so far.
                break;
            }
            if ($running > 0 && curl_multi_select($multi, 0.5) < 1) {
                // select() can return immediately without any descriptor to
                // wait on; back off briefly so the loop does not busy-spin.
                usleep(1000);
            }
        } while ($running > 0);

        foreach ($handles as $i => $handle) {
            // A failed transfer yields no content; normalise to a string so
            // the pattern match below always receives valid input.
            $body = (string) curl_multi_getcontent($handle);
            curl_multi_remove_handle($multi, $handle);

            print "[$inc] {$batch[$i]}\n";

            if (preg_match('#/templates/|/modules/#si', $body)) {
                // Append atomically; report (rather than crash on) an
                // unwritable output file.
                if (file_put_contents($valids, $batch[$i] . "\n", FILE_APPEND | LOCK_EX) === false) {
                    print "could not write to $valids\n";
                }
                echo "joomla: {$batch[$i]}\n";
                flush();
            }
            $inc++;
        }
    }

    curl_multi_close($multi);
}
// CLI entry point: expects <site_list> <valids> <threads>. Prints usage when
// any argument is missing or empty, otherwise loads the URL list and hands it
// to separate().
if (empty($argv[1]) || empty($argv[2]) || empty($argv[3])) {
    print "Joomla_separator by n4sss with multithread\n";
    print "php $argv[0] site_list valids threads\n";
} else {
    $listFile = $argv[1];
    $valids = trim($argv[2]);

    // The thread count must be a positive integer; anything else would make
    // the batching inside separate() fail.
    $threads = filter_var(trim($argv[3]), FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
    if ($threads === false) {
        print "threads must be a positive integer\n";
        exit(1);
    }

    // Report an unreadable list instead of silently testing zero URLs.
    $rows = is_readable($listFile) ? file($listFile, FILE_IGNORE_NEW_LINES) : false;
    if ($rows === false) {
        print "cannot read site list: $listFile\n";
        exit(1);
    }

    // Trim each line first so CRLF endings and stray spaces neither end up in
    // the requested URL nor defeat de-duplication; then drop blanks and
    // duplicates.
    $list = array_map('trim', $rows);
    $list = array_filter($list, 'strlen');
    $list = array_values(array_unique($list));

    print "Urls to test: ".count($list)."\n";
    print "Valids to: ".$valids."\n";
    print "Threads: ".$threads."\n";
    separate($list, $valids, $threads);
}
?>
Comment