get_key("task", "get")); if($task and !empty($task)) { $this->$task(); } else { session_destroy(); $this->show_form(); } }

    /**
     * Render the start form of the grabber.
     */
    private function show_form()
    {
        $this->output("grabber_form.tpl");
    }

    /**
     * Fetch the queued base URLs.
     *
     * After each base URL the state is handed to do_ajax("do"), which ends the
     * request while further base URLs remain. Code after the loop therefore
     * only runs once the queue is empty.
     *
     * @return string always "do_sub" (next step: crawl the collected sub pages)
     */
    private function process_main_urls()
    {
        if (is_array($this->main_urls)) {
            foreach ($this->main_urls as $key => $main_url) {
                echo "Read Base Uri: " . $main_url['site_url'] . "\n";
                $this->open_url_entry_base($main_url['site_url']);
                unset($this->main_urls[$key]);
                $this->do_ajax("do");
            }
        }

        $this->post_status();
        if (is_array($this->check_links)) {
            shuffle($this->check_links);
        }

        return "do_sub";
    }

    /**
     * Entry point for one crawl step.
     *
     * Restores the crawl state from the session and dispatches on the "work"
     * GET parameter ('start', 'do', 'do_sub'); any other value does nothing.
     */
    private function do_index()
    {
        global $getpost_mgr;

        $this->checked_links  = $this->session_list('checked_links');
        $this->check_links    = $this->session_list('check_links');
        $this->main_urls      = $this->session_list('main_urls');
        $this->invalid_links  = $this->session_list('invalid_links');
        $this->passed_links   = $this->session_list('passed_links');
        $this->external_links = $this->session_list('external_links');
        $this->page_gen       = microtime(true);

        $work_id = $getpost_mgr->get_key("work", "get");

        switch ($work_id) {
            case 'start':
                $this->do_ajax($this->getting_url_list());
                break;

            case 'do':
                $this->process_main_urls();
                $this->do_ajax("do_sub");
                break;

            case 'do_sub':
                $action = $this->process_sub_pages();
                $this->post_status();
                $this->do_ajax($action);
                break;

            case 'restart':
            default:
                break;
        }
    }

    /**
     * Load the base URLs whose `timestep` is at least two days old.
     *
     * @return string "do" when base URLs were found (stored in $this->main_urls),
     *                otherwise "stop"
     */
    private function getting_url_list()
    {
        global $db;

        echo "\nGetting URL´s that older than Yesterday\n";

        $date   = date('Y-m-d H:i:s', $this->stale_before());
        $query  = "SELECT * FROM url_index WHERE timestep <= '" . $date . "'";
        $result = $db->sqlquery($query, 1);

        if (is_array($result) and !empty($result)) {
            echo "\n" . count($result) . " Base Links Found\n";
            $this->main_urls = $result;
            return "do";
        }

        echo "\nLinks uptodate!\n";
        return "stop";
    }

    /**
     * Index a single sub page unless it is already known or still fresh.
     *
     * @param string $url absolute URL of the page
     * @return int|string 1 when the URL was skipped before any lookup,
     *                    "do_nothing" when the stored entry is still fresh,
     *                    "do_sub" when the page was fetched and handed to check_content()
     */
    private function open_url_entry($url)
    {
        global $db;

        if ($this->list_contains($url, $this->checked_links)
            || $this->list_contains($url, $this->passed_links)) {
            $this->post_status();
            return 1;
        }

        if ($this->has_javascript_scheme($url)) {
            $this->invalid_links[] = $url;
            return 1;
        }

        $query = "SELECT * FROM site_index WHERE url = '" . $this->sql_escape($url) . "'";
        $rows  = $db->sqlquery($query, 1);

        $action = "insert";
        if (is_array($rows) && isset($rows[0]) && is_array($rows[0])) {
            if (strtotime($rows[0]['index_date']) > $this->stale_before()) {
                // Entry was indexed within the last two days: nothing to fetch.
                $this->append_unique('passed_links', $url);
                return "do_nothing";
            }
            $action = "update";
        }

        $this->check_content($url, $this->fetch_page($url), $action);

        return "do_sub";
    }

    /**
     * Index a base URL (row of `url_index`) unless it is already known or fresh.
     *
     * @param string $url base URL as stored in url_index.site_url
     * @return int|null 1 when the URL was skipped, null after the page was processed
     */
    private function open_url_entry_base($url)
    {
        global $db;

        if ($this->list_contains($url, $this->checked_links)) {
            $this->post_status();
            return 1;
        }

        if ($this->has_javascript_scheme($url)) {
            $this->invalid_links[] = $url;
            return 1;
        }

        $query = "SELECT * FROM url_index WHERE site_url = '" . $this->sql_escape($url) . "'";
        $rows  = $db->sqlquery($query, 1);

        $action = "insert";
        if (is_array($rows) && isset($rows[0]) && is_array($rows[0])) {
            if (strtotime($rows[0]['timestep']) > $this->stale_before()) {
                $this->append_unique('passed_links', $url);
                $this->post_status();
                return 1;
            }
            $action = "update";
        }

        $this->check_content($url, $this->fetch_page($url), $action);
    }

    /**
     * Return the text between the first $start and the following $end.
     *
     * @param string $string haystack
     * @param string $start  opening delimiter
     * @param string $end    closing delimiter
     * @return string the enclosed text, or "" when either delimiter is missing
     */
    public function get_string_between($string, $start, $end)
    {
        $from = strpos($string, $start);
        if ($from === false) {
            return "";
        }
        $from += strlen($start);

        $to = strpos($string, $end, $from);
        if ($to === false) {
            return "";
        }

        return substr($string, $from, $to - $from);
    }

    /**
     * Collect the meta tags of a page plus its <title>.
     *
     * get_meta_tags() loads the URL itself (10 second timeout via the default
     * stream context); the title is taken from the already downloaded $content.
     *
     * @param string $url     page URL
     * @param string $content page markup
     * @return array|false meta tag map (with 'title' when one was found),
     *                     or false when the meta tags could not be read
     */
    public function getMetaData($url, $content)
    {
        stream_context_set_default(
            array(
                'http' => array(
                    'timeout' => 10
                )
            )
        );

        // Best effort: a failing URL only yields false, the warning is not wanted.
        $meta = @get_meta_tags($url);

        if (is_array($meta)) {
            $open_tag = '<title>';
            $start    = stripos($content, $open_tag);
            if ($start !== false) {
                $start += strlen($open_tag);
                $stop = stripos($content, '</title>', $start);
                if ($stop !== false) {
                    $meta['title'] = substr($content, $start, $stop - $start);
                }
            }
        }

        return $meta;
    }

    /**
     * Write a page into `site_index`.
     *
     * @param string $url     page URL
     * @param string $content page markup; tags are stripped before storing
     * @param array  $meta    meta data as returned by getMetaData()
     * @param string $action  "update" or "insert"; anything else is ignored
     */
    private function update_entry($url, $content, $meta, $action)
    {
        global $db;

        $safe_url    = $this->sql_escape($url);
        $keywords    = $this->sql_escape($this->meta_value($meta, 'keywords'));
        $title       = $this->sql_escape($this->meta_value($meta, 'title'));
        $description = $this->sql_escape($this->meta_value($meta, 'description'));
        $serialized  = $this->sql_escape(serialize($meta));

        if ($action == "update") {
            // NOTE(review): unlike the insert branch this also removes every
            // space and newline from the text - confirm that this is intended.
            $text = $this->sql_escape(str_replace(array(" ", "\n"), "", strip_tags($content)));

            $query = "UPDATE site_index SET `url` = '" . $safe_url
                . "', `tags` = '" . $keywords
                . "', `title` = '" . $title
                . "', `meta_desc` = '" . $description
                . "', `content` = '" . $text
                . "', `type`= 'forum', `meta_options_serialized` = '" . $serialized
                . "', `index_date` = NOW() WHERE url = '" . $safe_url . "'";
            $db->sqlquery($query);
        } elseif ($action == "insert") {
            $text = $this->sql_escape(strip_tags($content));

            $query = "INSERT INTO site_index (`url`,`tags`, `title`, `meta_desc`, `content`, `index_date`, `type`, `meta_options_serialized`) VALUES ('"
                . $safe_url . "', '" . $keywords . "', '" . $title . "', '" . $description
                . "', '" . $text . "', NOW(),'forum','" . $serialized . "')";
            $db->sqlquery($query);
        }
    }

    /**
     * Turn a link found on a page into an absolute URL and classify it.
     *
     * @param string $current_url URL of the page the link was found on
     * @param string $url_on_page raw href value
     * @return array keys "url", "is_external" (0|1) and "is_passed" (0|1)
     * @throws Exception code 4: empty href, code 1: unparsable URL,
     *                   code 2: page URL without host, code 3: javascript/file scheme
     */
    private function check_link($current_url, $url_on_page)
    {
        global $db;

        $invalid_scheme = array("javascript", "file");

        if (empty($url_on_page)) {
            throw new Exception('$ur_on_page are empty!', 4);
        }

        $parsed_url         = parse_url($url_on_page);
        $parsed_current_url = parse_url($current_url);

        if (!is_array($parsed_url) or !is_array($parsed_current_url)) {
            throw new Exception('#001: invalid link', 1);
        }
        if (!isset($parsed_current_url['host'])) {
            throw new Exception('#002: Unable to check Link [uncomplete base]', 2);
        }

        // NOTE(review): a path without any "/" is replaced by "/", and a relative
        // path such as "dir/page.html" is appended to the host without a leading
        // slash. Kept as found; proper relative URL resolution is not implemented.
        $path = isset($parsed_url['path']) ? $parsed_url['path'] : '';
        if (!strstr($path, "/")) {
            $path = "/";
        }

        $param_str = !empty($parsed_url['query']) ? "?" . $parsed_url['query'] : '';

        if (!empty($parsed_url['scheme'])) {
            $scheme = $parsed_url['scheme'];
        } elseif (isset($parsed_current_url['scheme'])) {
            $scheme = $parsed_current_url['scheme'];
        } else {
            $scheme = 'http';
        }

        if (in_array(strtolower($scheme), $invalid_scheme, true)) {
            throw new Exception('#003: Invalid scheme!', 3);
        }

        $external_url = 0;
        if (isset($parsed_url['host'])) {
            $same_host = $parsed_url['host'] == $parsed_current_url['host']
                || ("www." . $parsed_url['host']) == $parsed_current_url['host'];
            $external_url = $same_host ? 0 : 1;
            $host = $parsed_url['host'];
        } else {
            $host = $parsed_current_url['host'];
        }
        $builded_url = $scheme . "://" . $host . $path . $param_str;

        // Already seen during this crawl: no database lookup needed.
        if ($this->list_contains($builded_url, $this->checked_links)
            || $this->list_contains($builded_url, $this->passed_links)
            || $this->list_contains($builded_url, $this->check_links)) {
            return array("url" => $builded_url, "is_external" => $external_url, "is_passed" => 1);
        }

        $pass_url = 0;
        $query    = "SELECT * FROM site_index WHERE url = '" . $this->sql_escape($builded_url) . "' LIMIT 1";
        // Second argument as in the other SELECTs of this class, which read the
        // result as a list of rows.
        $fetched = $db->sqlquery($query, 1);
        if (is_array($fetched) && isset($fetched[0]['index_date'])) {
            if (strtotime($fetched[0]['index_date']) >= $this->stale_before()) {
                $pass_url = 1;
            }
        }

        return array("url" => $builded_url, "is_external" => $external_url, "is_passed" => $pass_url);
    }

    /**
     * File a URL into exactly one of the link lists.
     *
     * Priority: invalid, passed, external, otherwise the queue of internal
     * links that still have to be checked.
     *
     * @param string $url         absolute URL
     * @param int    $is_external 1 when the URL points to another host
     * @param int    $is_passed   1 when the URL needs no (re-)indexing
     * @param int    $is_valid    0 when the URL cannot be crawled
     * @return int 0 only when the URL was already queued in check_links, else 1
     */
    private function add_link($url, $is_external, $is_passed, $is_valid)
    {
        if ($is_valid == 0) {
            $this->append_unique('invalid_links', $url);
            return 1;
        }
        if ($is_passed == 1) {
            $this->append_unique('passed_links', $url);
            return 1;
        }
        if ($is_external == 1) {
            $this->append_unique('external_links', $url);
            return 1;
        }

        return $this->append_unique('check_links', $url) ? 1 : 0;
    }

    /**
     * Store a fetched page and queue the links it contains.
     *
     * @param string       $base_url URL the content was loaded from
     * @param string|false $content  page markup, false when the download failed
     * @param string       $action   "insert" or "update", forwarded to update_entry()
     */
    private function check_content($base_url, $content, $action)
    {
        if ($content === false) {
            // Download failed: remember the URL so it is not retried in this
            // crawl and do not overwrite the stored entry with empty content.
            $this->checked_links[] = $base_url;
            $this->add_link($base_url, 1, 1, 0);
            return;
        }

        $regexp = "<a\s[^>]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>";
        $links  = array();
        preg_match_all("/$regexp/siU", $content, $links, PREG_SET_ORDER);
        if (!is_array($links)) {
            $links = array();
        }

        $meta = $this->getMetaData($base_url, $content);
        if (!is_array($meta)) {
            $this->checked_links[] = $base_url;
            $this->add_link($base_url, 1, 1, 0);
            return;
        }

        $this->update_entry($base_url, $content, $meta, $action);

        foreach ($links as $link) {
            try {
                $checked = $this->check_link($base_url, $link[2]);
                $this->add_link($checked['url'], $checked['is_external'], $checked['is_passed'], 1);
            } catch (Exception $e) {
                // Only links with a forbidden scheme are recorded as invalid;
                // empty or unparsable hrefs are dropped silently.
                if ($e->getCode() == 3) {
                    $this->add_link($link[2], 1, 1, 0);
                }
            }
        }

        $this->checked_links[] = $base_url;
    }

    /**
     * Work through the queue of internal links in small batches.
     *
     * @return string "do_sub" when the batch limit was reached and more work may
     *                remain, "do" when the queue is empty or fully processed
     */
    private function process_sub_pages()
    {
        if ($this->list_count($this->check_links) == 0) {
            $this->post_status();
            return "do";
        }

        $i = 0;
        foreach ($this->check_links as $item) {
            if ($this->list_contains($item, $this->checked_links)
                || $this->list_contains($item, $this->passed_links)) {
                continue;
            }

            if ($this->has_javascript_scheme($item)) {
                $this->append_unique('invalid_links', $item);
                continue;
            }

            echo "Read Subpageuri: " . $item;
            $ret = $this->open_url_entry($item);
            if ($ret === "do_sub") {
                echo " Checked!\n";
            } else {
                echo " Passed!\n";
            }

            // The limit is tested before the increment, so a batch ends after
            // the eleventh opened link.
            if ($i >= 10) {
                return "do_sub";
            }
            $i++;
        }

        session_destroy();
        return "do";
    }

    /**
     * Print the size and the last entry of every link list, followed by all
     * external links.
     */
    private function post_status()
    {
        echo "\n" . $this->list_count($this->check_links) . " internal Links found!\n";
        echo "\n" . $this->list_count($this->external_links) . " external Links found!\n";
        echo "\n" . $this->list_count($this->invalid_links) . " invalid Links!\n";
        echo "\n" . $this->list_count($this->passed_links) . " Links passed!\n";
        echo "\n" . $this->list_count($this->checked_links) . " Links checked!\n\n";

        echo "\nLast Link in internal links: " . $this->list_last($this->check_links) . "\n";
        echo "\nLast Link in external links: " . $this->list_last($this->external_links) . "\n";
        echo "\nLast Link in invalid links: " . $this->list_last($this->invalid_links) . "\n";
        echo "\nLast Link in Passed links: " . $this->list_last($this->passed_links) . "\n";
        echo "\nLast Link in checked links: " . $this->list_last($this->checked_links) . "\n";

        echo "\nList external Links\n";
        if (is_array($this->external_links)) {
            foreach ($this->external_links as $ex_url) {
                echo "\n>" . $ex_url . "\n";
            }
        }
        echo "\n";
    }

    /**
     * Persist the crawl state in the session and tell the client what to do next.
     *
     * Prints "|<action>" and ends the request. The only exception is "do" with
     * an empty base URL queue: then nothing is printed and the method returns.
     *
     * @param string $action   next step for the client ("do", "do_sub", "start", "stop", ...)
     * @param string $last_url stored as $_SESSION['last']
     */
    private function do_ajax($action, $last_url = '')
    {
        $_SESSION['last']           = $last_url;
        $_SESSION['checked_links']  = $this->checked_links;
        $_SESSION['check_links']    = $this->check_links;
        $_SESSION['main_urls']      = $this->main_urls;
        $_SESSION['invalid_links']  = $this->invalid_links;
        $_SESSION['passed_links']   = $this->passed_links;
        $_SESSION['external_links'] = $this->external_links;

        if ($action == "do" && $this->list_count($this->main_urls) < 1) {
            return;
        }

        echo "|" . $action;
        exit;
    }

    /**
     * Display a Smarty template.
     *
     * @param string $file template file name
     */
    public function output($file)
    {
        global $smarty;

        $smarty->display($file);
    }

    /**
     * Read a list from the session, falling back to an empty array.
     *
     * @param string $key session key
     * @return array
     */
    private function session_list($key)
    {
        if (isset($_SESSION[$key]) && is_array($_SESSION[$key])) {
            return $_SESSION[$key];
        }
        return array();
    }

    /**
     * Timestamp before which an index entry counts as outdated (two days ago).
     *
     * @return int
     */
    private function stale_before()
    {
        return time() - (86400 * 2);
    }

    /**
     * in_array() that tolerates a list that has not been initialised yet.
     *
     * @param string $needle
     * @param mixed  $list
     * @return bool
     */
    private function list_contains($needle, $list)
    {
        return is_array($list) && in_array($needle, $list, true);
    }

    /**
     * count() that tolerates a list that has not been initialised yet.
     *
     * @param mixed $list
     * @return int
     */
    private function list_count($list)
    {
        return is_array($list) ? count($list) : 0;
    }

    /**
     * Last element of a list, or "" when there is none.
     *
     * @param mixed $list
     * @return mixed
     */
    private function list_last($list)
    {
        if (!is_array($list) || empty($list)) {
            return '';
        }
        return end($list);
    }

    /**
     * Append a URL to one of the link list properties unless it is already in it.
     *
     * @param string $list_name property name, e.g. "passed_links"
     * @param string $url
     * @return bool true when the URL was added
     */
    private function append_unique($list_name, $url)
    {
        if (!isset($this->$list_name) || !is_array($this->$list_name)) {
            $this->$list_name = array();
        }
        if (in_array($url, $this->$list_name, true)) {
            return false;
        }
        $this->{$list_name}[] = $url;
        return true;
    }

    /**
     * Tell whether a URL uses the javascript: pseudo scheme.
     *
     * @param string $url
     * @return bool
     */
    private function has_javascript_scheme($url)
    {
        $parsed = parse_url($url);
        return is_array($parsed)
            && isset($parsed['scheme'])
            && strtolower($parsed['scheme']) == "javascript";
    }

    /**
     * Escape a value for use inside a quoted SQL string literal.
     *
     * NOTE(review): mysql_escape_string() is what this class already relied on;
     * the mysql extension no longer exists as of PHP 7. Switch to the escaping
     * or parameter binding offered by the $db layer when migrating.
     *
     * @param mixed $value
     * @return string
     */
    private function sql_escape($value)
    {
        return mysql_escape_string((string) $value);
    }

    /**
     * Read one entry of the meta data array as string.
     *
     * @param mixed  $meta
     * @param string $key
     * @return string "" when the entry is missing
     */
    private function meta_value($meta, $key)
    {
        return (is_array($meta) && isset($meta[$key])) ? (string) $meta[$key] : '';
    }

    /**
     * Download a page with curl, following redirects.
     *
     * @param string $url
     * @return string|false response body, false when the request failed
     */
    private function fetch_page($url)
    {
        $handle = curl_init($url);
        if ($handle === false) {
            return false;
        }

        curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($handle, CURLOPT_USERAGENT, "Meine Browserkennung v1.0 :)");
        curl_setopt($handle, CURLOPT_HTTPHEADER, array("Accept: text/vnd.wap.wml,*.*"));
        curl_setopt($handle, CURLOPT_FOLLOWLOCATION, 1);

        $body = curl_exec($handle);
        curl_close($handle);

        return $body;
    }
}
?>