<?php
// Open (or resume) the PHP session before the class is used: the bot below
// stores its crawl state in $_SESSION between requests.
session_start();
class bot {
	
	/** Internal URLs queued for fetching (filled by add_link(), consumed by process_sub_pages()). */
	public $check_links = array();
	/** URLs whose content has already been processed by check_content() in this run. */
	public $checked_links = array();
	/** URLs that add_link() classified as external. */
	public $external_links = array();
	/** URLs that were skipped ("passed"), e.g. because their index entry is still fresh. */
	public $passed_links = array();
	/** URLs that were rejected as invalid (for example javascript: links). */
	public $invalid_links = array();
	/** Not referenced by any method visible in this class. */
	public $depth = 6;
	/** Base URL rows (each with a 'site_url' key) loaded by getting_url_list(). */
	public $main_urls = array();
	/** microtime(true) value recorded when do_index() starts handling a request. */
	public $page_gen = 0;
	
	/**
	 * Method named after the class. PHP versions that still support
	 * PHP 4-style constructors run it as the constructor. It does nothing.
	 */
	function bot() {
	}
	
	/**
	 * Entry point: dispatches the request to the handler named by the "task"
	 * value obtained from $getpost_mgr->get_key("task", "get").
	 *
	 * Only handlers on the whitelist below can be reached. The task value comes
	 * from the request, so calling it unchecked would let a client invoke any
	 * method of this class (including private ones) or trigger a fatal error
	 * with an unknown name.
	 *
	 * Without a valid task the session is destroyed and the form is shown.
	 */
	public function start() {
		global $getpost_mgr;

		// NOTE(review): whitelist assumed from the parameterless handlers in this
		// class; add further task names here if the front end uses them.
		$allowed_tasks = array("do_index", "show_form");

		$task = $getpost_mgr->get_key("task", "get");
		if(is_string($task) and in_array($task, $allowed_tasks, true)) {
			$this->$task();
			return;
		}

		session_destroy();
		$this->show_form();
	}
	
	/**
	 * Renders the grabber form template through output().
	 */
	private function show_form() {
		$template = "grabber_form.tpl";
		$this->output($template);
	}
	
	/**
	 * Crawls the queued base URLs and removes each one from the queue.
	 *
	 * After every base URL do_ajax("do") is called. It saves the state and
	 * ends the request while further base URLs remain, so the code after the
	 * loop only runs once the queue is empty.
	 *
	 * @return string always "do_sub"
	 */
	private function process_main_urls() {
		foreach($this->main_urls as $index => $row) {
			$site_url = $row['site_url'];
			echo "Read Base Uri: " . $site_url . "<br />";
			$this->open_url_entry_base($site_url);
			unset($this->main_urls[$index]);
			$this->do_ajax("do");
		}

		$this->post_status();
		shuffle($this->check_links);

		return "do_sub";
	}
	
	/**
	 * Handles one crawl step.
	 *
	 * Restores the crawl state from the session, then runs the step named by
	 * $getpost_mgr->get_key("work", "get"):
	 *  - "start":  load the base URLs that are due for re-indexing
	 *  - "do":     crawl the base URLs
	 *  - "do_sub": crawl a batch of the queued sub pages
	 * Any other value (including "restart") does nothing.
	 *
	 * Each step ends with do_ajax(), which stores the state and reports the
	 * next action.
	 */
	private function do_index() {
		global $getpost_mgr;

		// A missing or non-array session entry must not replace the list with
		// null; the other methods expect arrays.
		$state_keys = array("checked_links", "check_links", "main_urls", "invalid_links", "passed_links", "external_links");
		foreach($state_keys as $key) {
			$this->$key = (isset($_SESSION[$key]) and is_array($_SESSION[$key])) ? $_SESSION[$key] : array();
		}
		$this->page_gen = microtime(true);

		$work_id = $getpost_mgr->get_key("work","get");

		switch($work_id) {
			case 'start':
				$action = $this->getting_url_list();
				$this->do_ajax($action);
				break;
			case 'do':
				// process_main_urls() always returns "do_sub".
				$action = $this->process_main_urls();
				$this->do_ajax($action);
				break;
			case 'do_sub':
				$action = $this->process_sub_pages();
				$this->post_status();
				$this->do_ajax($action);
				break;
			case 'restart':
			default:
				break;
		}
	}
	
	/**
	 * Loads the base URLs from url_index whose `timestep` is at least two days
	 * old and stores them in $this->main_urls.
	 *
	 * @return string "do" when base URLs were found, otherwise "stop"
	 */
	private function getting_url_list() {
		global $db;
		echo "<div>Getting URL´s that older than Yesterday</div>";

		// Cut-off: now minus 2 * 86400 seconds.
		$date = date('Y-m-d H:i:s', time()-(86400*2));
		$query = "SELECT * FROM
					url_index
				WHERE timestep <= '" . $date . "'";
		$result = $db->sqlquery($query , 1);

		if(is_array($result) and !empty($result)) {
			echo '<div style="color: green;">' . count($result) . " Base Links Found</div>";
			$this->main_urls = $result;
			return "do";
		}

		echo '<div style="color:orange;">Links uptodate!</div>';
		return "stop";
	}
	
	/**
	 * Fetches a sub page and hands its content to check_content(), unless the
	 * page is already known or its site_index entry is newer than two days.
	 *
	 * @param string $url absolute URL of the page
	 * @return mixed "do_sub" when a fetch was attempted, "do_nothing" when the
	 *               index entry is still fresh, 1 when the URL was skipped
	 *               (already passed, unparsable or a javascript: link)
	 */
	private function open_url_entry($url) {
		global $db;
		$agent = "Meine Browserkennung v1.0 :)";
		$header = array("Accept: text/vnd.wap.wml,*.*");

		if(is_array($this->passed_links) and in_array($url, $this->passed_links)) {
			$this->post_status();
			return 1;
		}

		// parse_url() returns false for seriously malformed URLs and omits
		// 'scheme' for scheme-less ones.
		$parsed = parse_url($url);
		if(!is_array($parsed) or (isset($parsed['scheme']) and strtolower($parsed['scheme']) == "javascript")) {
			$this->invalid_links[] = $url;
			return 1;
		}

		// The URL originates from crawled pages, so it must be escaped. This
		// uses the same escaping function as the rest of the class.
		$select_query = "SELECT * FROM site_index WHERE url = '" . mysql_escape_string($url) . "'";
		$select_result = $db->sqlquery($select_query, 1);
		$row = (is_array($select_result) and isset($select_result[0]) and is_array($select_result[0])) ? $select_result[0] : null;

		$last_day = time()-(86400*2);
		$action = "insert";
		if($row !== null) {
			$time_entry = isset($row['index_date']) ? strtotime($row['index_date']) : false;
			if($time_entry > $last_day) {
				// Entry is still fresh: remember the URL as passed, do not fetch.
				if(!is_array($this->passed_links) or !in_array($url, $this->passed_links)) $this->passed_links[] = $url;
				return "do_nothing";
			}
			$action = "update";
		}

		$read = false;
		$website_base = curl_init($url);
		if($website_base !== false) {
			curl_setopt_array($website_base, array(
				CURLOPT_RETURNTRANSFER => 1,
				CURLOPT_USERAGENT => $agent,
				CURLOPT_HTTPHEADER => $header,
				CURLOPT_FOLLOWLOCATION => 1,
			));
			$read = curl_exec($website_base);
			curl_close($website_base);
		}

		if($read === false) {
			// Fetch failed: record the URL as checked and invalid so it is
			// not retried during this run.
			$this->checked_links[] = $url;
			if(!is_array($this->invalid_links) or !in_array($url, $this->invalid_links)) $this->invalid_links[] = $url;
			return "do_sub";
		}

		$this->check_content($url, $read, $action);
		return "do_sub";
	}
	
	/**
	 * Fetches a base URL and hands its content to check_content(), unless the
	 * URL was already checked or its url_index entry is newer than two days.
	 *
	 * NOTE(review): the insert/update action is derived from url_index but is
	 * applied to site_index by update_entry(); confirm this is intended.
	 *
	 * @param string $url base URL (url_index.site_url)
	 * @return mixed 1 when the URL was skipped, otherwise null
	 */
	private function open_url_entry_base($url) {
		global $db;
		$agent = "Meine Browserkennung v1.0 :)";
		$header = array("Accept: text/vnd.wap.wml,*.*");

		if(is_array($this->checked_links) and in_array($url, $this->checked_links)) {
			$this->post_status();
			return 1;
		}

		// parse_url() returns false for seriously malformed URLs and omits
		// 'scheme' for scheme-less ones.
		$parsed = parse_url($url);
		if(!is_array($parsed) or (isset($parsed['scheme']) and strtolower($parsed['scheme']) == "javascript")) {
			$this->invalid_links[] = $url;
			return 1;
		}

		// Escaped with the same function the rest of the class uses.
		$select_query = "SELECT * FROM url_index WHERE site_url = '" . mysql_escape_string($url) . "'";
		$select_result = $db->sqlquery($select_query, 1);
		$row = (is_array($select_result) and isset($select_result[0]) and is_array($select_result[0])) ? $select_result[0] : null;

		$last_day = time()-(86400*2);
		$action = "insert";
		if($row !== null) {
			$time_entry = isset($row['timestep']) ? strtotime($row['timestep']) : false;
			if($time_entry > $last_day) {
				// Entry is still fresh: remember the URL as passed, do not fetch.
				$this->passed_links[] = $url;
				$this->post_status();
				return 1;
			}
			$action = "update";
		}

		$read = false;
		$website_base = curl_init($url);
		if($website_base !== false) {
			curl_setopt_array($website_base, array(
				CURLOPT_RETURNTRANSFER => 1,
				CURLOPT_USERAGENT => $agent,
				CURLOPT_HTTPHEADER => $header,
				CURLOPT_FOLLOWLOCATION => 1,
			));
			$read = curl_exec($website_base);
			curl_close($website_base);
		}

		if($read === false) {
			// Fetch failed: record the URL as checked and invalid so it is
			// not retried during this run.
			$this->checked_links[] = $url;
			if(!is_array($this->invalid_links) or !in_array($url, $this->invalid_links)) $this->invalid_links[] = $url;
			return;
		}

		$this->check_content($url, $read, $action);
	}
	
	/**
	 * Returns the text between the first occurrence of $start and the next
	 * occurrence of $end after it.
	 *
	 * @param string $string text to search
	 * @param string $start  opening marker
	 * @param string $end    closing marker
	 * @return string the enclosed text, or "" when either marker is missing
	 */
	function get_string_between($string, $start, $end){
		if ((string) $start === "") return "";

		$from = strpos($string, $start);
		if ($from === false) return "";
		$from += strlen($start);

		// Without this check a missing $end produced a negative length and
		// therefore an arbitrary slice of the input.
		$to = strpos($string, $end, $from);
		if ($to === false) return "";

		return substr($string, $from, $to - $from);
	}
	/**
	 * Reads the meta tags of $url and adds the document title taken from
	 * $content.
	 *
	 * @param string $url     location passed to get_meta_tags()
	 * @param string $content markup of the same page, used for the <title>
	 * @return array|false meta tags plus a 'title' key, or false when
	 *                     get_meta_tags() fails
	 */
	function getMetaData($url, $content){
		// Limit HTTP stream operations (used by get_meta_tags) to 10 seconds.
		stream_context_set_default(
			array(
				'http' => array(
					'timeout' => 10
				)
			)
		);

		// Warnings of a failed fetch are suppressed; failure is reported
		// through the false return value.
		$meta = @get_meta_tags($url);
		if(!is_array($meta)) {
			return $meta;
		}

		if(!isset($meta['title'])) $meta['title'] = '';
		// Case-insensitive and tolerant of attributes on <title>. A page
		// without a title keeps the default instead of a garbage slice.
		if(is_string($content) and preg_match('/<title\b[^>]*>(.*?)<\/title\s*>/is', $content, $match)) {
			$meta['title'] = $match[1];
		}
		return $meta;
	}
	
	/**
	 * Writes a crawled page to site_index.
	 *
	 * NOTE(review): the "update" branch strips double spaces and newlines from
	 * the content while the "insert" branch does not; kept as found.
	 *
	 * @param string $url     page URL (row key)
	 * @param string $content raw page markup; tags are stripped before storing
	 * @param array  $meta    meta data as returned by getMetaData()
	 * @param string $action  "update" or "insert"; other values do nothing
	 */
	private function update_entry($url, $content, $meta ,$action) {
		global $db;
		$meta_serialized = serialize($meta);

		// Pages are not required to define every meta tag.
		$keywords = (is_array($meta) and isset($meta['keywords'])) ? $meta['keywords'] : '';
		$title = (is_array($meta) and isset($meta['title'])) ? $meta['title'] : '';
		$description = (is_array($meta) and isset($meta['description'])) ? $meta['description'] : '';

		$sql_url = mysql_escape_string($url);
		$sql_keywords = mysql_escape_string($keywords);
		$sql_title = mysql_escape_string($title);
		$sql_description = mysql_escape_string($description);
		$sql_meta = mysql_escape_string($meta_serialized);

		if($action == "update") {
			$sql_content = mysql_escape_string(str_replace(array("  ", "\n"), "", strip_tags($content)));
			// The WHERE clause previously used an undefined variable and the
			// statement had a stray ")" after NOW().
			$query = "UPDATE
						site_index
					SET
						`url` = '" . $sql_url . "',
						`tags` = '" . $sql_keywords . "',
						`title` = '" . $sql_title . "',
						`meta_desc` = '" . $sql_description . "',
						`content` = '" . $sql_content . "',
						`type`= 'forum',
						`meta_options_serialized` = '" . $sql_meta . "',
						`index_date` = NOW()
					WHERE
						url = '" . $sql_url . "'";

			$db->sqlquery($query);
		}
		elseif($action == "insert") {
			$sql_content = mysql_escape_string(strip_tags($content));
			$query = "INSERT INTO site_index (`url`,`tags`, `title`, `meta_desc`, `content`, `index_date`, `type`, `meta_options_serialized`)
					VALUES ('" .
					$sql_url . "', '" .
					$sql_keywords . "', '" .
					$sql_title . "', '" .
					$sql_description . "', '" .
					$sql_content . "', NOW(),'forum','" .
					$sql_meta . "')";

			$db->sqlquery($query);
		}
	}
	
	/**
	 * Turns a link found on a page into an absolute URL and classifies it.
	 *
	 * Relative links are resolved against $current_url, "." and ".." segments
	 * are collapsed and the fragment is dropped. A link counts as passed when
	 * it is already in one of the crawl lists or when its site_index entry is
	 * newer than two days.
	 *
	 * @param string $current_url absolute URL of the page containing the link
	 * @param string $url_on_page href value as found in the markup
	 * @return array "url" (absolute URL), "is_external" (0|1), "is_passed" (0|1)
	 * @throws Exception code 4: empty link; code 1: unparsable URL;
	 *                   code 2: $current_url has no host; code 3: scheme that
	 *                   cannot be crawled
	 */
	private function check_link($current_url, $url_on_page) {
		global $db;

		$invalid_scheme = array("javascript", "file", "mailto", "tel", "data");
		$pass_url = 0;

		if(empty($url_on_page)) throw new Exception('$ur_on_page are empty!', 4);
		$parsed_url = parse_url($url_on_page);
		$parsed_current_url = parse_url($current_url);

		if(!is_array($parsed_url) or !is_array($parsed_current_url)) {
			throw new Exception('#001: invalid link', 1);
		}
		if(!isset($parsed_current_url['host'])) {
			throw new Exception('#002: Unable to check Link [uncomplete base]', 2);
		}

		// Scheme: the link's own, otherwise the one of the current page.
		if(!empty($parsed_url['scheme'])) {
			$scheme = strtolower($parsed_url['scheme']);
		}
		elseif(!empty($parsed_current_url['scheme'])) {
			$scheme = strtolower($parsed_current_url['scheme']);
		}
		else {
			$scheme = "http";
		}
		if(in_array($scheme, $invalid_scheme)) throw new Exception('#003: Invalid scheme!', 3);

		$param_str = !empty($parsed_url['query']) ? "?" . $parsed_url['query'] : "";

		if(isset($parsed_url['host'])) {
			$host = $parsed_url['host'];
			$port = isset($parsed_url['port']) ? ":" . $parsed_url['port'] : "";
			$path = !empty($parsed_url['path']) ? $parsed_url['path'] : "/";

			// Same site when the hosts match, with or without a "www." prefix
			// on either side.
			$link_host = strtolower($host);
			$base_host = strtolower($parsed_current_url['host']);
			$same_site = ($link_host == $base_host or ("www." . $link_host) == $base_host or $link_host == ("www." . $base_host));
			$external_url = $same_site ? 0 : 1;
		}
		else {
			$host = $parsed_current_url['host'];
			$port = isset($parsed_current_url['port']) ? ":" . $parsed_current_url['port'] : "";
			$base_path = !empty($parsed_current_url['path']) ? $parsed_current_url['path'] : "/";
			$path = isset($parsed_url['path']) ? $parsed_url['path'] : "";

			if($path === "") {
				// Query-only or fragment-only link: it refers to the current
				// document. A fragment-only link also keeps the current query.
				$path = $base_path;
				if($param_str === "" and !empty($parsed_current_url['query'])) {
					$param_str = "?" . $parsed_current_url['query'];
				}
			}
			elseif($path[0] != "/") {
				// Relative path: resolve against the directory of the current page.
				$slash = strrpos($base_path, "/");
				$path = ($slash === false ? "/" : substr($base_path, 0, $slash + 1)) . $path;
			}
			$external_url = 0;
		}

		// Collapse "." and ".." segments so equal pages get equal URLs.
		if(strpos($path, "/.") !== false) {
			$segments = array();
			foreach(explode("/", $path) as $segment) {
				if($segment === "..") {
					// Never pop the leading empty segment (the root).
					if(count($segments) > 1) array_pop($segments);
				}
				elseif($segment !== ".") {
					$segments[] = $segment;
				}
			}
			$path = implode("/", $segments);
			if($path === "") $path = "/";
		}

		$builded_url = $scheme . "://" . $host . $port . $path . $param_str;

		foreach(array($this->checked_links, $this->passed_links, $this->check_links) as $known_links) {
			if(is_array($known_links) and in_array($builded_url, $known_links)) {
				return array("url" => $builded_url, "is_external" => $external_url, "is_passed" => 1);
			}
		}

		// Second argument 1 as in every other SELECT of this class
		// (presumably "return the rows" - TODO confirm against $db).
		$query = "SELECT * FROM site_index WHERE url = '" . mysql_escape_string($builded_url) . "' LIMIT 1";
		$fetched = $db->sqlquery($query, 1);

		// The condition used to be inverted, so the date was only inspected
		// when no row had been found.
		if(is_array($fetched) and !empty($fetched[0]['index_date'])) {
			$time_entry = strtotime($fetched[0]['index_date']);
			$last_day = time()-(86400*2);
			if($time_entry !== false and $time_entry >= $last_day) $pass_url = 1;
		}

		return array("url" => $builded_url, "is_external" => $external_url, "is_passed" => $pass_url);
	}
	
	/**
	 * Files a URL into exactly one crawl list, skipping duplicates.
	 *
	 * Precedence: invalid, then passed, then external, otherwise the queue of
	 * internal links still to be checked.
	 *
	 * @param string $url
	 * @param int    $is_external 1 when the URL points to another site
	 * @param int    $is_passed   1 when the URL does not need to be crawled
	 * @param int    $is_valid    0 when the URL is invalid
	 * @return int 0 when the URL was already queued in check_links, otherwise 1
	 */
	private function add_link($url, $is_external, $is_passed, $is_valid) {

		if($is_valid == 0) {
			$bucket = 'invalid_links';
		}
		elseif($is_passed == 1) {
			$bucket = 'passed_links';
		}
		elseif($is_external == 1) {
			$bucket = 'external_links';
		}
		else {
			$bucket = 'check_links';
		}

		$already_known = @in_array($url, $this->$bucket);
		if(!$already_known) {
			$this->{$bucket}[] = $url;
		}

		// Only a duplicate in the check queue is reported as "nothing added".
		return ($bucket == 'check_links' and $already_known) ? 0 : 1;

	}
	
	/**
	 * Indexes a fetched page and collects the links it contains.
	 *
	 * When meta data can be read, the page is written via update_entry() and
	 * every anchor href is classified with check_link() and filed with
	 * add_link(). Otherwise the page itself is recorded as invalid. In both
	 * cases $base_url ends up in checked_links.
	 *
	 * @param string $base_url URL the content was fetched from
	 * @param string $content  page markup
	 * @param string $action   "insert" or "update", passed to update_entry()
	 */
	private function check_content($base_url, $content, $action) {
		// Group 2 of each match is the href value of an <a> element.
		$regexp = "<a\s[^>]*href=(\"??)([^\" >]*?)\\1[^>]*>(.*)<\/a>";
		$links = array();
		preg_match_all("/$regexp/siU", (string) $content, $links, PREG_SET_ORDER);
		if(!is_array($links)) $links = array();

		$meta = $this->getMetaData($base_url, $content);
		if(!is_array($meta)) {
			$this->checked_links[] = $base_url;
			$this->add_link($base_url, 1, 1, 0);
			return;
		}

		$this->update_entry($base_url, $content, $meta, $action);

		foreach($links as $link) {
			try {
				$checked = $this->check_link($base_url, $link[2]);
				$this->add_link($checked['url'], $checked['is_external'], $checked['is_passed'], 1);
			}
			catch(Exception $e) {
				// Code 3 (invalid scheme) is recorded as an invalid link; all
				// other failures are skipped.
				if($e->getCode() == 3) {
					$this->add_link($link[2], 1, 1, 0);
				}
			}
		}

		$this->checked_links[] = $base_url;
	}
	
	/**
	 * Crawls a batch of queued sub pages.
	 *
	 * Entries already checked or passed are skipped and javascript: entries
	 * are recorded as invalid. The batch ends with "do_sub" once the counter
	 * of opened pages has reached 10. When the whole queue has been walked
	 * the session is destroyed.
	 *
	 * @return string "do_sub" while the batch limit cut the run short,
	 *                otherwise "do"
	 */
	private function process_sub_pages() {
		if(count($this->check_links) == 0) {
			$this->post_status();
			return "do";
		}

		$opened = 0;
		foreach($this->check_links as $item) {
			if(@in_array($item, $this->checked_links)) {
				continue;
			}
			if(@in_array($item, $this->passed_links)) {
				continue;
			}

			$parts = parse_url($item);
			if($parts['scheme'] == "javascript") {
				$this->invalid_links[] = $item;
				continue;
			}

			echo "Read Subpageuri: " . $item;
			$ret = $this->open_url_entry($item);
			echo ($ret == "do_sub")
				? ' <span style="color: green;">Checked!</span><br />'
				: ' <span style="color: blue;">Passed!</span><br />';

			// The limit is tested before the increment.
			if($opened >= 10) {
				return "do_sub";
			}
			$opened++;
		}

		session_destroy();
		return "do";
	}
	
	/**
	 * Prints the crawl status as HTML: the size and the last entry of every
	 * link list, followed by all external links.
	 *
	 * The URLs come from crawled pages, so they are HTML-escaped before being
	 * printed. Lists that are not arrays are treated as empty.
	 */
	private function post_status() {
		$count = array();
		$last = array();
		$names = array('check_links', 'external_links', 'invalid_links', 'passed_links', 'checked_links');
		foreach($names as $name) {
			$list = is_array($this->$name) ? $this->$name : array();
			$count[$name] = count($list);
			// end() works on the local copy; an empty list prints nothing.
			$last[$name] = empty($list) ? "" : htmlspecialchars((string) end($list), ENT_QUOTES);
		}

		echo "<hr><div style=\"color: green;\" >" . $count['check_links'] . " internal Links found!</div>";
		echo " <div style=\"color: darkgreen;\" >" . $count['external_links'] . " external Links found!</div>";
		echo " <div style=\"color: red;\" >" . $count['invalid_links'] . " invalid Links!</div>";
		echo " <div style=\"color: blue;\" >" . $count['passed_links'] . " Links passed!</div>";
		echo "<div style=\"color: orange;\" >" . $count['checked_links'] . " Links checked!</div><hr />";
		echo "<div style=\"color: green;\" >Last Link in internal links: " . $last['check_links'] . "</div>";
		echo " <div style=\"color: darkgreen;\" >Last Link in external links: " . $last['external_links'] . "</div>";
		echo " <div style=\"color: red;\" >Last Link in invalid links: " . $last['invalid_links'] . "</div>";
		echo " <div style=\"color: blue;\" >Last Link in Passed links: " . $last['passed_links'] . "</div>";
		echo " <div style=\"color: orange;\" >Last Link in checked links: " . $last['checked_links'] . "</div>";
		echo "<hr /><div><div>List external Links</div>";
		if(is_array($this->external_links)) {
			foreach($this->external_links as $ex_url) {
				echo "<div> >" . htmlspecialchars((string) $ex_url, ENT_QUOTES) . "</div>";
			}
		}
		echo "</div>";
	}
	
	/**
	 * Stores the crawl state in the session and reports the next action.
	 *
	 * The action is printed as "|<action>" and the script exits. The only
	 * exception is "do" with no base URLs left: in that case the method
	 * returns without output so the caller can continue.
	 *
	 * @param string $action   next step to report
	 * @param string $last_url stored as $_SESSION['last']
	 */
	private function do_ajax($action, $last_url ='') {
		$main_urls = (isset($this->main_urls) and is_array($this->main_urls)) ? $this->main_urls : array();

		$_SESSION['last'] = $last_url;
		$_SESSION['checked_links'] = $this->checked_links;
		$_SESSION['check_links'] = $this->check_links;
		$_SESSION['main_urls'] = $main_urls;
		$_SESSION['invalid_links'] = $this->invalid_links;
		$_SESSION['passed_links'] = $this->passed_links;
		$_SESSION['external_links'] = $this->external_links;

		if($action == "do" and count($main_urls) < 1) {
			return;
		}

		echo "|" .$action;
		exit;
	}
	
	/**
	 * Displays a template through the global $smarty object.
	 *
	 * @param string $file template name handed to $smarty->display()
	 */
	function output($file) {
		$GLOBALS['smarty']->display($file);
	}
	
}

?>