#!/usr/bin/env ruby
# encoding: UTF-8

require 'rubygems'
require 'time'
require 'date'
require 'hpricot'
require 'htmlentities'
require 'rexml/document'
require 'oauth'
require 'mime/types'
require 'socket'
require 'cgi'
require 'httpclient'
require 'net/ftp'
require 'net/smtp'
require 'digest/sha2.rb'
require 'net/http'
require 'uri'
require 'fileutils'
require 'mechanize'
require 'nokogiri'
require 'open-uri'

# Namespace for a small Bing search-result (SERP) scraper.
module Get_bing_serp
  # Submits a query through the Bing home-page form with Mechanize and
  # collects the URLs found inside the <h3> headings of each result page.
  class BingSeeker
    # Bounds (in seconds) of the random pause taken before each follow-up
    # page request.
    PAUSE_RANGE = (15..45).freeze

    # Number of <h3> headings inspected on each page.
    HEADINGS_PER_PAGE = 10

    # URI schemes extracted from the headings. 'https' is included so that
    # secure result links are not silently dropped.
    URL_SCHEMES = %w[http https].freeze

    # Search form entry point.
    SEARCH_URL = 'http://www.bing.com/'

    # Text of the pagination link that leads to the second result page.
    FIRST_FOLLOW_UP_PAGE = 2

    def initialize
      make_agent
    end

    # Picks the pause length used between two page requests.
    #
    # @return [Integer] a random number of seconds within PAUSE_RANGE
    def aleat
      rand(PAUSE_RANGE)
    end

    # Extracts the URLs contained in the first HEADINGS_PER_PAGE <h3>
    # elements of a page.
    #
    # NOTE: URI.extract is flagged obsolete by Ruby (it warns under -w); it
    # is kept here because it is what defines the shape of the returned data.
    #
    # @param query [#parser] a fetched page whose +parser+ answers +xpath+
    #   (despite the parameter name, this is a page, not a query string)
    # @return [Array<Array<String>>] one array of URLs per <h3> heading,
    #   empty when the heading holds no matching URL
    def parse_web(query)
      query.parser.xpath('//h3').take(HEADINGS_PER_PAGE).map do |heading|
        URI.extract(heading.to_s, URL_SCHEMES)
      end
    end

    # Runs a search and walks the numbered pagination links.
    #
    # The first result page is always fetched. Afterwards at most +max+
    # follow-up pages are requested, stopping earlier when the current page
    # has no link to the next page number. A random pause (see #aleat)
    # precedes every follow-up request.
    #
    # @param query [String] the search terms
    # @param max [Integer] upper bound on the number of follow-up pages
    #   (assumed meaning of the original, previously unused, +limit+ value)
    # @return [Array<Array<Array<String>>>] one #parse_web result per page
    def run(query, max)
      # Start every run from a fresh agent so no state leaks between runs.
      make_agent

      page = @agent.get(SEARCH_URL).forms[0].tap { |form| form.q = query }.submit
      result = [parse_web(page)]

      page_number = FIRST_FOLLOW_UP_PAGE
      limit = page_number + max
      next_link = page_link(page, page_number)

      while next_link && page_number < limit
        sleep aleat
        page_number += 1
        page = @agent.get(next_link.href)
        result << parse_web(page)
        next_link = page_link(page, page_number)
      end

      result
    end

    private

    # Finds the pagination link whose text is exactly the given page number.
    #
    # @return [Object, nil] the matching link, or nil when there is none
    def page_link(page, number)
      label = number.to_s
      page.links.find { |link| link.text == label }
    end

    # Builds the Mechanize agent used for all requests.
    def make_agent
      @agent = Mechanize.new do |a|
        a.user_agent_alias = 'Mac Safari'
        a.max_history = 1
        a.keep_alive = false
        a.read_timeout = 15
      end
    end
  end
end