#!/usr/bin/env ruby
# encoding: UTF-8
require 'rubygems'
require 'time'
require 'date'
require 'hpricot'
require 'htmlentities'
require 'rexml/document'
require 'oauth'
require 'mime/types'
require 'socket'
require 'cgi'
require 'httpclient'
require 'net/ftp'
require 'net/smtp'
require 'digest/sha2.rb'
require 'net/http'
require 'uri'
require 'fileutils'
require 'mechanize'
require 'nokogiri'
require 'open-uri'
# Namespace for scraping Bing search-engine result pages.
module Get_bing_serp
  # Drives a Mechanize agent through Bing result pages and collects the
  # http(s) URLs found inside each page's <h3> headings.
  class BingSeeker
    # URI schemes extracted from result headings. Both are listed because
    # URI.extract matches schemes exactly: 'http' alone skips https:// links.
    URL_SCHEMES = %w[http https].freeze

    # Maximum number of <h3> headings inspected on a single page.
    RESULTS_PER_PAGE = 10

    # Bounds (in seconds) of the random pause between two page requests.
    PAUSE_RANGE = (15..45).freeze

    # Builds the searcher together with its Mechanize agent.
    def initialize
      make_agent
    end

    # Picks the pause inserted between two consecutive page requests.
    #
    # @return [Integer] a random number of seconds within PAUSE_RANGE
    def aleat
      rand(PAUSE_RANGE)
    end

    # Extracts the URLs contained in the first RESULTS_PER_PAGE <h3>
    # headings of a page.
    #
    # @param query [#parser] despite its name this is a fetched page whose
    #   +parser+ responds to +xpath+ (a Mechanize page in +run+)
    # @return [Array<Array<String>>] one array of URL strings per heading;
    #   a heading without any http(s) URL contributes an empty array
    def parse_web(query)
      query.parser.xpath('//h3').take(RESULTS_PER_PAGE).map do |cite|
        URI.extract(cite.to_s, URL_SCHEMES)
      end
    end

    # Submits +query+ through the first form of the Bing home page, then
    # follows the numbered pagination links, pausing +aleat+ seconds before
    # each follow-up request.
    #
    # @param query [String] search terms typed into the form's +q+ field
    # @param max [Integer] maximum number of follow-up pages fetched after
    #   the first result page (0 returns the first page only)
    # @return [Array<Array<Array<String>>>] one +parse_web+ result per
    #   fetched page, in fetch order
    def run(query, max)
      page_number = 2 # label of the pagination link to follow next
      limit = page_number + max
      make_agent # start every search from a fresh agent

      page = @agent.get('http://www.bing.com/').forms[0].tap { |f| f.q = query }.submit
      result = [parse_web(page)]
      next_link = find_page_link(page, page_number)

      # Stop when the pagination runs out or when +max+ follow-up pages
      # have been fetched. The limit used to be computed but never checked.
      while next_link && page_number < limit
        sleep aleat
        page_number += 1
        page = @agent.get(next_link.href)
        result << parse_web(page)
        next_link = find_page_link(page, page_number)
      end

      result
    end

    private

    # Finds the pagination link whose text is exactly +page_number+.
    #
    # @return [Object, nil] the matching link, or nil when the page has none
    def find_page_link(page, page_number)
      label = page_number.to_s
      page.links.find { |link| link.text == label }
    end

    # (Re)creates the Mechanize agent stored in @agent: 'Mac Safari' user
    # agent alias, a history of one page, keep-alive disabled and a read
    # timeout of 15.
    def make_agent
      @agent = Mechanize.new do |a|
        a.user_agent_alias = 'Mac Safari'
        a.max_history = 1
        a.keep_alive = false
        a.read_timeout = 15
      end
    end
  end
end