ruby bing get serp


SUBMITTED BY: Guest

DATE: Sept. 28, 2014, 8:03 p.m.

FORMAT: Text only

SIZE: 1.9 kB

HITS: 1000

  1. #!/usr/bin/env ruby
  2. # encoding: UTF-8
  3. require 'rubygems'
  4. require 'time'
  5. require 'date'
  6. require 'hpricot'
  7. require 'htmlentities'
  8. require 'rexml/document'
  9. require 'oauth'
  10. require 'mime/types'
  11. require 'socket'
  12. require 'cgi'
  13. require 'httpclient'
  14. require 'net/ftp'
  15. require 'net/smtp'
  16. require 'digest/sha2.rb'
  17. require 'net/http'
  18. require 'uri'
  19. require 'fileutils'
  20. require 'mechanize'
  21. require 'nokogiri'
  22. require 'open-uri'
  # Namespace for the Bing search-engine-result-page (SERP) scraper.
  module Get_bing_serp
    # Submits a query through Bing's home-page search form with Mechanize,
    # then follows the numbered pagination links, collecting the URLs found
    # in the <h3> headings of every page visited.
    class BingSeeker
      # URI schemes extracted from a heading. The original only accepted
      # 'http', which silently dropped every https:// result link.
      URL_SCHEMES = %w[http https].freeze

      # Maximum number of <h3> headings inspected on one page.
      HEADINGS_PER_PAGE = 10

      # Bounds, in seconds, of the random pause between two page requests.
      PAUSE_SECONDS = (15..45).freeze

      # Builds the Mechanize agent used for the requests.
      def initialize
        make_agent
      end

      # Picks the pause inserted between two page requests.
      #
      # @return [Integer] a random number of seconds within PAUSE_SECONDS
      def aleat
        # Kernel#rand accepts a Range; the original called `r.rand` on an
        # undefined local `r` and raised NameError.
        rand(PAUSE_SECONDS)
      end

      # Extracts the URLs found in the first HEADINGS_PER_PAGE <h3> nodes.
      #
      # @param query [#parser] a fetched page whose `parser` answers `xpath`
      # @return [Array<Array<String>>] one array of URLs per heading
      def parse_web(query)
        query.parser.xpath('//h3').take(HEADINGS_PER_PAGE).map do |heading|
          # The heading is serialised and scanned as text, so every URL in
          # its markup is returned, not only the href of the first link.
          URI.extract(heading.to_s, URL_SCHEMES)
        end
      end

      # Runs a search and walks the pagination links.
      #
      # @param query [String] the search terms typed into the Bing form
      # @param max [Integer] maximum number of follow-up pages to fetch after
      #   the first result page (the original computed this limit but never
      #   applied it)
      # @return [Array<Array<Array<String>>>] one #parse_web result per page
      def run(query, max)
        page_number = 2
        limit = page_number + max
        make_agent # fresh agent for every run

        search_form = @agent.get('http://www.bing.com/').forms[0]
        search_form.q = query
        page = search_form.submit

        result = [parse_web(page)]
        next_link = pagination_link(page, page_number)
        while next_link && page_number < limit
          sleep aleat # pause between requests
          page_number += 1
          page = @agent.get(next_link.href)
          result << parse_web(page)
          next_link = pagination_link(page, page_number)
        end
        result
      end

      private

      # Finds the pagination link whose text is exactly the given page number.
      #
      # @return [Object, nil] the matching link, or nil when there is none
      def pagination_link(page, number)
        label = number.to_s
        page.links.find { |link| link.text == label }
      end

      # (Re)creates @agent with the settings used for every request.
      def make_agent
        @agent = Mechanize.new do |a|
          a.user_agent_alias = 'Mac Safari'
          a.max_history = 1
          a.keep_alive = false
          a.read_timeout = 15
        end
      end
    end
  end