ruby bing get serp


SUBMITTED BY: Guest

DATE: Sept. 28, 2014, 8:03 p.m.

FORMAT: Text only

SIZE: 1.9 kB

HITS: 976

  1. #!/usr/bin/env ruby
  2. # encoding: UTF-8
  3. require 'rubygems'
  4. require 'time'
  5. require 'date'
  6. require 'hpricot'
  7. require 'htmlentities'
  8. require 'rexml/document'
  9. require 'oauth'
  10. require 'mime/types'
  11. require 'socket'
  12. require 'cgi'
  13. require 'httpclient'
  14. require 'net/ftp'
  15. require 'net/smtp'
  16. require 'digest/sha2.rb'
  17. require 'net/http'
  18. require 'uri'
  19. require 'fileutils'
  20. require 'mechanize'
  21. require 'nokogiri'
  22. require 'open-uri'
  23. module Get_bing_serp
  24. class BingSeeker
  25. def initialize
  26. make_agent
  27. end
  28. def aleat
  29. r = Random.new
  30. return r.rand(15..45)
  31. end
  32. def parse_web(query)
  33. formated_tab = []
  34. query.parser.xpath('//h3').take(10).each do |cite|
  35. formated_tab << URI::extract( cite.to_s, [ 'http' ] )#cite.inner_text
  36. end
  37. return formated_tab
  38. end
  39. def run(query,max)
  40. i = 2
  41. limit = i + max
  42. result = []
  43. make_agent
  44. page = @agent.get('http://www.bing.com/').forms[0].tap{|f| f.q = query}.submit
  45. result << parse_web(page)
  46. next_link = page.links.find { |l| l.text == i.to_s }
  47. while !next_link.nil?
  48. sleep aleat
  49. i += 1
  50. next_page = @agent.get next_link.href
  51. result << parse_web(next_page)
  52. next_link = next_page.links.find { |l| l.text == i.to_s }
  53. end
  54. return result
  55. #return result.uniq
  56. end
  57. private
  58. def make_agent
  59. @agent = Mechanize.new do |a|
  60. a.user_agent_alias = 'Mac Safari'
  61. a.max_history = 1
  62. a.keep_alive=false
  63. a.read_timeout=15
  64. end
  65. end
  66. end
  67. end

comments powered by Disqus