Ruby: find the view counter of a video on several video-sharing sites


SUBMITTED BY: Guest

DATE: Sept. 28, 2014, 8:11 p.m.

FORMAT: Text only

SIZE: 5.5 kB

HITS: 896

  1. #!/usr/bin/env ruby
  2. # encoding: UTF-8
  3. require 'net/http'
  4. require 'uri'
  5. require 'public_suffix_list'
  6. require 'public_suffix'
  7. require 'fileutils'
  8. require 'mechanize'
  9. require 'nokogiri'
  10. require 'watir-webdriver'
  11. require 'selenium/webdriver'
  # Namespace for scrapers that read the public view counter of a video page.
  module VideoCounter
    # Reads the view counter displayed on a single video page.
    #
    # Every +get_<site>_view+ method takes the URL of one video page and
    # returns the counter exactly as it is written in the page: a String that
    # may still contain thousands separators or surrounding words. When the
    # expected element is not present the methods return an empty String
    # (+get_wat_view+ returns +nil+ in that case, as it always has).
    #
    # Network and driver errors are not rescued here; they propagate to the
    # caller.
    #
    # NOTE(review): most selectors are absolute XPaths tied to the page layout
    # at the time of writing. The author's "#ok" marks indicated they had been
    # checked back then; confirm they still match before relying on them.
    class ViewsCount
      # Matches the fragment "stats":{"AllTime":"258,549"} embedded in a
      # MySpace page; group 1 is the quoted counter.
      MYSPACE_ALL_TIME_STATS = /"stats":\{"AllTime":(".*?")\}/

      # Builds the shared Mechanize agent used by the non-JavaScript scrapers.
      def initialize
        make_agent
      end

      # Returns the second-level domain of +url+ (the +sld+ reported by
      # PublicSuffix for the URL's host).
      #
      # @param url [String] absolute URL
      # @return [String]
      # NOTE(review): a URL without a host (for example one missing its
      # scheme) hands +nil+ to PublicSuffix.parse; behaviour then depends on
      # the PublicSuffix version - verify against callers.
      def find_site(url)
        host = URI.parse(url).host
        PublicSuffix.parse(host).sld
      end

      # @param url [String] YouTube video page
      # @return [String] text of the elements with class "watch-view-count"
      def get_youtube_view(url)
        css_nodes(url, '.watch-view-count').text.to_s.strip
      end

      # @param url [String] Dailymotion video page
      # @return [String]
      def get_dailymotion_view(url)
        xpath_text(url, '//*[@id="video_views_countv2"]/b/text()')
      end

      # @param url [String] Izlesene video page
      # @return [String] text of the elements with class "view_c"
      def get_izlesene_view(url)
        css_nodes(url, '.view_c').text.to_s.strip
      end

      # @param url [String] Trilulilu video page
      # @return [String] first matching text node only
      def get_trilulilu_view(url)
        first_xpath_text(url, '//html/body/div[5]/div[3]/div/div/div/div[3]/div/div/div/div[2]/p/strong/text()')
      end

      # @param url [String] Clipfish video page
      # @return [String] first matching text node only
      def get_clipfish_view(url)
        first_xpath_text(url, '//html/body/div[4]/div[6]/div/div[2]/div[3]/div[2]/div/div[2]/div[2]/div[3]/div[2]/div/div[2]/div[2]/span/text()')
      end

      # Tries the element with id "Views" first and falls back to an absolute
      # path when that element is missing. The page is fetched only once.
      #
      # @param url [String] Metacafe video page
      # @return [String]
      def get_metacafe_view(url)
        parser = @agent.get(url).parser
        views = parser.xpath('//h2[@id="Views"]/text()')
        if views.empty?
          views = parser.xpath('//html/body/div/div[6]/div/section/article/hgroup/span/text()')
        end
        views.to_s.strip
      end

      # Renders the page in a headless browser and extracts the "AllTime"
      # value of the embedded "stats" object.
      #
      # @param url [String] MySpace video page
      # @return [String] the counter without its quotes, or "" when the stats
      #   fragment is not found (this used to raise NoMethodError)
      def get_myspace_view(url)
        quoted = rendered_document(url).to_s[MYSPACE_ALL_TIME_STATS, 1]
        quoted ? quoted.delete('"') : ''
      end

      # Renders the page in a headless browser and reads the first child of
      # the element with id "viewCount".
      #
      # @param url [String] Photobucket video page
      # @return [String] unstripped text, or "" when the element is missing
      #   or empty (this used to raise NoMethodError)
      def get_photobucket_view(url)
        first_child = rendered_document(url).xpath('//*[@id="viewCount"]').children.first
        first_child ? first_child.text : ''
      end

      # Uses the same selector as Dailymotion.
      # Author's note: every video on this site was failing with a copyright
      # error, so this scraper could not be checked.
      #
      # @param url [String] Daleplay video page
      # @return [String]
      def get_daleplay_view(url)
        xpath_text(url, '//*[@id="video_views_countv2"]/b/text()')
      end

      # @param url [String] VXV video page
      # @return [String]
      def get_vxv_view(url)
        xpath_text(url, '//*[@id="video_visit"]/text()')
      end

      # @param url [String] Rutube video page
      # @return [String]
      def get_rutube_view(url)
        xpath_text(url, '//html/body/div/div[2]/div[2]/div/section/div/div/div/div/div/div/div/div/div/div[2]/div[2]/div/div[2]/ul/li[2]/span[2]/text()')
      end

      # @param url [String] Smotri video page
      # @return [String] text of the first element with class "Number", or ""
      #   when there is none (this used to raise NoMethodError)
      def get_smotri_view(url)
        number = css_nodes(url, '.Number').first
        number ? number.text.to_s.strip : ''
      end

      # @param url [String] Miwim video page
      # @return [String] matched text with the "|" separators removed
      def get_miwim_view(url)
        raw = @agent.get(url).parser.xpath('//*[@id="useraction"]/div[1]/text()[3]').to_s
        raw.delete('|').strip
      end

      # @param url [String] Wat video page
      # @return [String, nil] first whitespace-separated word of the matched
      #   text; nil when the text is blank
      def get_wat_view(url)
        label = xpath_text(url, '//html/body/div[3]/div[2]/div[2]/div/div/div/div/div[2]/div/span/text()')
        label.split(' ').first
      end

      # @param url [String] Viddler video page
      # @return [String]
      def get_viddler_view(url)
        xpath_text(url, '//html/body/div/div[2]/div[2]/div/p/span[2]/b/text()')
      end

      # @param url [String] Veoh video page
      # @return [String]
      def get_veoh_view(url)
        xpath_text(url, './/*[@id="leftVideoDetails"]/div[1]/text()')
      end

      # @param url [String] MyVideo video page
      # @return [String]
      def get_myvideo_view(url)
        xpath_text(url, '//*[@id="mCnt"]/text()')
      end

      # @param url [String] Kewego video page
      # @return [String]
      def get_kewego_view(url)
        xpath_text(url, '//html/body/div/div[4]/div[2]/div/div/div/div[2]/div/ul/li[3]/span/text()')
      end

      private

      # Creates the Mechanize agent: Safari user agent, one-page history,
      # 15 s open timeout, 5 s read timeout, keep-alive disabled.
      def make_agent
        @agent = Mechanize.new do |a|
          a.user_agent_alias = 'Mac Safari'
          a.max_history = 1
          a.open_timeout = 15
          a.read_timeout = 5
          a.keep_alive = false
        end
      end

      # Fetches +url+ and returns all nodes matching +expression+, serialized
      # and stripped.
      def xpath_text(url, expression)
        @agent.get(url).parser.xpath(expression).to_s.strip
      end

      # Fetches +url+ and returns only the first node matching +expression+,
      # serialized and stripped ("" when nothing matches).
      def first_xpath_text(url, expression)
        @agent.get(url).parser.xpath(expression)[0].to_s.strip
      end

      # Fetches +url+, parses its body as an HTML fragment and returns the
      # nodes matching the CSS +selector+.
      def css_nodes(url, selector)
        body = @agent.get(url).body
        Nokogiri::HTML::DocumentFragment.parse(body).search(selector)
      end

      # Loads +url+ in a PhantomJS browser and returns the rendered page as a
      # parsed document. The browser is always closed, even when navigation
      # or parsing fails, so no headless process is left behind.
      def rendered_document(url)
        browser = Watir::Browser.new(:phantomjs)
        begin
          browser.goto url
          Nokogiri::HTML(browser.html)
        ensure
          browser.close
        end
      end
    end
  end

comments powered by Disqus