Ruby: find a video's title on several video-sharing platforms


SUBMITTED BY: Guest

DATE: Sept. 28, 2014, 8:09 p.m.

FORMAT: Text only

SIZE: 4.9 kB

HITS: 955

  1. #!/usr/bin/env ruby
  2. # encoding: UTF-8
  3. require 'net/http'
  4. require 'uri'
  5. require 'public_suffix_list'
  6. require 'public_suffix'
  7. require 'fileutils'
  8. require 'mechanize'
  9. require 'nokogiri'
  10. require 'watir-webdriver'
  11. require 'selenium/webdriver'
  # Scrapers that read the title of a video page on various video-sharing sites.
  module VideoTitle
    # Fetches a video page through a Mechanize agent and extracts its title.
    #
    # There is one public +get_<site>_title(url)+ method per supported site.
    # Every one of them returns a String with surrounding whitespace removed,
    # and an empty String when the expected element is not present in the page.
    # Errors raised by the HTTP fetch itself are not rescued here.
    class ViewTitle
      # Selector for the Open Graph title tag, used by most meta-tag scrapers.
      OG_TITLE_SELECTOR = 'meta[@property="og:title"]'.freeze

      # Builds the HTTP agent used by every scraper method.
      def initialize
        make_agent
      end

      # Returns the second-level domain label of +url+ as computed by
      # PublicSuffix (the +sld+ of the parsed host).
      #
      # @param url [String] absolute URL of a video page
      # @return [String] the +sld+ reported by PublicSuffix for the URL's host
      # @raise [URI::InvalidURIError] when +url+ cannot be parsed
      # NOTE(review): a URL without a host is passed to PublicSuffix.parse as
      # nil; presumably that raises a PublicSuffix error - confirm.
      def find_site(url)
        PublicSuffix.parse(URI.parse(url).host).sld
      end

      # NOTE(review): the original author remarked that this result is
      # serialized HTML and wondered whether markup should be stripped;
      # the serialization is left unchanged here.
      def get_youtube_title(url)
        xpath_title(url, '//*[@id="eow-title"]/text()')
      end

      def get_dailymotion_title(url)
        meta_title(url)
      end

      def get_izlesene_title(url)
        meta_title(url)
      end

      # Only the first matching text node is used.
      def get_trilulilu_title(url)
        xpath_title(url, '//html/body/div[5]/div[3]/div/div/div/div[3]/div/div/div/div/h1/text()', first_only: true)
      end

      # Only the first matching text node is used.
      def get_clipfish_title(url)
        xpath_title(url, '//html/body/div[4]/div[6]/div/div[2]/div[3]/div[2]/div/div/h1/text()', first_only: true)
      end

      # Tries the h2#Views text first and falls back to the page's h1 when
      # that yields nothing. The page is fetched only once.
      # NOTE(review): an element with id "Views" looks like a view counter
      # rather than a title - confirm the primary selector.
      def get_metacafe_title(url)
        doc = @agent.get(url).parser
        nodes = doc.xpath('//h2[@id="Views"]/text()')
        nodes = doc.xpath('//html/body/div/div[6]/hgroup/h1/text()') if nodes.empty?
        nodes.to_s.strip
      end

      def get_myspace_title(url)
        meta_title(url)
      end

      def get_photobucket_title(url)
        meta_title(url)
      end

      # The original author marked this scraper as not working because no
      # video was available to test against.
      # NOTE(review): the selector id "video_views_countv2" suggests it reads
      # a view counter, not a title - confirm against a live page.
      def get_daleplay_title(url)
        xpath_title(url, '//*[@id="video_views_countv2"]/b/text()')
      end

      def get_vxv_title(url)
        xpath_title(url, '//html/body/div[4]/div/div/div[2]/h1/text()')
      end

      def get_rutube_title(url)
        meta_title(url)
      end

      def get_smotri_title(url)
        xpath_title(url, '//html/body/div[6]/div/div/div[3]/div/div/div[2]/div/h1/text()')
      end

      def get_miwim_title(url)
        xpath_title(url, '//*[@id="viewvideo-title"]/h1/text()')
      end

      def get_wat_title(url)
        xpath_title(url, '//html/body/div[3]/div[2]/div/div[2]/div/div/div/h1/text()')
      end

      def get_viddler_title(url)
        meta_title(url)
      end

      # This site's tag is matched on the +name+ attribute, not +property+.
      def get_veoh_title(url)
        meta_title(url, 'meta[@name="og:title"]')
      end

      def get_myvideo_title(url)
        xpath_title(url, '//html/body/div[6]/div[3]/div[6]/div[3]/div/div[2]/div/table/tbody/tr/td[2]/h1/text()')
      end

      # This site's title is read from a plain <meta name="title"> tag.
      def get_kewego_title(url)
        meta_title(url, 'meta[@name="title"]')
      end

      private

      # Fetches +url+ and returns the stripped +content+ attribute of the first
      # element matching +selector+.
      #
      # @param url [String] page to fetch
      # @param selector [String] selector handed to the page's +at+
      # @return [String] the attribute value, or "" when the tag or attribute is absent
      def meta_title(url, selector = OG_TITLE_SELECTOR)
        tag = @agent.get(url).at(selector)
        # A page without the tag yields "" (as the XPath scrapers do) instead
        # of failing with NoMethodError on nil.
        tag ? tag[:content].to_s.strip : ''
      end

      # Fetches +url+ and returns the stripped string form of the nodes matched
      # by +xpath+.
      #
      # @param url [String] page to fetch
      # @param xpath [String] XPath expression evaluated on the parsed document
      # @param first_only [Boolean] use only the first matched node instead of
      #   the string form of the whole result set
      # @return [String] "" when nothing matches
      def xpath_title(url, xpath, first_only: false)
        nodes = @agent.get(url).parser.xpath(xpath)
        (first_only ? nodes[0] : nodes).to_s.strip
      end

      # Creates the Mechanize agent stored in @agent: 'Mac Safari' user-agent
      # alias, a history of one page, open/read timeouts of 15 and 5 (seconds,
      # per the Mechanize documentation) and keep-alive disabled.
      def make_agent
        @agent = Mechanize.new do |agent|
          agent.user_agent_alias = 'Mac Safari'
          agent.max_history = 1
          agent.open_timeout = 15
          agent.read_timeout = 5
          agent.keep_alive = false
        end
      end
    end
  end

comments powered by Disqus