Ruby: find video descriptions on several video-sharing platforms


SUBMITTED BY: Guest

DATE: Sept. 28, 2014, 8:10 p.m.

FORMAT: Text only

SIZE: 5.0 kB

HITS: 870

  1. #!/usr/bin/env ruby
  2. # encoding: UTF-8
  3. require 'net/http'
  4. require 'uri'
  5. require 'public_suffix_list'
  6. require 'public_suffix'
  7. require 'fileutils'
  8. require 'mechanize'
  9. require 'nokogiri'
  10. require 'watir-webdriver'
  11. require 'selenium/webdriver'
  # Scrapers that extract the description text of a video from the public
  # pages of several video-sharing sites.
  module VideoDescription
    # Downloads a video page through a single Mechanize agent and pulls the
    # description out of it. There is one public +get_<site>_description+
    # method per supported site; each takes the URL of a video page and issues
    # exactly one GET request for it.
    #
    # Errors raised by the agent while fetching a page are not rescued here.
    class ViewDesc
      # Selector for the Open Graph description <meta> tag shared by several sites.
      OG_DESCRIPTION = 'meta[@property="og:description"]'.freeze

      # Builds the Mechanize agent used by every getter.
      def initialize
        make_agent
      end

      # Identifies the site a URL belongs to.
      #
      # @param url [String] URL to inspect
      # @return [String] the +sld+ reported by PublicSuffix for the URL's host
      #   (per the PublicSuffix docs, e.g. "google" for "www.google.com")
      def find_site(url)
        PublicSuffix.parse(URI.parse(url).host).sld
      end

      # @param url [String] video page URL
      # @return [String] serialized +#eow-description+ element, stripped.
      #   The query selects the element itself rather than its text nodes, so
      #   the result is HTML markup. TODO: decide whether tags should be removed.
      def get_youtube_description(url)
        xpath_text(url, '//*[@id="eow-description"]')
      end

      # @param url [String] video page URL
      # @return [String, nil] +content+ of the og:description tag, nil if the tag is absent
      def get_dailymotion_description(url)
        meta_content(url, OG_DESCRIPTION)
      end

      # @param url [String] video page URL
      # @return [String] stripped text of the +p#videoDesc+ element
      def get_izlesene_description(url)
        xpath_text(url, '//p[@id="videoDesc"]/text()')
      end

      # @param url [String] video page URL
      # @return [String] stripped text found at an absolute path in the page;
      #   the path is tied to the exact page layout
      def get_trilulilu_description(url)
        xpath_text(url, '//html/body/div[5]/div[3]/div/div/div/div[3]/div/div/div/div/div[2]/p/span/text()')
      end

      # @param url [String] video page URL
      # @return [String] stripped text of +#description-head+ immediately
      #   followed by the stripped text of +#additional-description+
      def get_clipfish_description(url)
        # Fetch once and run both queries against the same document.
        document = @agent.get(url).parser
        heading = document.xpath('//*[@id="description-head"]/text()').to_s.strip
        remainder = document.xpath('//*[@id="additional-description"]/text()').to_s.strip
        heading + remainder
      end

      # @param url [String] video page URL
      # @return [String] stripped text found at an absolute path in the page;
      #   the path is tied to the exact page layout
      def get_metacafe_description(url)
        xpath_text(url, '//html/body/div/div[6]/div[5]/div/div[3]/p/text()')
      end

      # @param url [String] video page URL
      # @return [String, nil] +content+ of the og:description tag, nil if the tag is absent
      def get_myspace_description(url)
        meta_content(url, OG_DESCRIPTION)
      end

      # @param url [String] video page URL
      # @return [String, nil] +content+ of the og:description tag, nil if the tag is absent
      def get_photobucket_description(url)
        meta_content(url, OG_DESCRIPTION)
      end

      # FIXME: the original author marked this getter as not working (no video
      # was available to test against). The query targets an element whose id
      # is +video_views_countv2+, which looks like a view counter rather than a
      # description - confirm the right selector against a live page.
      #
      # @param url [String] video page URL
      # @return [String] stripped text of +#video_views_countv2 > b+
      def get_daleplay_description(url)
        xpath_text(url, '//*[@id="video_views_countv2"]/b/text()')
      end

      # @param url [String] video page URL
      # @return [String] stripped text of the +#description-full+ element
      def get_vxv_description(url)
        xpath_text(url, '//*[@id="description-full"]/text()')
      end

      # @param url [String] video page URL
      # @return [String, nil] +content+ of the og:description tag, nil if the tag is absent
      def get_rutube_description(url)
        meta_content(url, OG_DESCRIPTION)
      end

      # @param url [String] video page URL
      # @return [String] stripped text found at an absolute path in the page;
      #   the path is tied to the exact page layout
      def get_smotri_description(url)
        xpath_text(url, '//html/body/div[6]/div/div/div[2]/div[5]/div/div/div[2]/div/div/div[5]/span/text()')
      end

      # @param url [String] video page URL
      # @return [String] the seventh text node (index 6) directly under
      #   +div#small-rightbox-content+, stripped; "" when there are fewer nodes
      def get_miwim_description(url)
        xpath_nodes(url, '//div[@id="small-rightbox-content"]/text()')[6].to_s.strip
      end

      # @param url [String] video page URL
      # @return [String, nil] +content+ of the +itemprop="description"+ meta
      #   tag, nil if the tag is absent
      def get_wat_description(url)
        meta_content(url, 'meta[@itemprop="description"]')
      end

      # @param url [String] video page URL
      # @return [String] stripped +content+ of the og:description tag, "" if absent
      def get_viddler_description(url)
        meta_content(url, OG_DESCRIPTION).to_s.strip
      end

      # This site is queried by the +name+ attribute, not +property+.
      #
      # @param url [String] video page URL
      # @return [String] stripped +content+ of the og:description tag, "" if absent
      def get_veoh_description(url)
        meta_content(url, 'meta[@name="og:description"]').to_s.strip
      end

      # @param url [String] video page URL
      # @return [String] stripped text of the +span[itemprop="description"]+ element
      def get_myvideo_description(url)
        xpath_text(url, '//span[@itemprop="description"]/text()')
      end

      # @param url [String] video page URL
      # @return [String] stripped text found at an absolute path in the page;
      #   the path is tied to the exact page layout
      def get_kewego_description(url)
        xpath_text(url, '//html/body/div/div[4]/div[2]/div/div/div[2]/div[3]/div/p/text()')
      end

      private

      # Fetches +url+ and evaluates +expression+ against the parsed document.
      def xpath_nodes(url, expression)
        @agent.get(url).parser.xpath(expression)
      end

      # Fetches +url+ and returns the stripped string form of the XPath matches.
      def xpath_text(url, expression)
        xpath_nodes(url, expression).to_s.strip
      end

      # Fetches +url+ and returns the +content+ attribute of the first element
      # matching +selector+, or nil when nothing matches (instead of raising
      # NoMethodError on nil).
      def meta_content(url, selector)
        tag = @agent.get(url).at(selector)
        tag && tag[:content]
      end

      # Creates and stores the Mechanize agent shared by all getters.
      def make_agent
        @agent = Mechanize.new do |agent|
          agent.user_agent_alias = 'Mac Safari'
          agent.max_history = 1 # history is not needed; each getter does one fetch
          agent.open_timeout = 15
          agent.read_timeout = 5
          agent.keep_alive = false
        end
      end
    end
  end

comments powered by Disqus