#!/usr/bin/env ruby
# encoding: UTF-8

require 'selenium/webdriver'
require 'watir-webdriver'
require 'nokogiri'

module FakeId
  # Drives a headless browser against the fake-name generator site and
  # extracts the generated identity name from each result page.
  class Generator
    START_URL = "http://www.fakenamegenerator.com/"

    # Starts the browser session. Takes no arguments.
    def initialize
      make_agent
    end

    # Shuts the browser session down. Call once when done with the instance.
    def finalize
      kill_agent
    end

    # Navigates the browser to START_URL.
    def gostart
      @b.goto START_URL
    end

    # Fills the three select lists of the generator form and submits it.
    #
    # gender::   value chosen in the select list with id "gen"
    # name_set:: value chosen in the select list with id "n"
    #            (presumably the name set -- confirm against the page)
    # country::  value chosen in the select list with id "c"
    #            (presumably the country -- confirm against the page)
    #
    # The defaults reproduce the values that used to be hard-coded here.
    def process(gender: "female", name_set: "us", country: "us")
      @b.select_list(:id, "gen").select_value(gender)
      @b.select_list(:id, "n").select_value(name_set)
      @b.select_list(:id, "c").select_value(country)
      @b.button(:id, "genbtn").click
    end

    # Parses the HTML currently loaded in the browser and returns the text of
    # the <h3> node found at a fixed absolute XPath (the generated name).
    #
    # NOTE(review): the absolute XPath depends on the exact page layout and
    # will stop matching if the site markup changes.
    #
    # Disabled draft of a multi-field extraction, kept for reference:
    #
    #   doc.css('br').each{ |br| br.replace(" ") }
    #   infos = doc.search('//div[@id="details"]').map do | row |
    #     {
    #       'name'        => row.xpath('/div[2]/div[2]/div/div[1]/h3').text,
    #       'addr'        => row.xpath('/div[2]/div[2]/div/div[1]/div').text,
    #       'tel'         => row.xpath('/div[2]/div[2]/div/div[2]/ul/li[2]/span').text,
    #       'mail'        => row.xpath('div[2]/div[2]/div/div[2]/ul/li[4]/span').text,
    #       'active_mail' => row.xpath('/div[2]/div[2]/div/div[2]/ul/li[4]/div/a').text,
    #       'login'       => row.xpath('/div[2]/div[2]/div/div[2]/ul/li[6]').text,
    #       'password'    => row.xpath('/div[2]/div[2]/div/div[2]/ul/li[8]').text,
    #       'mother_nm'   => rowdoc.xpath('/div[2]/div[2]/div/div[2]/ul/li[10]').text,
    #       'birthday'    => row.xpath('/div[2]/div[2]/div/div[2]/ul/li[12]').text,
    #     }
    #   end
    #   return infos
    def get_infos_pages
      doc = Nokogiri::HTML.parse(@b.html)
      doc.search('//html/body/div/div[2]/div/div/div/div[3]/div[2]/div[2]/div/div/h3').text
    end

    # Writes a collection of key/value records to +file+, one record per
    # line, each non-empty value followed by ";".
    #
    # file::  object responding to +write+
    # infos:: enumerable of hash-like records whose values respond to +empty?+
    def write_infos(file, infos)
      infos.each do |bloc|
        bloc.each do |_key, value|
          # Interpolate instead of `value << ";"` so the caller's strings
          # are not modified in place.
          file.write "#{value};" unless value.empty?
        end
        file.write "\n"
      end
    end

    # Writes +infos+ followed by a newline to +file+.
    def write_name(file, infos)
      file.write "#{infos}\n"
    end

    # Extracts the name from the page currently loaded in the browser and
    # appends it to +file+.
    #
    # Best effort: an ordinary error is reported on stderr and the method
    # returns normally so the caller's loop can continue. Only StandardError
    # is rescued, so Interrupt (Ctrl-C) and SystemExit still propagate.
    def scrape_all(file)
      write_name(file, get_infos_pages)
    rescue StandardError => e
      warn "scrape_all: #{e.class}: #{e.message}"
    end

    private

    # Creates the Watir browser backed by PhantomJS.
    def make_agent
      @b = Watir::Browser.new :phantomjs
    end

    # Closes the browser if one was created.
    def kill_agent
      @b.close if @b
    end
  end
end

# Script entry point: generates identities in a loop and appends each
# extracted name to an output file.
#
# output_path:: file the names are appended to (opened in 'a' mode)
# iterations::  number of generate-and-scrape rounds
#
# An ordinary error inside the loop stops the loop and its message is
# printed. The output file is closed and the browser is shut down in every
# case, including when opening the file or loading the start page fails.
def main(output_path = "id_ls.txt", iterations = 150_000)
  generator = FakeId::Generator.new
  begin
    # Block form closes the file even when an exception passes through.
    File.open(output_path, 'a') do |out|
      generator.gostart
      begin
        iterations.times do
          generator.process
          generator.scrape_all(out)
        end
      rescue StandardError => e
        puts e.message
      end
    end
  ensure
    generator.finalize
  end
end

main