#!/usr/bin/env ruby
# encoding: UTF-8
require 'selenium/webdriver'
require 'watir-webdriver'
require 'nokogiri'
module FakeId
  # Drives a PhantomJS-backed Watir browser against START_URL, submits the
  # generation form and extracts the generated name from the result page.
  #
  # Typical usage: Generator.new, #gostart once, then #process followed by
  # #scrape_all for each identity, and #finalize when done.
  class Generator
    START_URL = "http://www.fakenamegenerator.com/"

    # Absolute XPath of the <h3> node whose text #get_infos_pages returns.
    # It is tied to the exact page layout and yields an empty string as soon
    # as the markup differs.
    NAME_XPATH = '//html/body/div/div[2]/div/div/div/div[3]/div[2]/div[2]/div/div/h3'.freeze

    # Starts the browser session. Takes no arguments.
    def initialize
      make_agent
    end

    # Closes the browser session. Must be called explicitly; Ruby does not
    # invoke a method with this name automatically.
    def finalize
      kill_agent
    end

    # Navigates the browser to START_URL.
    def gostart
      @b.goto START_URL
    end

    # Fills the three select lists of the generation form and submits it.
    #
    # gender   - value selected in the "gen" list (default "female").
    # name_set - value selected in the "n" list (default "us"); presumably the
    #            name set, confirm against the site.
    # country  - value selected in the "c" list (default "us"); presumably the
    #            country, confirm against the site.
    def process(gender = "female", name_set = "us", country = "us")
      @b.select_list(:id, "gen").select_value(gender)
      @b.select_list(:id, "n").select_value(name_set)
      @b.select_list(:id, "c").select_value(country)
      @b.button(:id,"genbtn").click
    end

    # Parses the HTML currently loaded in the browser and returns the text
    # found at NAME_XPATH (an empty String when nothing matches).
    #
    # TODO: a fuller extraction was sketched but never enabled. It mapped each
    # //div[@id="details"] node to a hash using these unverified XPaths, all
    # prefixed with /div[2]/div[2]/div:
    #   name        /div[1]/h3
    #   addr        /div[1]/div
    #   tel         /div[2]/ul/li[2]/span
    #   mail        /div[2]/ul/li[4]/span
    #   active_mail /div[2]/ul/li[4]/div/a
    #   login       /div[2]/ul/li[6]
    #   password    /div[2]/ul/li[8]
    #   mother_nm   /div[2]/ul/li[10]
    #   birthday    /div[2]/ul/li[12]
    def get_infos_pages
      doc = Nokogiri::HTML.parse(@b.html)
      doc.search(NAME_XPATH).text
    end

    # Writes one line per record to +file+: every non-empty value followed by
    # ";" and a trailing newline per record.
    #
    # file  - any object responding to #write.
    # infos - an enumerable of hash-like records (key => value).
    #
    # The values are never modified; nil values are skipped like empty ones.
    def write_infos(file, infos)
      infos.each do |bloc|
        bloc.each do |_key, value|
          text = value.to_s
          next if text.empty?
          # Build a new string instead of appending to the caller's value.
          file.write("#{text};")
        end
        file.write "\n"
      end
    end

    # Writes +infos+ followed by a newline to +file+.
    def write_name(file, infos)
      file.write infos
      file.write "\n"
    end

    # Extracts the name from the current page and appends it to +file+.
    #
    # Best effort: an ordinary error (StandardError) is reported on stderr and
    # the record is skipped, so one bad page does not stop a long run.
    # Interrupts and exit requests are NOT rescued and propagate to the caller.
    def scrape_all(file)
      infos = get_infos_pages
      write_name(file, infos)
    rescue StandardError => e
      warn "FakeId::Generator#scrape_all: record skipped (#{e.class}: #{e.message})"
      nil
    end

    private

    # Opens the PhantomJS browser used by every other method.
    def make_agent
      @b = Watir::Browser.new :phantomjs
    end

    # Closes the browser if one was opened.
    def kill_agent
      @b.close if @b
    end
  end
end
# Runs a scraping session: opens the start page once, then repeatedly submits
# the form and appends each extracted name to the output file.
#
# output_path - file the names are appended to (default "id_ls.txt").
# iterations  - number of identities to request (default 150_000).
#
# An ordinary error (StandardError) ends the run and its message is printed;
# interrupts and exit requests propagate. In every case the output file is
# closed and the browser session is finalized.
def main(output_path = "id_ls.txt", iterations = 150_000)
  # Referenced by its qualified name so FakeId is not mixed into Object.
  generator = FakeId::Generator.new
  begin
    # The block form closes the file even when navigation or scraping raises.
    File.open(output_path, 'a') do |fic_out|
      generator.gostart
      iterations.times do
        generator.process
        generator.scrape_all(fic_out)
      end
    end
  rescue StandardError => e
    puts e.message
  ensure
    # Runs independently of the file close, so the browser is always released.
    generator.finalize
  end
end
# Script entry point. There is no guard around this call, so the scraping run
# starts whenever this file is executed or loaded.
main