Amazon BOT !


SUBMITTED BY: Guest

DATE: Aug. 3, 2014, 4:42 p.m.

FORMAT: JavaScript

SIZE: 14.8 kB

HITS: 24812

  1. var async = require('async');
  2. var request = require('request');
  3. var cheerio = require('cheerio');
  4. var User = require('../app/models/user');
  5. var csv = require('./csv');
  6. require('../config/vars');
  /**
   * Reads the numeric amount at the end of a price string.
   *
   * Only the trailing run of digits, dots, commas and minus signs is handed to
   * parseFloat, so a leading currency symbol or label is ignored. parseFloat
   * stops at the first character it cannot use, which means a comma ends the
   * number ("1,234.56" yields 1).
   *
   * @param {*} priceStr - Price text; non-strings are coerced by RegExp#exec.
   * @returns {number} The parsed amount, or 0 when nothing numeric was found
   *   (a parsed value of 0 or NaN also comes back as 0).
   */
  function getPrice(priceStr) {
    var trailingAmountRe = /(?:-|\.|,|\d)*$/ig;
    // The pattern can match the empty string at the end of input, so exec
    // never returns null here.
    var trailingText = trailingAmountRe.exec(priceStr)[0];
    var amount = parseFloat(trailingText);
    if (!amount) {
      return 0;
    }
    return amount;
  }
  14. exports.scrapAmazonItems = function(req, checkboxes, ebayData, callback) {
  15. // Get user settings
  16. var paypalEmail = "";
  17. var postcode = "";
  18. var ebayCountry = "";
  19. User.findOne({
  20. _id: req.user._id
  21. }, function(err, user) {
  22. if (!err) {
  23. paypalEmail = user.local.settings.paypalEmail;
  24. postcode = user.local.settings.zip;
  25. ebayCountry = user.local.settings.ebayCountry;
  26. }
  27. });
  28. var list = [];
  29. async.forEach(checkboxes, function(item, callback) { //The second argument (callback) is the "task callback" for a specific messageId
  30. // If item (URL) is undefined, ignore this loop
  31. if (typeof item === undefined) {
  32. callback();
  33. }
  34. var url = item;
  35. // var newURL = url.replace(/\/dp\//, "/dp/a/");
  36. var newURL = url;
  37. console.log("url", newURL);
  38. var newEbay = JSON.parse(ebayData[item]);
  39. // images
  40. //var re = /"large":"([^"]*)"/g;
  41. var testRe = /http:\/\/ecx\.images\-amazon\.com\/images\/I/ig;
  42. var reHiRes = /"hiRes":"(.*?)"|"hiRes":()null/ig;
  43. var reLarge = /"large":"(.*?)"|"large":()null/ig;
  44. // var re = /<div id="thumb_image_.*?" class="thumb.*?"><img alt="" src="(http:\/\/ecx\.images-amazon\.com\/images\/I\/.*?)"><\/div>/ig;
  45. // end images
  46. request(newURL, function(err, response, html) {
  47. console.log("newURL", newURL);
  48. if (err) {
  49. console.log(err)
  50. } else {
  51. var $ = cheerio.load(html);
  52. // images
  53. var imgs = [];
  54. /*while(true){
  55. var res = re.exec(html);
  56. if(!res){
  57. break;
  58. }
  59. if(res[1]){
  60. var img = res[1].replace(/\.[^\.]*?\.jpg/i,"._SL500_.jpg");
  61. imgs.push(img);
  62. }
  63. }
  64. console.log("----- Images ----: ");
  65. console.log(imgs);*/
  66. $('script').each(function(index, el) {
  67. var js = $(this).text();
  68. if(testRe.test(js)){
  69. // find hires images
  70. var hiresImgs = [];
  71. while(true){
  72. var res = reHiRes.exec(js);
  73. console.log(res);
  74. if(!res){
  75. break;
  76. }
  77. hiresImgs.push(res[1] || res[2]);
  78. }
  79. // find large images
  80. var largeImgs = [];
  81. while(true){
  82. var res = reLarge.exec(js);
  83. if(!res){
  84. break;
  85. }
  86. largeImgs.push(res[1] || res[2]);
  87. }
  88. // merge image, hires higher priority
  89. for(var i=0;i<hiresImgs.length;i++){
  90. if(hiresImgs[i]){
  91. imgs.push(hiresImgs[i]);
  92. }else if(largeImgs[i]){
  93. imgs.push(largeImgs[i]);
  94. }
  95. }
  96. return ;
  97. }
  98. });
  99. console.log("images", imgs);
  100. // end images
  101. // Remove images
  102. $('.leftImage').remove();
  103. $('.rightImage').remove();
  104. // Remove seeAll link
  105. $('div.seeAll').remove();
  106. // Remove duplicate product description
  107. $('h3.productDescriptionSource').remove();
  108. var d = $('#btAsinTitle').children().text();
  109. var l = $('.priceLarge').text();
  110. var j = {};
  111. $('#thumbs-image a').each(function() {
  112. j[$(this).children().attr('src')] = []
  113. });
  114. var p = $('#technicalProductFeaturesATF').children('ul').html();
  115. var s = $('#productDescription').html();
  116. if (!d) {
  117. d = $("#productTitle").text();
  118. l = $("#priceblock_ourprice").text();
  119. // j = JSON.parse($("#imgTagWrapperId").children().attr('data-a-dynamic-image'));
  120. j = imgs[0];
  121. p = $('#feature-bullets').children('ul').html() ? $('#feature-bullets').children('ul').html() : $('#feature-bullets').children().children().html();
  122. //s = $(".techD").html();
  123. // get product description
  124. var iframeRg = /var iframeContent = "(.*?)";/ig;
  125. var iframeRs = iframeRg.exec(html);
  126. if(iframeRs && iframeRs[1]){
  127. var iframeContent = decodeURIComponent(iframeRs[1]);
  128. var $desc = cheerio.load(iframeContent);
  129. // Remove seeAll link
  130. $desc('div.seeAll').remove();
  131. // Remove duplicate product description
  132. $desc('h3.productDescriptionSource').remove();
  133. //s = $desc('#productDescription').html();
  134. s = $desc('#productDescription div.productDescriptionWrapper').html();
  135. }else{
  136. s = $('#productDescription div.productDescriptionWrapper').html();
  137. }
  138. }
  139. s.replace(/(\r\n|\n|\r)/gm,"");
  140. // Replace non-alpha numeric characters from title with a space
  141. d = d.replace(/([^0-9A-Za-z\.\s]+)/g, '').replace(/(\s\s)/, '').trim();
  142. p = p ? '"' + p.replace(/\n/g, '').replace(/\t/g, '&nbsp; &nbsp; &nbsp; &nbsp;') + '"' : '';
  143. l = l.replace('�', '£');
  144. // Replace new lines
  145. s = s ? s.replace(/\n/g, '') : '';
  146. var countries = req.app.locals.ebayCountries;
  147. var ebaySiteID = '';
  148. //console.log("countries", countries);
  149. for (index in countries) {
  150. ///console.log("ebayCountry", ebayCountry);
  151. if (countries[index].value === ebayCountry) {
  152. //console.log("match found - " + countries[index].ebaySiteID);
  153. ebaySiteID = countries[index].ebaySiteID;
  154. ebayCurrency = countries[index].ebayCurrency;
  155. ebayCountryText = countries[index].ebayCountry;
  156. }
  157. //console.log(countries[index].label);
  158. }
  159. var n = {
  160. d: d,
  161. l: l,
  162. j: j,
  163. p: p,
  164. s: s,
  165. url: url,
  166. postcode: postcode,
  167. paypalEmail: paypalEmail,
  168. ebaySiteID: ebaySiteID,
  169. ebayCurrency: ebayCurrency,
  170. ebayPrice: newEbay.eprice
  171. };
  172. list.push(n);
  173. }
  174. callback();
  175. });
  176. }, function(err) { //This is the final callback
  177. csv.createCSV(list, callback);
  178. });
  179. }
  180. exports.scrapAmazonList = function(req, list, options, callback) {
  181. var profitFrom = parseFloat(req.body.profitFrom);
  182. var profitTo = parseFloat(req.body.profitTo);
  183. // Get user settings
  184. var paypalFees = 0;
  185. var eBayFees = 0;
  186. User.findOne({
  187. _id: req.user._id
  188. }, function(err, user) {
  189. //console.log("user findone");
  190. if (!err) {
  191. paypalFees = user.local.settings.paypalFee;
  192. eBayFees = user.local.settings.ebayListingFee;
  193. }
  194. });
  195. var i = 0;
  196. var newList = [];
  197. list = list.slice(0,20); // for fast test
  198. /* console.log("list.length");
  199. console.log(list.length);*/
  200. console.log("list length", list.length)
  201. async.forEach(list, function(item, callback) { //The second argument (callback) is the "task callback" for a specific messageId
  202. var amazonURL = getAmazonURL(req.session.ebayCountry);
  203. var url = amazonURL + 's/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=' + item.text;
  204. i++;
  205. request(url, function(err, response, html) {
  206. if (err) {
  207. console.log("error")
  208. callback(err);
  209. } else {
  210. console.log("else");
  211. var $ = cheerio.load(html);
  212. item.amazonTitle = $("#result_0 .bold").html();
  213. item.amazonPrice = $("#result_0 .red").html();
  214. item.aprice = item.amazonPrice ? parseFloat(item.amazonPrice.replace(/^\D+/g, '')) : 0;
  215. item.eprice = item.price ? parseFloat(item.price.replace(/^\D+/g, '')) : 0;
  216. item.aurl = $("#result_0 .newaps").children().attr('href');
  217. var paypalFeesAsPercentage = (parseFloat(paypalFees) / 100);
  218. //console.log("price", item.price);
  219. //console.log("eprice", item.eprice);
  220. item.paypalFees = Math.round((parseFloat(item.eprice) * paypalFeesAsPercentage) * 100) / 100;
  221. item.ebayFees = Math.round(((parseFloat(item.eprice) / 10) + parseFloat(eBayFees)) * 100) / 100;
  222. // item.profit1 = Math.round(((parseFloat(item.eprice) - (item.paypalFees + item.ebayFees)) - parseFloat(item.aprice)) * 100) / 100;
  223. /*console.log("---------------- 1111111 --------------");
  224. console.log(item.eprice);
  225. console.log(item.paypalFees);
  226. console.log(item.ebayFees);
  227. console.log(item.aprice);
  228. console.log("---------------- 2222222 --------------");
  229. console.log(parseFloat(item.eprice));
  230. console.log(item.paypalFees + item.ebayFees);
  231. console.log(getPrice(item.aprice));*/
  232. item.profit = Math.round(((parseFloat(item.eprice) - (item.paypalFees + item.ebayFees)) - getPrice(item.aprice)) * 100) / 100;
  233. var currencySymbol = '';
  234. var countries = req.app.locals.ebayCountries;
  235. for(var i=0;i<countries.length;i++){
  236. if(countries[i].value == req.body.ebayCountry){
  237. currencySymbol = countries[i]['currencySymbol'];
  238. }
  239. }
  240. item.paypalFees = currencySymbol + item.paypalFees.toFixed(2);
  241. item.ebayFees = currencySymbol + item.ebayFees.toFixed(2);
  242. var profit = item.profit.toFixed(2);
  243. item.profit = currencySymbol + item.profit.toFixed(2);
  244. item.ebayData = JSON.stringify(item);
  245. // If searching for all items OR
  246. if (options.listingsType == "all" || (options.listingsType != "all" && profit > 0.1)) {
  247. //console.log("item", item);
  248. newList.push(item);
  249. }
  250. }
  251. console.log("calling callback " + i)
  252. callback();
  253. console.log("here");
  254. });
  255. }, function(err) { //This is the final callback
  256. if (err) return callback(err, null);
  257. console.log("entering final callback");
  258. // filter list
  259. var resultList = [];
  260. for(var i=0;i<newList.length;i++){
  261. var priceRg = /(?:-|\.|,|\d)*$/ig;
  262. var res = priceRg.exec(newList[i].profit);
  263. var profit = parseFloat(res[0]);
  264. if(profitFrom != "" && profit < profitFrom){
  265. continue;
  266. }
  267. if(profitTo != "" && profit > profitTo){
  268. continue;
  269. }
  270. resultList.push(newList[i]);
  271. }
  272. console.log("calling final callback")
  273. callback(err, resultList);
  274. });
  275. }
  /**
   * Maps a country code to the base URL of the matching Amazon storefront.
   *
   * @param {string} country - Lower-case country code ('at', 'ca', 'fr', 'it',
   *   'es', 'de' or 'us').
   * @returns {string} The storefront base URL with a trailing slash; any other
   *   value (including a non-string) yields the amazon.co.uk URL.
   */
  function getAmazonURL(country) {
    var storefronts = {
      at: 'http://www.amazon.at/',
      ca: 'http://www.amazon.ca/',
      fr: 'http://www.amazon.fr/',
      it: 'http://www.amazon.it/',
      es: 'http://www.amazon.es/',
      de: 'http://www.amazon.de/',
      us: 'http://www.amazon.com/'
    };
    // Only exact string keys owned by the table count as a match, so inherited
    // names such as "constructor" and non-string inputs fall through.
    var isKnown = typeof country === 'string' &&
      Object.prototype.hasOwnProperty.call(storefronts, country);
    if (isKnown) {
      return storefronts[country];
    }
    return 'http://www.amazon.co.uk/';
  }

comments powered by Disqus