subreddit image grabber php


SUBMITTED BY: Guest

DATE: Aug. 23, 2012, 12:05 a.m.

FORMAT: PHP

SIZE: 5.7 kB

HITS: 1633

  1. <?php
  // Subreddit image grabber!
  // By Kyle Barron-Kraus

  // Settings
  $subreddit = "pics"; // Subreddit to rip
  $savedir = "./pics"; // Directory relative to current. Folder must exist.
  $limit = 0; // Limit number of images to rip. 0 for no limit.
  $startpage = 1; // Start on certain page (usually can be left at 1)
  $after = ''; // Start after specific post (will be treated as page 1)

  // Blacklist - Copy and paste line to block more domains
  $blacklist[] = "example.com";
  $blacklist[] = "example.net";

  // Don't Touch below here!
  //-------------------------------------------------------------------------------------------//
  if (!is_dir($savedir))
      die("Please make sure the save directory exists!");

  // Running count of successfully saved images.
  $downloads = 0;

  // empty() instead of a bare truthiness test: if every blacklist line above
  // has been removed the variable is undefined, and reading it directly would
  // raise an "undefined variable" diagnostic before reporting is lowered below.
  if (empty($blacklist))
      $blacklist = array();

  // Report fatal errors only, so warnings from failed HTTP fetches
  // (404s and the like) do not clutter the output.
  error_reporting(E_ERROR);
  /**
   * Walks the subreddit's JSON listing page by page and downloads every
   * image post that is not blacklisted.
   *
   * Reads the globals $subreddit, $limit, $downloads and $startpage, and both
   * reads and overwrites the global $after (set to the name of the last post
   * on each page so the next request continues from there). Stops when the
   * listing reports no further page, when a page cannot be fetched, or when
   * $limit downloads have been made.
   *
   * @return void
   */
  function run()
  {
      global $subreddit, $limit, $downloads, $startpage, $after;

      $afterurl = '';
      $runagain = true;
      $page = 1;
      echo "Ripping subreddit ".$subreddit."...\n\n";

      while ($runagain)
      {
          echo "----------------------Page ".$page."----------------------\n";

          // Fetch the listing; retry (up to five attempts) while it comes back
          // without any posts. empty() is used rather than count() because a
          // failed fetch leaves $parsedjson null, and count(null) is a fatal
          // TypeError on PHP 8.
          $tries = 0;
          do
          {
              if ($after != '')
                  $afterurl = "?count=1&after=".$after;
              $jsonurl = 'http://www.reddit.com/r/'.$subreddit.'/.json'.$afterurl;
              $pagejson = file_get_contents($jsonurl);
              $parsedjson = ($pagejson === false) ? null : json_decode($pagejson, true);
              $tries++;
          }
          while ($tries < 5 && empty($parsedjson['data']['children']));

          $children = array();
          if (!empty($parsedjson['data']['children']) && is_array($parsedjson['data']['children']))
              $children = $parsedjson['data']['children'];

          if ($page >= $startpage)
          {
              foreach ($children as $item)
              {
                  if ($limit != 0 && $downloads >= $limit)
                  {
                      // Limit reached: nothing further on this page can be downloaded.
                      $runagain = false;
                      break;
                  }

                  $data = isset($item['data']) ? $item['data'] : array();
                  if (!isset($data['url'], $data['title']))
                      continue;
                  $domain = isset($data['domain']) ? $data['domain'] : '';
                  if (is_blacklisted($domain))
                      continue;

                  if (is_image($data['url']))
                  {
                      download_file($data['url'], $data['title']);
                  }
                  else
                  {
                      // is_imgur() returns the direct image URL, or false.
                      $imgururl = is_imgur($data['url']);
                      if ($imgururl !== false)
                          download_file($imgururl, $data['title']);
                  }
              }
          }
          else
          {
              echo "Skipping Page...\n";
              sleep(2);
          }

          if (!isset($parsedjson['data']['after']) || $parsedjson['data']['after'] == '')
          {
              // No "after" marker: this was the last page (or the fetch failed).
              $runagain = false;
          }
          elseif ($runagain)
          {
              $lastitem = end($children);
              if ($lastitem === false || !isset($lastitem['data']['name']))
                  $runagain = false; // cannot determine where the next page starts
              else
                  $after = $lastitem['data']['name'];
          }

          $page++;
      }

      echo "Done!\n";
  }
  /**
   * Tells whether a URL ends in a recognised image extension
   * (jpg, jpeg, png or gif, compared case-insensitively).
   *
   * @param string $url URL to inspect.
   * @return bool True when the URL ends in one of the extensions.
   */
  function is_image($url)
  {
      $matched = preg_match("/^.*\.(jpg|jpeg|png|gif)$/i", $url);
      return $matched ? true : false;
  }
  /**
   * Converts an imgur page URL for a single image into a direct image URL.
   *
   * Only URLs of the form http(s)://[www.]imgur.com/<id> are accepted, with an
   * optional trailing slash, query string or fragment. Album, gallery and
   * other multi-segment paths are rejected, because appending ".jpg" to them
   * does not yield an image.
   *
   * @param string $url URL taken from a post.
   * @return string|false "<scheme>://i.imgur.com/<id>.jpg", or false when the
   *                      URL is not a single-image imgur page.
   */
  function is_imgur($url)
  {
      // Dots are escaped and the pattern is anchored at both ends so that
      // look-alike hosts and deeper paths do not match. The id is captured
      // directly instead of being derived with case-sensitive str_replace().
      $pattern = '~^(https?)://(?:www\.)?imgur\.com/([a-zA-Z0-9]+)/?(?:[?#].*)?$~i';
      if (!preg_match($pattern, (string) $url, $parts))
          return false;

      return strtolower($parts[1])."://i.imgur.com/".$parts[2].".jpg";
  }
  /**
   * Checks a post's domain against the global $blacklist.
   *
   * A domain is blacklisted when any blacklist entry occurs anywhere inside
   * it (substring match), so an entry "example.com" also blocks
   * "i.example.com".
   *
   * @param string $domain Domain reported for the post.
   * @return bool True when the domain contains a blacklisted entry.
   */
  function is_blacklisted($domain)
  {
      global $blacklist;

      if (empty($blacklist) || !is_array($blacklist))
          return false;

      $domain = (string) $domain;
      foreach ($blacklist as $blacklistdomain)
      {
          // Skip blank entries: an empty needle would match every domain.
          if ((string) $blacklistdomain === '')
              continue;

          // strpos() returns 0 for a match at the very start, which is falsy;
          // compare against false explicitly so exact matches are caught.
          if (strpos($domain, (string) $blacklistdomain) !== false)
              return true;
      }

      return false;
  }
  /**
   * Downloads one image into the global $savedir under a name derived from
   * the post title, and increments the global $downloads on success.
   *
   * If a file with the generated name already exists, a numeric suffix
   * (-1, -2, ...) is tried until an unused name is found.
   *
   * @param string $url   Direct URL of the image.
   * @param string $title Post title, used to build the file name.
   * @return bool True when the file was saved and passes check_image();
   *              false when the download, the write or the image check failed.
   */
  function download_file($url, $title)
  {
      global $savedir, $downloads;

      // Find a free file name. The directory variable must be $savedir here,
      // otherwise the existence check looks in the wrong place and existing
      // downloads get overwritten.
      $filename = create_filename($title, $url);
      for ($i = 1; file_exists($savedir."/".$filename); $i++)
          $filename = create_filename($title, $url, $i);

      $contents = file_get_contents($url);
      if ($contents === false || $contents === '')
          return false;

      if (file_put_contents($savedir."/".$filename, $contents) === false)
      {
          // Report a failed write as such rather than as a corrupt image.
          echo "Save Failed: ".$title."\n";
          return false;
      }

      if (check_image($filename))
      {
          $downloads++;
          echo "File ".$downloads.": ".$title."\n";
          return true;
      }

      echo "Corrupt Image: ".$title."\n";
      return false;
  }
  /**
   * Verifies that a saved file is an image by asking getimagesize() for its
   * MIME type.
   *
   * @param string $filename File name relative to the global $savedir.
   * @return bool True when getimagesize() reports an "image/..." MIME type;
   *              false when the file cannot be read or is not an image.
   */
  function check_image($filename)
  {
      global $savedir;

      $check = getimagesize($savedir."/".$filename);

      // getimagesize() returns false for unreadable or non-image files;
      // bail out before indexing into the result.
      if ($check === false || !isset($check['mime']))
          return false;

      return preg_match("/^image\/.+/", $check['mime']) === 1;
  }
  // Sanitization from Wordpress codebase
  /**
   * Builds a filesystem-safe file name from a post title and the image URL.
   *
   * The title is stripped of tags, entities and every character other than
   * a-z, 0-9, "%", "_" and "-", lower-cased, and whitespace/dots are collapsed
   * into single dashes. The extension is taken from the URL.
   *
   * @param string $filename Post title to sanitise.
   * @param string $url      Image URL; its jpg/jpeg/png/gif extension is reused.
   * @param int    $num      When greater than 0, appended as "-<num>" to make
   *                         the name unique.
   * @return string Sanitised "<name>.<ext>" or "<name>-<num>.<ext>".
   */
  function create_filename($filename, $url, $num=0)
  {
      $filename = strip_tags($filename);
      // Protect percent-encoded octets, drop stray "%" signs, then restore them.
      $filename = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $filename);
      $filename = str_replace('%', '', $filename);
      $filename = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $filename);
      $filename = strtolower($filename);
      $filename = preg_replace('/&.+?;/', '', $filename);
      $filename = str_replace('.', '-', $filename);
      $filename = preg_replace('/[^%a-z0-9 _-]/', '', $filename);
      $filename = preg_replace('/\s+/', '-', $filename);
      $filename = preg_replace('|-+|', '-', $filename);
      $filename = trim($filename, '-');

      // Keep the name well under the common 255-byte file name limit; post
      // titles can be longer than that. The string is plain ASCII at this
      // point, so a byte-wise cut is safe.
      $filename = trim(substr($filename, 0, 200), '-');

      // A title made up entirely of stripped characters would otherwise
      // produce a nameless dotfile.
      if ($filename === '')
          $filename = 'image';

      // Fall back to "jpg" when the URL carries no recognised extension,
      // instead of reading an undefined match group.
      if (preg_match("/^.*\.(jpg|jpeg|png|gif)$/i", (string) $url, $match))
          $extension = $match[1];
      else
          $extension = 'jpg';

      if ($num > 0)
          return $filename."-".$num.".".$extension;

      return $filename.".".$extension;
  }
  // Entry point: start ripping with the settings defined at the top of the file.
  run();
  ?>

comments powered by Disqus