<?php
// Subreddit image grabber!
// By Kyle Barron-Kraus
// Settings
$subreddit = "pics"; // Subreddit to rip
$savedir = "./pics"; // Directory relative to current. Folder must exist.
$limit = 0; // Limit number of images to rip. 0 for no limit.
$startpage = 1; // Start on certain page (usually can be left at 1)
$after = ''; // Start after specific post (will be treated as page 1)
// Blacklist - Copy and paste line to block more domains
$blacklist[] = "example.com";
$blacklist[] = "example.net";
// Don't Touch below here!
//-------------------------------------------------------------------------------------------//
// Refuse to start when the save directory is missing; it is never created
// by this script.
if (!is_dir($savedir)) {
    die("Please make sure the save directory exists!");
}
// Running count of successfully saved images (incremented by download_file()).
$downloads = 0;
// empty() instead of a bare truthiness test: if every blacklist line above is
// removed the variable does not exist at all, and reading it directly would
// raise an undefined-variable warning before error reporting is lowered below.
if (empty($blacklist)) {
    $blacklist = array();
}
// Report fatal errors only, so warnings from failed HTTP fetches (e.g. 404
// responses) are not printed while ripping.
error_reporting(E_ERROR);
/**
 * Walks the subreddit's JSON listing page by page and downloads every direct
 * image link, and every single-image imgur link, whose domain is not
 * blacklisted.
 *
 * Reads the globals $subreddit, $limit, $startpage and $downloads. The global
 * $after is overwritten after each page with the name of the last post on that
 * page so the next request continues from there.
 *
 * The loop ends when the listing reports no further page, when a page could
 * not be fetched after five attempts, or when $limit is non-zero and
 * $downloads has reached it. Pages before $startpage are fetched (they are
 * needed to find the next page) but their items are not downloaded.
 */
function run()
{
    global $subreddit, $limit, $downloads, $startpage, $after;

    $afterurl = '';
    $runagain = true;
    $page = 1;
    echo "Ripping subreddit ".$subreddit."...\n\n";

    while ($runagain == true) {
        echo "----------------------Page ".$page."----------------------\n";

        // The URL does not change between retries, so build it once per page.
        if ($after != '') {
            $afterurl = "?count=1&after=".$after;
        }
        $jsonurl = 'http://www.reddit.com/r/'.$subreddit.'/.json'.$afterurl;

        // Fetch the listing, retrying up to five times while it comes back
        // without any posts.
        $parsedjson = null;
        $tries = 0;
        $tryagain = true;
        while ($tries < 5 && $tryagain == true) {
            $pagejson = file_get_contents($jsonurl);
            $parsedjson = ($pagejson === false) ? null : json_decode($pagejson, true);
            // empty() rather than count(): after a failed fetch or decode the
            // value is null, and count(null) is a TypeError on PHP 8, which
            // would abort the script instead of retrying.
            if (empty($parsedjson['data']['children'])) {
                $tries++;
            } else {
                $tryagain = false;
            }
        }

        // Normalise to an array so the code below never iterates a non-array.
        $children = array();
        if (isset($parsedjson['data']['children']) && is_array($parsedjson['data']['children'])) {
            $children = $parsedjson['data']['children'];
        }

        if ($page >= $startpage) {
            foreach ($children as $item) {
                if ($limit != 0 && $downloads >= $limit) {
                    // Limit reached: nothing left to do on this page either.
                    $runagain = false;
                    break;
                }

                $url = isset($item['data']['url']) ? $item['data']['url'] : '';
                $domain = isset($item['data']['domain']) ? $item['data']['domain'] : '';
                $title = isset($item['data']['title']) ? $item['data']['title'] : '';

                // Direct image links are used as-is; otherwise try to turn an
                // imgur page link into a direct link (false when it is not one).
                $target = is_image($url) ? $url : is_imgur($url);

                if ($target !== false && !is_blacklisted($domain)) {
                    download_file($target, $title);
                }
            }
        } else {
            echo "Skipping Page...\n";
            sleep(2);
        }

        // The listing's "after" field is empty or null on the last page.
        $next = isset($parsedjson['data']['after']) ? $parsedjson['data']['after'] : null;
        if ($next == '' || $next == null) {
            $runagain = false;
        } elseif ($runagain != false) {
            // Continue after the last post of this page; fall back to the
            // listing's own token if that post carries no name.
            $lastitem = end($children);
            $after = isset($lastitem['data']['name']) ? $lastitem['data']['name'] : $next;
        }

        $page++;
    }

    echo "Done!\n";
}
/**
 * Tells whether a URL looks like a direct link to an image file.
 *
 * The decision is based purely on the text of the URL: it must end in
 * .jpg, .jpeg, .png or .gif (compared case-insensitively).
 *
 * @param string $url URL to inspect.
 * @return bool true when the URL ends in one of the known image extensions.
 */
function is_image($url)
{
    $hits = preg_match("/^.*\.(jpg|jpeg|png|gif)$/i", $url);

    return $hits === 1;
}
/**
 * Converts a link to a single-image imgur page into a direct image URL.
 *
 * Accepts http or https links on imgur.com / www.imgur.com whose path is
 * exactly one alphanumeric image id, optionally followed by a trailing slash,
 * query string or fragment. Links with deeper paths (for example /a/... or
 * /gallery/...) are rejected, because appending ".jpg" to them does not give
 * a usable image URL.
 *
 * @param string $url URL to inspect.
 * @return string|false "http://i.imgur.com/<id>.jpg" for a matching link,
 *                      false otherwise.
 */
function is_imgur($url)
{
    // One anchored pattern with escaped dots; the id is captured directly
    // rather than derived by stripping prefixes, so the host's letter case
    // and anything after the id cannot leak into the result.
    $pattern = '~^https?://(?:www\.)?imgur\.com/([a-z0-9]+)/?(?:[?#].*)?$~i';

    if (preg_match($pattern, $url, $match) !== 1) {
        return false;
    }

    return "http://i.imgur.com/".$match[1].".jpg";
}
/**
 * Tells whether a domain matches any entry of the global $blacklist.
 *
 * An entry matches when it occurs anywhere inside $domain, so the entry
 * "example.com" blocks both "example.com" and "i.example.com".
 *
 * @param string $domain Domain of the post being considered.
 * @return bool true when at least one blacklist entry is contained in $domain.
 */
function is_blacklisted($domain)
{
    global $blacklist;

    if (empty($blacklist)) {
        return false;
    }

    $domain = (string) $domain;

    foreach ($blacklist as $blacklistdomain) {
        $blacklistdomain = (string) $blacklistdomain;

        // An empty entry would either warn or match every domain depending on
        // the PHP version, so it is ignored.
        if ($blacklistdomain === '') {
            continue;
        }

        // strpos() returns 0 for a match at the very start of the string, so
        // the result must be compared against false explicitly; a plain
        // truthiness test lets exact matches such as "example.com" through.
        if (strpos($domain, $blacklistdomain) !== false) {
            return true;
        }
    }

    return false;
}
/**
 * Downloads one image into the save directory under a name built from the
 * post title.
 *
 * The name comes from create_filename(); if a file with that name already
 * exists in the global $savedir, a numeric suffix (-1, -2, ...) is tried until
 * an unused name is found. After writing, the file is verified with
 * check_image(); a file that is not a recognisable image is deleted again.
 * On success the global $downloads counter is incremented and a progress line
 * is printed.
 *
 * @param string $url   Direct URL of the image.
 * @param string $title Post title, used for the file name and the messages.
 * @return bool true when the image was saved and verified, false when the
 *              download, the write or the verification failed.
 */
function download_file($url, $title)
{
    global $savedir, $downloads;

    // Find the first name that is not taken yet inside the save directory.
    $filename = create_filename($title, $url);
    $i = 1;
    while (file_exists($savedir."/".$filename)) {
        $filename = create_filename($title, $url, $i);
        $i++;
    }

    $contents = file_get_contents($url);
    if ($contents === false || $contents === '') {
        return false;
    }

    $path = $savedir."/".$filename;
    if (file_put_contents($path, $contents) === false) {
        echo "Could not save: ".$title."\n";
        return false;
    }

    if (check_image($filename)) {
        $downloads++;
        echo "File ".$downloads.": ".$title."\n";
        return true;
    }

    echo "Corrupt Image: ".$title."\n";
    // Do not leave a non-image file behind in the picture folder.
    if (file_exists($path)) {
        unlink($path);
    }
    return false;
}
/**
 * Verifies that a saved file is an image PHP can identify.
 *
 * The file is looked up inside the global $savedir and inspected with
 * getimagesize(); it counts as valid when a MIME type starting with "image/"
 * is reported.
 *
 * @param string $filename File name relative to the save directory.
 * @return bool true for a recognised image, false for anything else,
 *              including a missing or unreadable file.
 */
function check_image($filename)
{
    global $savedir;

    $info = getimagesize($savedir."/".$filename);

    // getimagesize() returns false when the file is missing or not an image;
    // indexing that result would only work by relying on suppressed warnings.
    if ($info === false || !isset($info['mime'])) {
        return false;
    }

    return preg_match("/^image\/.+/", $info['mime']) === 1;
}
// Sanitization from Wordpress codebase
/**
 * Builds a filesystem-safe file name from a post title and the image URL.
 *
 * The title is stripped of HTML tags, lower-cased and reduced to the
 * characters a-z, 0-9, "_", "-" and "%" (percent signs survive only as part
 * of a %XX escape); dots and runs of whitespace become single hyphens. The
 * result is capped at 200 characters so that, with suffix and extension, it
 * stays below common 255-byte file name limits, and falls back to "untitled"
 * when nothing usable is left. The extension is taken from the end of $url
 * (jpg, jpeg, png or gif, in the URL's own letter case).
 *
 * @param string $filename Raw title to turn into a file name.
 * @param string $url      Image URL the extension is read from.
 * @param int    $num      When greater than 0, appended as "-<num>" before
 *                         the extension to make the name unique.
 * @return string "<name>.<ext>" or "<name>-<num>.<ext>"; the extension part
 *                is empty when $url does not end in a known image extension.
 */
function create_filename($filename, $url, $num=0)
{
    $filename = strip_tags($filename);
    // Shield real %XX escapes, drop every other percent sign, then restore.
    $filename = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $filename);
    $filename = str_replace('%', '', $filename);
    $filename = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $filename);
    $filename = strtolower($filename);
    // Remove HTML entities such as &amp; entirely.
    $filename = preg_replace('/&.+?;/', '', $filename);
    $filename = str_replace('.', '-', $filename);
    $filename = preg_replace('/[^%a-z0-9 _-]/', '', $filename);
    $filename = preg_replace('/\s+/', '-', $filename);
    $filename = preg_replace('|-+|', '-', $filename);
    $filename = trim($filename, '-');

    // Titles can be longer than a file name may be; keep a safe prefix.
    if (strlen($filename) > 200) {
        $filename = rtrim(substr($filename, 0, 200), '-');
    }

    // A title made only of stripped characters would otherwise produce a
    // hidden file such as ".jpg".
    if ($filename === '') {
        $filename = 'untitled';
    }

    $extension = '';
    if (preg_match("/^.*\.(jpg|jpeg|png|gif)$/i", $url, $match) === 1) {
        $extension = $match[1];
    }

    $suffix = ($num > 0) ? "-".$num : "";

    return $filename.$suffix.".".$extension;
}
// Entry point: start ripping with the settings defined at the top of the file.
run();
?>