<?php

// Subreddit image grabber!
// By Kyle Barron-Kraus

// Settings
// These variables are read as globals by the functions further down.

$subreddit = "pics"; // Name of the subreddit to rip (inserted into the /r/<name>/.json URL)
$savedir = "./pics"; // Save directory, relative to the current directory. It must already exist; the script exits otherwise.
$limit = 0;             // Maximum number of images to download. 0 means no limit.
$startpage = 1;         // First page to download from; earlier pages are still fetched but skipped (usually can be left at 1)
$after = '';            // Start after a specific post, given by its "name" value (that listing is treated as page 1)

// Blacklist - domains whose posts should be skipped.
// Copy and paste a line to block more domains.
$blacklist[] = "example.com";
$blacklist[] = "example.net";




// Don't Touch below here!
//-------------------------------------------------------------------------------------------//

// Abort early when the target directory is missing; nothing could be saved.
if (!is_dir($savedir))
	die("Please make sure the save directory exists!");

// Running count of successfully saved images (incremented in download_file()).
$downloads = 0;

// The user may have removed every blacklist line from the settings, leaving
// $blacklist undefined. empty() covers both "undefined" and "empty" without
// raising an undefined-variable warning.
if (empty($blacklist))
	$blacklist = array();

// Only report fatal errors, so failed HTTP requests (e.g. 404s) do not
// clutter the output with warnings.
error_reporting(E_ERROR);
	
/**
 * Main loop: walks the subreddit's JSON listing page by page and downloads
 * every post that links directly to an image, or to a single-image imgur
 * page, unless the post's domain is blacklisted.
 *
 * Reads the globals $subreddit, $limit, $startpage and $after, and updates
 * $after as it pages through the listing. $downloads is advanced by
 * download_file(). Progress is echoed to standard output.
 *
 * The loop stops when the download limit is reached, when the listing has no
 * further page, or when a page cannot be loaded after several attempts.
 *
 * @return void
 */
function run()
{
	global $subreddit, $limit, $downloads, $startpage, $after;

	$maxtries = 5;
	$runagain = true;
	$page = 1;

	echo "Ripping subreddit ".$subreddit."...\n\n";

	while ($runagain == true)
	{
		echo "----------------------Page ".$page."----------------------\n";

		// Build the listing URL; $after (if set) selects the page to fetch.
		$afterurl = '';
		if ($after != '')
			$afterurl = "?count=1&after=".$after;
		$jsonurl = 'http://www.reddit.com/r/'.$subreddit.'/.json'.$afterurl;

		// Fetch the page, retrying a few times when the request fails or
		// the response contains no posts.
		$parsedjson = null;
		$children = array();
		for ($tries = 0; $tries < $maxtries; $tries++)
		{
			// Pause briefly before a retry instead of hammering the server.
			if ($tries > 0)
				sleep(1);

			$pagejson = file_get_contents($jsonurl);
			if ($pagejson === false)
				continue;

			$parsedjson = json_decode($pagejson, true);

			// isset()/is_array() guard against a failed decode; calling
			// count() on null is a fatal TypeError on PHP 8.
			if (isset($parsedjson['data']['children'])
				&& is_array($parsedjson['data']['children'])
				&& count($parsedjson['data']['children']) > 0)
			{
				$children = $parsedjson['data']['children'];
				break;
			}
		}

		// Nothing usable came back: stop rather than iterate over null.
		if (count($children) == 0)
		{
			echo "No posts returned after ".$maxtries." attempts, stopping.\n";
			break;
		}

		if ($page >= $startpage)
		{
			foreach ($children as $item)
			{
				// Stop as soon as the download limit has been reached.
				if ($limit != 0 && $downloads >= $limit)
				{
					$runagain = false;
					break;
				}

				// Skip entries that lack the fields we need.
				if (!isset($item['data']['url'], $item['data']['domain']))
					continue;

				$url = $item['data']['url'];
				$title = isset($item['data']['title']) ? $item['data']['title'] : '';

				if (is_blacklisted($item['data']['domain']))
					continue;

				if (is_image($url))
				{
					download_file($url, $title);
				}
				else
				{
					// is_imgur() returns the direct image URL or false;
					// call it once and reuse the result.
					$imgururl = is_imgur($url);
					if ($imgururl)
						download_file($imgururl, $title);
				}
			}

			// The limit may have been hit on the last post of the page;
			// avoid fetching another page just to find that out.
			if ($limit != 0 && $downloads >= $limit)
				$runagain = false;
		}
		else
		{
			echo "Skipping Page...\n";
			sleep(2);
		}

		if (!isset($parsedjson['data']['after']) || $parsedjson['data']['after'] == '')
		{
			// No cursor for a following page: this was the last one.
			$runagain = false;
		}
		elseif ($runagain != false)
		{
			// Continue from the cursor the listing itself reports. It was
			// just checked to be non-empty, so the loop cannot fall back to
			// refetching the first page.
			$after = $parsedjson['data']['after'];
		}
		$page++;
	}

	echo "Done!\n";
}

/**
 * Tells whether a URL ends in one of the recognized image extensions
 * (jpg, jpeg, png or gif, compared case-insensitively).
 *
 * @param string $url URL to inspect.
 * @return bool true when the URL ends in a recognized extension, false otherwise.
 */
function is_image($url)
{
	// preg_match() yields 1 on a match, 0 on no match and false on error;
	// only a real match counts.
	$hits = preg_match("/^.*\.(jpg|jpeg|png|gif)$/i", $url);

	return $hits === 1;
}

/**
 * Converts a link to a single-image imgur page into a direct image URL.
 *
 * Accepts http and https links on imgur.com, www.imgur.com and m.imgur.com
 * whose path is exactly one alphanumeric image id, optionally followed by a
 * trailing slash, query string or fragment. Links with further path segments
 * (for example /a/... or /gallery/...) are rejected, because appending ".jpg"
 * to them does not produce an image URL.
 *
 * @param string $url URL to inspect.
 * @return string|false "<scheme>://i.imgur.com/<id>.jpg" for a matching link,
 *                      false otherwise.
 */
function is_imgur($url)
{
	// Dots in the host are escaped so only the real domain matches.
	// Group 1 is the scheme, group 2 the image id.
	$pattern = '/^(https?):\/\/(?:www\.|m\.)?imgur\.com\/([a-zA-Z0-9]+)\/?(?:[?#].*)?$/i';

	if (!is_string($url) || preg_match($pattern, $url, $parts) !== 1)
		return false;

	// Keep the scheme of the original link; the id is case-sensitive and is
	// therefore passed through untouched.
	return strtolower($parts[1])."://i.imgur.com/".$parts[2].".jpg";
}

/**
 * Tells whether a post's domain matches any entry of the global $blacklist.
 *
 * An entry matches when it occurs anywhere inside the domain (compared
 * case-insensitively), so "example.com" also blocks "i.example.com".
 *
 * @param string $domain Domain of the post being considered.
 * @return bool true when the domain is blacklisted, false otherwise.
 */
function is_blacklisted($domain)
{
	global $blacklist;

	if (!is_array($blacklist))
		return false;

	$domain = (string) $domain;

	foreach ($blacklist as $blacklistdomain)
	{
		$blacklistdomain = (string) $blacklistdomain;

		// An empty entry would match every domain (or raise a warning on
		// older PHP versions), so ignore it.
		if ($blacklistdomain === '')
			continue;

		// Compare against false explicitly: a match at position 0 (the
		// domain starts with the entry) returns 0, which is falsy.
		if (stripos($domain, $blacklistdomain) !== false)
			return true;
	}

	return false;
}

/**
 * Downloads one image into the save directory under a name derived from the
 * post title, and checks that the saved file really is an image.
 *
 * When a file with the generated name already exists, a numeric suffix
 * (-1, -2, ...) is added until the name is free. On success the global
 * $downloads counter is incremented and a progress line is echoed.
 *
 * @param string $url   Direct URL of the image.
 * @param string $title Post title, used to build the file name.
 * @return bool true when an image was saved, false when the download or the
 *              write failed or the saved file is not an image.
 */
function download_file($url, $title)
{
	global $savedir, $downloads;

	// Find a file name that is not taken yet in the save directory.
	$filename = create_filename($title, $url);
	$i = 1;
	while (file_exists($savedir."/".$filename))
	{
		$filename = create_filename($title, $url, $i);
		$i++;
	}

	// file_get_contents() returns false on failure; an empty body is
	// treated as a failed download as well.
	$contents = file_get_contents($url);
	if ($contents === false || $contents == '')
		return false;

	if (file_put_contents($savedir."/".$filename, $contents) === false)
	{
		echo "Could not save: ".$title."\n";
		return false;
	}

	if (!check_image($filename))
	{
		echo "Corrupt Image: ".$title."\n";
		return false;
	}

	$downloads++;
	echo "File ".$downloads.": ".$title."\n";
	return true;
}

/**
 * Checks that a file in the save directory is a readable image.
 *
 * @param string $filename File name relative to the global $savedir.
 * @return bool true when getimagesize() recognizes the file and reports an
 *              image/* MIME type, false otherwise (including a missing file).
 */
function check_image($filename)
{
	global $savedir;

	$path = $savedir."/".$filename;

	// Avoid the warning getimagesize() raises for a missing file.
	if (!is_file($path))
		return false;

	// getimagesize() returns false for anything it cannot identify; indexing
	// that result would raise a warning and pass null to preg_match().
	$check = getimagesize($path);
	if ($check === false || !isset($check['mime']))
		return false;

	return preg_match("/^image\/.+/", $check['mime']) === 1;
}

/**
 * Builds a safe file name from a post title and the image URL.
 *
 * The title is reduced to lowercase letters, digits, "_", "-" and
 * percent-encoded octets, with runs of whitespace and dashes collapsed to a
 * single dash (sanitization adapted from the Wordpress codebase). The
 * extension is taken from the end of the URL.
 *
 * @param string $filename Post title to turn into a file name.
 * @param string $url      Image URL; its jpg/jpeg/png/gif extension is reused.
 * @param int    $num      When greater than 0, appended as "-<num>" to make
 *                         the name unique.
 * @return string The sanitized name. "untitled" is used when nothing of the
 *                title survives sanitization, and no extension is added when
 *                the URL does not end in a recognized one.
 */
function create_filename($filename, $url, $num=0)
{
	$filename = strip_tags((string) $filename);

	// Protect real percent-encoded octets (%xx), drop stray "%" signs, then
	// restore the protected octets.
	$filename = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $filename);
	$filename = str_replace('%', '', $filename);
	$filename = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $filename);

	$filename = strtolower($filename);
	$filename = preg_replace('/&.+?;/', '', $filename);          // remove HTML entities
	$filename = str_replace('.', '-', $filename);
	$filename = preg_replace('/[^%a-z0-9 _-]/', '', $filename);  // keep only safe characters
	$filename = preg_replace('/\s+/', '-', $filename);
	$filename = preg_replace('|-+|', '-', $filename);
	$filename = trim($filename, '-');

	// A title made only of stripped characters would otherwise produce a
	// name such as ".jpg".
	if ($filename === '')
		$filename = 'untitled';

	if ($num > 0)
		$filename .= "-".$num;

	// Append the extension only when the URL really has one; $match[1] is
	// undefined otherwise.
	if (preg_match("/^.*\.(jpg|jpeg|png|gif)$/i", (string) $url, $match))
		$filename .= ".".$match[1];

	return $filename;
}

// Entry point: start ripping as soon as the script is executed.
run();

?>