#!/usr/bin/perl

# 2007-04-07
# Version .01
# mroach.com

use strict;
use warnings;

use File::Basename;
use Getopt::Long;
use HTTP::Cookies::Netscape;
use LWP::UserAgent;
use POSIX qw(strftime);
use Tie::File;
use XML::RSS;

# Print the command-line help text to STDOUT and terminate the script with
# exit status 1. This sub never returns to its caller.
#
# The empty prototype is kept so that existing call sites parse unchanged.
sub usage() {
	print <<"EOF";
usage: $0
Required:
  -r, --rss-source    URL or file containing the RSS feed
  -p, --pattern-file  File containing a list of regular expressions in qr format
  -l, --fetch-log     File to log successful downloads

Optional:
  -c, --cookie-file   File containing cookies in Netscape format
  -i, --bind-ip       Bind to this IP when making HTTP requests
EOF
	exit 1;
}

# Show the help text when invoked without any arguments.
usage() unless @ARGV;

# Option values. $response is not an option; it is declared here so that it
# stays in scope for the rest of the script.
my ($ip, $cookie_file, $regex_source, $rss_source, $response, $fetch_log);

# GetOptions returns false when it meets an unknown or malformed option;
# treat that like missing arguments instead of carrying on with bad input.
GetOptions(
	"r|rss-source=s"   => \$rss_source,
	"p|pattern-file=s" => \$regex_source,
	"c|cookie-file:s"  => \$cookie_file,
	"i|bind-ip:s"      => \$ip,
	"l|fetch-log=s"    => \$fetch_log,
) or usage();

# All three of these are documented as required in the help text.
usage() unless $fetch_log;
usage() unless $rss_source;
usage() unless $regex_source;

die "Regex pattern file $regex_source does not exist." unless -e $regex_source;

# HTTP client used for every request, and the parser that will hold the feed.
my $ua  = LWP::UserAgent->new;
my $rss = XML::RSS->new;

# Bind to the IP specified, if any
if ($ip) {
	# Nothing in this file loads LWP::Protocol::http at compile time, so this
	# assignment is the only mention of the variable. Silence the resulting
	# "used only once" warning here instead of repeating the assignment.
	no warnings 'once';
	@LWP::Protocol::http::EXTRA_SOCK_OPTS = (LocalAddr => $ip);
}

# For sites that require you to be logged in, read the login from a cookies file
if ($cookie_file) {
	$ua->cookie_jar(HTTP::Cookies::Netscape->new(file => $cookie_file));
}

# Load the feed into $rss.
# A source that starts with http: or https: is fetched over the network;
# anything else is treated as a local file, which must already exist.
# The test is anchored so a local path that merely contains "http:" is not
# mistaken for a URL.
if ($rss_source =~ m{\Ahttps?:}i) {
	my $feed_response = $ua->get($rss_source);

	# get() hands back a response object even when the request failed, so the
	# outcome has to be checked explicitly rather than by truthiness.
	die "Couldn't retrieve $rss_source: ", $feed_response->status_line
		unless $feed_response->is_success;

	$rss->parse($feed_response->content);
} else {
	die "RSS source file $rss_source does not exist" unless -e $rss_source;
	$rss->parsefile($rss_source);
}

# If the log file doesn't exist yet, create it. Opening for append creates the
# file without needing an external "touch" command.
unless (-e $fetch_log) {
	open my $new_log, '>>', $fetch_log
		or die "Cannot create fetch log $fetch_log: $!\n";
	close $new_log;
}

# Read the titles that were already fetched, once, instead of re-scanning the
# log for every feed item. Each log line has the form "<timestamp>\t<title>".
my %already_fetched;
open my $log_in, '<', $fetch_log
	or die "Cannot read fetch log $fetch_log: $!\n";
while (my $entry = <$log_in>) {
	$entry =~ s/\r?\n\z//;
	my (undef, $logged_title) = split /\t/, $entry, 2;
	$already_fetched{$logged_title} = 1 if defined $logged_title;
}
close $log_in;

# Read and compile the patterns once. Every non-empty line that does not start
# with "#" is used as a regular expression; a line that does not compile is
# reported and skipped rather than aborting the whole run.
open my $pattern_fh, '<', $regex_source
	or die "Cannot open regex pattern file $regex_source: $!\n";
my @patterns;
while (my $line = <$pattern_fh>) {
	$line =~ s/\r?\n\z//;
	next if $line eq '' || $line =~ m/^#/;

	my $compiled = eval { qr/$line/ };
	if (!defined $compiled) {
		warn "Skipping invalid pattern on line $. of $regex_source: $@";
		next;
	}
	push @patterns, $compiled;
}
close $pattern_fh;

# Begin iterating over each item in the RSS feed
ITEM: foreach my $item (@{ $rss->{items} }) {
	my $url   = $item->{link};
	my $title = $item->{title};

	# An item without a title or a link can be neither matched nor fetched
	next ITEM unless defined $title && defined $url;

	# If this item exists in the fetch log, skip it
	next ITEM if $already_fetched{$title};

	# One matching pattern is enough; the item is downloaded at most once even
	# when several patterns match it.
	next ITEM unless grep { $title =~ $_ } @patterns;

	# Download the torrent directly to disk
	my $torrent_name = basename($url);
	print "Matched $title; fetching $torrent_name\n";
	my $download = $ua->get($url, ":content_file" => $torrent_name);

	if (!$download->is_success) {
		warn "Failed to fetch $torrent_name: ", $download->status_line, "\n";
		next ITEM;
	}

	# Check to make sure this file is a torrent file
	# If there's a non-HTTP server error, this file will be a printed error
	#
	# The file name comes from the feed, so file(1) is run through a list-form
	# pipe (no shell involved) and "--" stops a leading "-" from being read as
	# an option.
	my $file_type = '';
	if (open my $file_cmd, '-|', 'file', '-b', '--', $torrent_name) {
		my $described = <$file_cmd>;
		close $file_cmd;
		if (defined $described) {
			chomp $described;
			$file_type = $described;
		}
	} else {
		warn "Could not run file(1) on $torrent_name: $!\n";
	}

	if ($file_type ne "BitTorrent file") {
		warn "$torrent_name is not a BitTorrent file";
		next ITEM;
	}

	# Add an entry to the log file about this download. It goes to the log
	# named on the command line, which is the same file the duplicate check
	# above reads.
	my $date = strftime("%Y-%m-%d %H:%M:%S", localtime);
	if (open my $log_out, '>>', $fetch_log) {
		print {$log_out} "$date\t$title\n";
		close $log_out
			or warn "Could not finish writing $fetch_log: $!\n";
	} else {
		warn "Could not record $title in $fetch_log: $!\n";
	}

	# Also remember it for the rest of this run, in case the feed lists the
	# same title twice.
	$already_fetched{$title} = 1;
}
