Certain BitTorrent sites I frequent publish certain torrents and I wish to automate the downloading of them, hence this script. When passed an RSS file or URL, it will parse the feed and match it against a list of regexes and download the torrents if there's a match and they haven't been downloaded before. I call it from a simple bash script that runs the script and then moves all new torrents to the rtorrent autoqueue directory.
The script uses a source file that contains regular expressions in what's called qr format. This format stores all of the options and the expression itself so that you can easily use it in Perl. See normally you write expressions like /^expression$/ig. But what if you want to store the expresion in a variable? There's nowhere to store expression options like the ig in this case. There's where qr format comes in. Here's a sample sources file for my script.
# These are my podcast expressions (?i-xsm:^my.favorite.podcast.s03e[0-2][0-9]) (?i-xsm:^another.good.podcast.s01e[0-2][0-9])
1 #!/usr/bin/perl 2 3 # 2007-04-07 4 # Version .01 5 # mroach.com 6 7 use strict; 8 use warnings; 9 10 use Getopt::Long; 11 use XML::RSS; 12 use HTTP::Cookies::Netscape; 13 use LWP::UserAgent; 14 use File::Basename; 15 use Tie::File; 16 17 sub usage() { 18 print << "EOF"; 19 usage: $0 20 Required: 21 -r, --rss-source URL or file containing the RSS feed 22 -p, --pattern-file File containing a list of regular expressions in qr format 23 -l, --fetch-log File to log successful downlaods 24 25 Optional: 26 -c, --cookie-file File containing cookies in Netscape format 27 -i, --bind-ip Bind to this IP when making HTTP requests 28 EOF 29 exit 1; 30 } 31 32 usage() unless @ARGV; 33 34 my($ip, $cookie_file, $regex_source, $rss_source, $response, $fetch_log); 35 36 GetOptions( "r|rss-source=s" => \$rss_source, 37 "p|pattern-file=s" => \$regex_source, 38 "c|cookie-file:s" => \$cookie_file, 39 "i|bind-ip:s" => \$ip, 40 "l|fetch-log=s" => \$fetch_log 41 ); 42 43 usage() unless $fetch_log; 44 usage() unless $rss_source; 45 46 die "Regex pattern file $regex_source does not exist." unless -e $regex_source; 47 48 my $ua = new LWP::UserAgent; 49 my $rss = new XML::RSS; 50 51 # Bind to the IP specified, if any. Done twice to prevent a warning. 52 if ($ip) { 53 @LWP::Protocol::http::EXTRA_SOCK_OPTS = (LocalAddr => $ip); 54 @LWP::Protocol::http::EXTRA_SOCK_OPTS = (LocalAddr => $ip); 55 } 56 57 # For sites that require you to be logged in, read the login from a cookies file 58 if ($cookie_file) { 59 my $cookie_jar = HTTP::Cookies::Netscape->new(file => $cookie_file); 60 $ua->cookie_jar($cookie_jar); 61 } 62 63 # If this is a URL source, try to grab it and parse it 64 # Otherwise it's a local file, make sure it exists and then open it and parse it 65 if ($rss_source =~ /https?:/i) { 66 my $response = $ua->get($rss_source); 67 die "Couldn't retrieve $rss_source: ", $response->status_line unless $response; 68 my $content = $response->content; 69 $rss->parse($content); 70 } else { 71 die "RSS source file $rss_source does not exist" unless -e $rss_source; 72 $rss->parsefile($rss_source); 73 } 74 75 # If the log file doesn't exist yet, create it 76 -e "$fetch_log" or system "touch", $fetch_log; 77 78 # Open the regex source file 79 open my ($regex_file), $regex_source; 80 81 # Begin iterating over each item in the RSS feed 82 foreach my $item (@{$rss->{items}}) { 83 my $url = $item->{link}; 84 my $title = $item->{title}; 85 86 # If this item exists in the fetch log, skip it 87 tie my @fetch_file, 'Tie::File', $fetch_log; 88 next if grep {m/\t\Q$title\E$/} @fetch_file; 89 90 # Iterate over each regex for matching 91 while (<$regex_file>) { 92 chomp; 93 94 # This line in the regex file is a comment, skip it 95 next if !$_ || $_ =~ m/^#/; 96 97 # See if this RSS item matches the current regex 98 if ($title =~ m/$_/) { 99 100 # Download the torrent directly to disk 101 my $torrent_name = basename($url); 102 print "Matched $title; fetching $torrent_name\n"; 103 $response = $ua->get($url, ":content_file" => $torrent_name); 104 105 if (!$response->is_success) { 106 warn "Failed to fetch $torrent_name:", $response->status_line, "\n"; 107 } else { 108 # Check to make sure this file is a torrent file 109 # If there's a non-HTTP server error, this file will be a printed error 110 chomp(my $file_type = `file -b "$torrent_name"`); 111 if ($file_type ne "BitTorrent file") { 112 warn "$torrent_name is not a BitTorrent file"; 113 } else { 114 # Add an entry to the log file about this download 115 chomp(my $date = `date +"%Y-%m-%d %H:%M:%S"`); 116 open(my $h, ">> fetch.log"); 117 print $h "$date\t$title\n"; 118 close($h); 119 } 120 } 121 } 122 } 123 124 # Rewind to the beginning of the regex source file 125 seek($regex_file, 0, 0); 126 }