#!/usr/bin/perl
# baygrab.pl v0.2
# Josh Jackson
#
# Downloads every image of a bayimg.com tag or album into the current
# directory.
#
# Usage:
#   baygrab.pl TAG
#   baygrab.pl -tag TAG
#   baygrab.pl -album ALBUM_ID
#
# Changelog
# #########
#
# 2007.06.26 - 0.2
#   download either tags or albums
#   add cookie to allow downloading 'offensive' images
#
# 2007.06.22 - 0.1
#   downloading based on tags only
#   sort into album subdirectories
#

use strict;
use warnings;
use LWP::UserAgent;
use HTTP::Cookies;

# ---------------------------------------------------------------------------
# Command line handling
# ---------------------------------------------------------------------------

# $mode selects which listing endpoint fetchpage_bymode() queries.
my $mode = 'tag';
my ($album, $tag);

usage() unless defined $ARGV[0] && length $ARGV[0];

# The switch patterns are deliberately left unanchored, as in earlier
# versions, so spellings such as "--album" keep working.
if ($ARGV[0] =~ /-album/) {
    $mode  = 'album';
    $album = $ARGV[1];
    usage() unless defined $album && length $album;
    print "grabbing album $album\n";
} elsif ($ARGV[0] =~ /-tag/) {
    $tag = $ARGV[1];
    usage() unless defined $tag && length $tag;
    print "grabbing tag $tag\n";
} else {
    $tag = $ARGV[0];
    print "grabbing tag $tag\n";
}

# Running count of saved images; used as a zero-padded file name prefix.
my $filecounter = 0;

# Print a usage message and exit with a non-zero status.
sub usage {
    die "usage: $0 [-tag] TAG | -album ALBUM_ID\n";
}

# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------

# Build a fresh LWP::UserAgent carrying the script's User-Agent string.
sub init_agent {
    my $ua = LWP::UserAgent->new;
    $ua->agent("Mozilla/4.0 (compatible; IAmTheWalrus 5.5; OpenBDSM)");
    return $ua;
}

# Attach the 'show_offensive=true' cookie for bayimg.com to a request.
#
#   $_[0] - an HTTP::Request object
#
# Returns the same request object after add_cookie_header() has been
# applied to it.
sub request_cookies {
    my ($request) = @_;

    my $cookie_jar = HTTP::Cookies->new;
    $cookie_jar->set_cookie(
        undef,               # version
        'show_offensive',    # key
        'true',              # value
        "/",                 # path
        "bayimg.com",        # domain
        undef,               # port
        0,                   # path_spec
        0,                   # secure
        60 * 60,             # maxage (seconds)
        0,                   # discard
    );
    $cookie_jar->add_cookie_header($request);

    return $request;
}

# GET a URL and write the response body to a file.
#
#   $_[0] - URL to fetch
#   $_[1] - path of the file to create/overwrite
#
# Returns 1 on success, 0 if the request failed or the file could not be
# written (a warning is printed in both cases; no file is written for a
# failed request).
sub get_to_file {
    my ($url, $file_name) = @_;

    my $ua      = init_agent();
    my $request = request_cookies(HTTP::Request->new(GET => $url));
    my $res     = $ua->request($request);

    unless ($res->is_success) {
        warn "failed to fetch $url: " . $res->status_line . "\n";
        return 0;
    }

    my $fh;
    unless (open($fh, '>', $file_name)) {
        warn "cannot open $file_name for writing: $!\n";
        return 0;
    }

    # Image data is binary; without binmode it is corrupted on platforms
    # that translate line endings.
    binmode($fh);
    print {$fh} $res->content;

    # Buffered write errors only surface at close time.
    unless (close($fh)) {
        warn "error while writing $file_name: $!\n";
        return 0;
    }

    return 1;
}

# GET a URL and return the whole response (status line, headers and body,
# as produced by HTTP::Response->as_string).
#
#   $_[0] - URL to fetch
sub get_to_string {
    my ($url) = @_;

    my $ua      = init_agent();
    my $request = request_cookies(HTTP::Request->new(GET => $url));
    my $res     = $ua->request($request);

    return $res->as_string;
}

# POST form data to a URL and return the whole response as a string
# (status line, headers and body).
#
#   $_[0] - URL to post to
#   $_[1] - request body, already in application/x-www-form-urlencoded form
sub post_to_string {
    my ($posturl, $postvars) = @_;

    my $ua  = init_agent();
    my $req = request_cookies(HTTP::Request->new(POST => $posturl));
    $req->content_type('application/x-www-form-urlencoded');
    $req->content($postvars);

    my $res = $ua->request($req);
    #print "requested address $posturl with vars $postvars\n";
    my $ret = $res->as_string;
    #print "result is:\n$ret\n";

    return $ret;
}

# ---------------------------------------------------------------------------
# Listing pages
# ---------------------------------------------------------------------------

# Normalise a page number: a missing, empty, zero or non-numeric value
# means "first page".
sub _page_or_first {
    my ($pagenum) = @_;

    return 1 unless defined $pagenum && $pagenum =~ /^[0-9]+$/ && $pagenum > 0;
    return $pagenum;
}

# Fetch one page of the thumbnail listing for a tag.
#
#   $_[0] - tag name (interpolated into the POST body as-is, so it must
#           already be URL-safe)
#   $_[1] - page number; defaults to 1
sub fetchtag_page {
    my ($tag, $pagenum) = @_;

    $pagenum = _page_or_first($pagenum);

    return post_to_string('http://bayimg.com/ajax_tags.php',
                          "page=$pagenum&tag=$tag");
}

# Fetch one page of the thumbnail listing for an album.
#
#   $_[0] - album id (interpolated into the POST body as-is)
#   $_[1] - page number; defaults to 1
sub fetchalbum_page {
    my ($album, $pagenum) = @_;

    $pagenum = _page_or_first($pagenum);

    return post_to_string('http://bayimg.com/ajax_album.php',
                          "page=$pagenum&id=$album");
}

# Extract image page ids from a thumbnail listing.
#
#   $_[0] - listing HTML
#
# Returns the list of 9-character ids found in href="/XXXXXXXXX" links.
sub thumbpage_to_imagepages {
    my ($html) = @_;

    return () unless defined $html;

    my @ids = ($html =~ /href\=\"\/(.{9})\"/g);
    return @ids;
}

# ---------------------------------------------------------------------------
# String helpers
# ---------------------------------------------------------------------------

# Return the part of a string that precedes the first match of a pattern
# (the whole string if the pattern does not match).
#
#   $_[0] - pattern, interpolated into a regex
#   $_[1] - string to search
sub before_first {
    my ($pattern, $text) = @_;

    my $before;
    if (defined $text) {
        my @parts = split(/$pattern/, $text, 2);
        $before = $parts[0];
    }
    return $before;
}

# Return everything that follows the first match of a pattern, or undef if
# the pattern does not match.
#
#   $_[0] - pattern, interpolated into a regex
#   $_[1] - string to search
sub after_first {
    my ($pattern, $text) = @_;

    my $after;
    if (defined $text) {
        # Limit 2 keeps the remainder intact; without it only the text up
        # to the *second* match would be returned.
        my @parts = split(/$pattern/, $text, 2);
        $after = $parts[1];
    }
    return $after;
}

# Make a remotely supplied name safe to use as a single path component by
# replacing path separators and NUL bytes with underscores.
sub _safe_name {
    my ($name) = @_;

    $name = '' unless defined $name;
    $name =~ s{[/\\\0]}{_}g;
    return $name;
}

# ---------------------------------------------------------------------------
# Image pages
# ---------------------------------------------------------------------------

# Extract the URL of the main image from an image page.
#
#   $_[0] - image page HTML
#
# Returns the src attribute of the element carrying id="mainImage", or
# undef if none is found.
sub imagepage_imageurl {
    my ($html) = @_;

    my $url;
    # [^"]* instead of .* so the capture cannot run across other
    # attributes that happen to be on the same line.
    if (defined $html && $html =~ /src\=\"([^\"]*)\"\ id\=\"mainImage\"/) {
        $url = $1;
    }
    return $url;
}

# Extract the image name from the page title.
#
#   $_[0] - image page HTML
#
# Returns the name, or undef if the title is not in the expected form.
sub imagepage_namefromtitle {
    my ($html) = @_;

    my $name;
    # NOTE(review): the opening tag of this pattern was missing from the
    # copy of the script under review; "<title>" is restored here because
    # the pattern ends in "</title>" - confirm against a live page.
    if (defined $html
        && $html =~ /\<title\>bayimg\ -\ image\:\ (.*)\ -\ free\ uncensored\ image\ hosting\<\/title\>/) {
        $name = $1;
    }
    #print "title found as $name\n";
    return $name;
}

# Extract the image name from the "Image:" line of the page's info area.
#
#   $_[0] - image page HTML
#
# Returns the name, or undef if no "Image:" line is found before the
# "Popular tags on the site" heading.
sub imagepage_namefrominfo {
    my ($html) = @_;

    my $name;
    if (defined $html) {
        # NOTE(review): the HTML tag literals that delimited the info area
        # and the "Image:" line were missing from the copy of the script
        # under review. The tag-agnostic matching below (cut at the
        # heading, then take the text after "Image: " up to the next tag)
        # is a reconstruction - confirm against a live page.
        my $section = before_first('Popular tags on the site</h2>', $html);
        if (defined $section && $section =~ /Image\:\ ([^<\r\n]*)/) {
            $name = $1;
        }
    }
    #print "info line found as $name\n";
    return $name;
}

# Decide the local file name for an image and advance the file counter.
#
#   $_[0] - image page HTML
#   $_[1] - image page id
#
# The name is taken from the page title, else from the info line, else the
# id is used. Names ending in .rar or .zip are treated as albums: a
# directory of that name is created and "DIR/NNNN-ID" is returned.
# Otherwise "NNNN-NAME" is returned. NNNN is the zero-padded file counter.
sub imagepage_imagename {
    my ($html, $id) = @_;

    my $fromtitle = imagepage_namefromtitle($html);
    my $frominfo  = imagepage_namefrominfo($html);
    $fromtitle = '' unless defined $fromtitle;
    $frominfo  = '' unless defined $frominfo;

    $filecounter += 1;
    my $c = sprintf("%04d", $filecounter);

    my $name = length($fromtitle) ? $fromtitle
             : length($frominfo)  ? $frominfo
             :                      $id;

    # String comparison: both values are names, not numbers.
    if (length($frominfo) && length($fromtitle) && $frominfo ne $fromtitle) {
        print "title ($fromtitle) differs from info ($frominfo)\n";
    }

    # Both values come from remote HTML; keep them inside the current
    # directory.
    $name = _safe_name($name);
    my $safe_id = _safe_name($id);

    if ($name =~ /\.(rar|zip)$/) {
        mkdir($name) unless -d $name;
        return "$name/$c-$safe_id";
    }

    return "$c-$name";
}

# Download one image given its page id.
#
#   $_[0] - image page id (the path component after http://bayimg.com/)
#
# Pages on which no main image can be found are skipped with a warning.
sub saveimage {
    my ($image_id) = @_;

    my $page      = get_to_string("http://bayimg.com/$image_id");
    my $image_url = imagepage_imageurl($page);

    unless (defined $image_url && length $image_url) {
        warn "no image found on page $image_id, skipping\n";
        return;
    }

    my $image_name = imagepage_imagename($page, $image_id);
    print "Saving $image_name\n";
    get_to_file($image_url, $image_name);

    return;
}

# ---------------------------------------------------------------------------
# Driver
# ---------------------------------------------------------------------------

# Work out the number of listing pages from the pagination links.
#
#   $_[0] - listing HTML
#
# Returns the page number captured from the first of the last two
# "parameters: 'page=N&" occurrences on a line, or 1 when no such pair is
# present, so that a listing without pagination is still processed.
sub last_page {
    my ($html) = @_;

    my $last;
    if (defined $html
        && $html =~ /.*parameters:\ \'page\=([0-9]+)\&.*parameters:\ \'page\=[0-9]+\&/) {
        $last = $1;
    }
    $last = 1 unless defined $last;

    print "last page is $last\n";
    return $last;
}

# Fetch one listing page for the tag or album selected on the command line.
#
#   $_[0] - page number
#
# Returns the response text, or '' if $mode is neither tag nor album.
sub fetchpage_bymode {
    my ($pagenum) = @_;

    print "will fetch page $pagenum in $mode mode\n";

    my $pagehtml = '';
    if ($mode =~ /tag/) {
        $pagehtml = fetchtag_page($tag, $pagenum);
    } elsif ($mode =~ /album/) {
        $pagehtml = fetchalbum_page($album, $pagenum);
    } else {
        print "error: mode not set\n";
    }

    return $pagehtml;
}

# Walk every listing page and save every image found on it.
sub bayimg_fetchall {
    my $first_html = fetchpage_bymode(1);
    my $lastpage   = last_page($first_html);

    for my $page (1 .. $lastpage) {
        print "Page $page\n";

        # Page 1 was already fetched to read the pagination; reuse it.
        my $pagehtml = $page == 1 ? $first_html : fetchpage_bymode($page);

        my @image_pages = thumbpage_to_imagepages($pagehtml);
        my $count       = @image_pages;
        print "$count images to grab\n";

        for my $image_id (@image_pages) {
            saveimage($image_id);
        }
    }

    return;
}

bayimg_fetchall();