#!/usr/bin/perl
# baygrab.pl v0.3
# Josh Jackson

package Baygrab;

=head1 NAME

baygrab.pl - A script for downloading images from bayimg.com

=head1 SYNOPSIS

Baygrab is a script to download all images for a given tag or album from
bayimg.com. It will identify images from the same archive and collect them
into a subfolder.

=head1 LICENSE

Copyright (c) 2007 Josh Jackson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

=head1 CHANGELOG

2007.07.21 - 0.3
    code clean up
    add usage information
    download by url
    fix downloading tags and albums with only 1 page of results
    add initial POD info

2007.06.26 - 0.2
    download either tags or albums
    add cookie to allow downloading 'offensive' images

2007.06.22 - 0.1
    downloading based on tags only
    sort into album subdirectories

=cut

use strict;
use warnings;
use LWP::UserAgent;
use HTTP::Cookies;

# Listing mode: 'tag' (the default) or 'album'.
my $mode = 'tag';

# Album id / tag name selected on the command line.
my ($album, $tag);

# Running counter used to prefix saved files in download order.
my $filecounter = 0;

# Print the command line help and terminate the script.
sub usage {
    print "usage: \n";
    print " $0 -album ALBUM\n";
    print " $0 -url URL_TO_ALBUM_OR_TAG\n";
    print " $0 [-tag] TAG\n";
    exit;
}

# Build a fresh LWP::UserAgent with the script's User-Agent string.
sub init_agent {
    my $ua = LWP::UserAgent->new;
    $ua->agent("Mozilla/4.0 (compatible; IAmTheWalrus 5.5; OpenBDSM)");
    return $ua;
}

# Attach the 'show_offensive=true' cookie for bayimg.com to the given
# HTTP::Request and return the same request object.
sub request_cookies {
    my $request = shift;

    my $cookie_jar = HTTP::Cookies->new;

    # path, domain, port, path_spec, secure, maxage (seconds), discard
    my @rest = ("/", "bayimg.com", undef, 0, 0, 60 * 60, 0);
    $cookie_jar->set_cookie(undef, 'show_offensive', 'true', @rest);
    $cookie_jar->add_cookie_header($request);

    return $request;
}

# Download $url and write the response body to $file_name.
# Returns 1 on success, 0 when the download or the write failed (a warning
# is printed and nothing is written in that case, so that an HTTP error page
# is never saved under an image name).
sub get_to_file {
    my ($url, $file_name) = @_;

    if (!defined $url or $url eq '') {
        warn "no image url found, skipping $file_name\n";
        return 0;
    }

    my $ua      = init_agent();
    my $request = request_cookies(HTTP::Request->new(GET => $url));
    my $res     = $ua->request($request);

    if (!$res->is_success) {
        warn "failed to fetch $url: " . $res->status_line . "\n";
        return 0;
    }

    my $fh;
    if (!open($fh, '>', $file_name)) {
        warn "cannot open $file_name for writing: $!\n";
        return 0;
    }

    # Image data is binary; avoid newline translation on platforms that do it.
    binmode($fh);
    print {$fh} $res->content;

    if (!close($fh)) {
        warn "cannot write $file_name: $!\n";
        return 0;
    }
    return 1;
}

# GET $url and return the complete response (status line, headers and body)
# as one string. A warning is printed when the request was not successful.
sub get_to_string {
    my $url = shift;

    my $ua      = init_agent();
    my $request = request_cookies(HTTP::Request->new(GET => $url));
    my $res     = $ua->request($request);

    warn "request for $url failed: " . $res->status_line . "\n"
        unless $res->is_success;

    return $res->as_string;
}

# POST the already form-encoded string $postvars to $posturl and return the
# complete response (status line, headers and body) as one string.
# NOTE(review): callers interpolate the tag / album id into $postvars
# without URL-encoding it -- confirm whether bayimg.com expects raw values.
sub post_to_string {
    my ($posturl, $postvars) = @_;

    my $ua  = init_agent();
    my $req = request_cookies(HTTP::Request->new(POST => $posturl));
    $req->content_type('application/x-www-form-urlencoded');
    $req->content($postvars);

    my $res = $ua->request($req);

    warn "request for $posturl failed: " . $res->status_line . "\n"
        unless $res->is_success;

    return $res->as_string;
}

# Fetch one page of the thumbnail listing for a tag.
# $pagenum defaults to 1 when it is missing, empty or 0.
sub fetchtag_page {
    my ($tag, $pagenum) = @_;
    $pagenum = 1 unless $pagenum;

    return post_to_string('http://bayimg.com/ajax_tags.php',
        "page=$pagenum&tag=$tag");
}

# Fetch one page of the thumbnail listing for an album.
# $pagenum defaults to 1 when it is missing, empty or 0.
sub fetchalbum_page {
    my ($album, $pagenum) = @_;
    $pagenum = 1 unless $pagenum;

    return post_to_string('http://bayimg.com/ajax_album.php',
        "page=$pagenum&id=$album");
}

# Extract the image page ids from a thumbnail listing: every link of the
# form href="/XXXXXXXXX" (exactly nine characters) yields one id.
sub thumbpage_to_imagepages {
    my $x = shift;
    return () unless defined $x;

    my @url_list = ($x =~ /href\=\"\/(.........)\"/g);
    return @url_list;
}

# Return the part of string $_[1] that precedes the first match of the
# regex source $_[0] (the whole string when there is no match).
sub before_first {
    my @parts = split(/$_[0]/, $_[1], 2);
    return $parts[0];
}

# Return the second field obtained by splitting string $_[1] on the regex
# source $_[0], i.e. the text between the first and the second match (or up
# to the end of the string when there is only one match). Returns undef when
# the pattern does not match at all.
sub after_first {
    my @parts = split(/$_[0]/, $_[1]);
    return $parts[1];
}

# Return the URL of the full-size image on an image page: the src attribute
# of the element carrying id="mainImage". Returns undef when none is found.
sub imagepage_imageurl {
    my $x = shift;
    return unless defined $x;

    # [^"]* instead of .* so that an earlier src="..." on the same line
    # cannot be swallowed into the capture.
    return $1 if $x =~ /src\=\"([^"]*)\"\ id\=\"mainImage\"/;
    return;
}

# Return the original file name as shown in the page <title>, or undef when
# the title does not have the expected shape.
sub imagepage_namefromtitle {
    my $x = shift;
    return unless defined $x;

    return $1
        if $x =~ m{<title>bayimg - image: (.*) - free uncensored image hosting</title>};
    return;
}

# Return the original file name from the "Image: ..." entry of the page's
# info section, or undef when it cannot be found.
#
# NOTE(review): the exact markers this sub originally anchored on were lost
# from this copy of the file. The patterns below are a tolerant
# reconstruction: the search is limited to the text in front of the
# "Popular tags on the site" heading, and any tags directly following
# "Image:" are skipped. Verify against a real image page.
sub imagepage_namefrominfo {
    my ($x) = @_;
    return unless defined $x;

    my $info = before_first(
        '(?:<h2[^>]*>)?Popular tags on the site</h2>', $x);
    return unless defined $info;

    if ($info =~ /Image:\s*(?:<[^>]*>\s*)*([^<\r\n]+)/) {
        my $name = $1;
        $name =~ s/\s+$//;
        return $name if length $name;
    }
    return;
}

# Work out the local file name for the image shown on page $x (page id $id).
# The name from the title is preferred, then the name from the info section,
# then the page id. Images whose name ends in .rar or .zip are collected in
# a directory of that name. Every call increments the global file counter,
# which is used as a four digit prefix.
sub imagepage_imagename {
    my ($x, $id) = @_;

    my $fromtitle = imagepage_namefromtitle($x);
    my $frominfo  = imagepage_namefrominfo($x);
    $fromtitle = '' unless defined $fromtitle;
    $frominfo  = '' unless defined $frominfo;

    $filecounter += 1;
    my $c = sprintf("%04d", $filecounter);

    my $name = length($fromtitle) ? $fromtitle
             : length($frominfo)  ? $frominfo
             :                      $id;

    # String comparison: both values are file names, not numbers.
    if (length($frominfo) and length($fromtitle) and $frominfo ne $fromtitle) {
        print "title ($fromtitle) differs from info ($frominfo)\n";
    }

    # The name comes from the remote page; never let it address a path
    # outside of the current directory.
    $name =~ s{[/\\\0]}{_}g;

    if ($name =~ /\.(?:rar|zip)$/) {
        if (!-d $name and !mkdir($name)) {
            warn "cannot create directory $name: $!\n";
        }
        return "$name/$c-$id";
    }
    return "$c-$name";
}

# Download the image shown on image page $_[0] (a page id) to disk.
sub saveimage {
    my ($id) = @_;

    my $page       = get_to_string("http://bayimg.com/$id");
    my $image_name = imagepage_imagename($page, $id);

    print "Saving $image_name\n";
    get_to_file(imagepage_imageurl($page), $image_name);
    return;
}

# Determine the number of listing pages from the pager links contained in a
# listing page. Returns 1 when no pager is present.
sub last_page {
    my $x = shift;

    my $last = 1;

    # Only trust $1 when the match succeeded; after a failed match it would
    # still hold the capture of some earlier, unrelated match.
    if (defined $x
        and $x =~ /.*parameters:\ \'page\=([0-9]+)\&.*parameters:\ \'page\=([0-9]+)\&/)
    {
        $last = $1 if $1 > 1;
    }

    print "last page is $last\n";
    return $last;
}

# Fetch listing page number $x for the tag or album selected on the command
# line. Returns an empty string when the mode is neither 'tag' nor 'album'.
sub fetchpage_bymode {
    my $x = shift;

    print "will fetch page $x in $mode mode\n";

    return fetchtag_page($tag, $x)     if $mode eq 'tag';
    return fetchalbum_page($album, $x) if $mode eq 'album';

    print "error: mode not set\n";
    return '';
}

# Walk over all listing pages of the selected tag / album and save every
# image that is linked from them.
sub bayimg_fetchall {
    # The first page is needed to learn the page count; it is reused below
    # instead of being requested a second time.
    my $pagehtml = fetchpage_bymode(1);
    my $lastpage = last_page($pagehtml);

    for my $page (1 .. $lastpage) {
        print "Page $page\n";
        $pagehtml = fetchpage_bymode($page) if $page > 1;

        my @image_pages = thumbpage_to_imagepages($pagehtml);
        my $count       = @image_pages;
        print "$count images to grab\n";

        for my $image_page (@image_pages) {
            saveimage($image_page);
        }
    }
    return;
}

# --- command line handling -------------------------------------------------

if (!@ARGV or $ARGV[0] =~ /^--?h(?:elp)?$/) {
    usage();
}
elsif ($ARGV[0] =~ /^--?album$/) {
    $mode  = 'album';
    $album = $ARGV[1];
}
elsif ($ARGV[0] =~ /^--?tag$/) {
    $tag = $ARGV[1];
}
elsif ($ARGV[0] =~ /^--?url$/) {
    my $url = defined $ARGV[1] ? $ARGV[1] : '';
    if ($url =~ m{bayimg\.com/album/([^/?#]+)}) {
        $mode  = 'album';
        $album = $1;
    }
    elsif ($url =~ m{bayimg\.com/tag/([^/?#]+)}) {
        $tag = $1;
    }
    else {
        usage();
    }
}
else {
    $tag = $ARGV[0];
}

# A flag without its value (e.g. a bare "-album") is a usage error.
my $target = ($mode eq 'album') ? $album : $tag;
usage() if !defined $target or $target eq '';

if ($mode eq 'album') {
    print "grabbing album $album\n";
}
else {
    print "grabbing tag $tag\n";
}

bayimg_fetchall();