use LWP::Simple;
use LWP::UserAgent;
use WWW::Mechanize;
use utf8;
use Web::Scraper;
use Encode;
my $url = 'http://list3.auctions.yahoo.co.jp/jp/%E7%84%A1%E5%81%9C%E9%9B%BB%E9%9B%BB%E6%BA%90%E8%A3%85%E7%BD%AE-%E3%83%8D%E3%83%83%E3%83%88%E3%83%AF%E3%83%BC%E3%82%AF-%E5%91%A8%E8%BE%BA%E6%A9%9F%E5%99%A8-%E3%82%B3%E3%83%B3%E3%83%94%E3%83%A5%E3%83%BC%E3%82%BF/2084039702-category-leaf.html?price_type=currentprice&min=&max=100000&abatch=1&istatus=0&mode=2&p=&fo=&sb=&desc=&cat=&auccat=&f=&loc_cd=&min_point=&max_point=&auchours=&aucminutes=&e_wday=&e_time=&s_wday=&s_time=&filtered=1';
my $result = &get_list($url);
@$result = sort{ $b->{price} <=> $a->{price} || $a->{desc} cmp $b->{desc} }(@$result);
foreach my $r (@$result) {
next if($r->{price} > 40000);
my $str = &add_comma( $r->{price} ) . "\t"
. qq(<a href="$r->{uri}">) . $r->{desc} . "" . "\n";
print encode('utf-8' , $str );
}
sub get_list{
my $url = shift;
my $agent = "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50$
my $mech = WWW::Mechanize->new(agent=>$agent);
my @result;
$mech->get( $url );
push( @result, &scr( $mech->content ) );
my $re = qr/\Q次の25件\E/ ;
while( $mech->find_link('text_regex' => $re ) ){
$mech->follow_link( 'text_regex' => $re );
push( @result, &scr( $mech->content ) );
}
return \@result;
}
sub scr{
my $yahuoku2 = scraper {
process 'td.pad01>a' , uri => '@href' , 'desc' => 'TEXT';
process 'td.pad01>small>a' , 'owner' => sub{
my @list = $_->find("a");
return $list[0]->as_text;
};
process 'td.pad01>b' , 'price[]' => 'TEXT';
result 'uri','desc','owner','price';
};
my $yahuoku1 = scraper {
process 'div#list01>table>tr' , 'auctions[]' => $yahuoku2;
result 'auctions';
};
my $html = shift;
my $result = $yahuoku1->scrape( $html );
for (my $m=@$result ; $m>=0 ; $m--){
splice(@$result,$m,1) unless ( $$result[$m]->{uri} );
}
my @p;
foreach my $r (@$result){
@p = grep(/^ *[0-9,]+ *\Q円\E */ , @{$r->{price}} );
$r->{price} = $p[0];
$r->{price} =~ s/[^0-9]//g;
}
return @$result;
}