Un article de Wikipédia, l'encyclopédie libre.
[modifier] Code perl pour générer la page de suivi
#!/usr/bin/perl
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
use warnings;
use strict;
use Unicode::String;
use LWP;
use LWP::UserAgent;
use YAML::Syck;
$YAML::Syck::ImplicitTyping = 0;
my $ua = LWP::UserAgent->new;
$ua->agent("libwww-perl-$LWP::VERSION");
# Unicode::String::utf8($line)->1();
# $pagesqueuedbyname->{'title'}
my $pagesqueuedbyname =
{ Unicode::String::latin1("Catégorie:Stargate") => {} };
my $lang = "fr";
my $pagestodobyname = {};
my $pagesdonebynamespace = {};
my $pagesdonebyname = {};
my $namespaces = {};
my $CATEGORYNAMESPACE = 14;
my $TEMPLATENAMESPACE = 10;
my $IMAGENAMESPACE = 6;
my $MAXDEPTH = 10;
sub getnamespaces {
my $req =
HTTP::Request->new( GET => 'http://' . $lang
. '.wikipedia.org/w/query.php?format=yaml&what=namespaces' );
my $res = $ua->request($req);
if ( !$res->is_success ) {
die "getnamespaces(" . $lang . "): " . $res->status_line . "\n";
}
my ($listref) = Load( $res->content )->{meta}->{namespaces};
my $hashref = {};
my $elem;
foreach $elem (@$listref) {
if ( defined $elem->{'id'} ) {
if ( defined $elem->{'*'} ) {
$hashref->{ $elem->{id} } = $elem->{'*'};
}
else {
$hashref->{ $elem->{id} } = '';
}
}
}
return $hashref;
}
sub getcateg {
my ($title) = @_;
my $req =
HTTP::Request->new( GET => 'http://' . $lang
. '.wikipedia.org/w/query.php?format=yaml&what=category&cplimit=500&cptitle='
. $title );
my $res = $ua->request($req);
if ( !$res->is_success ) {
die "getcateg(" . $title . "): " . $res->status_line . "\n";
}
my @lines = split( "\n", $res->content );
my $i;
foreach $i ( 0 .. $#lines - 1 ) {
$lines[$i] =~ s/^([^:]*: [^:]+): /$1:_/g; #bug if ": "
}
my ($hashref) = Load( join( "\n", @lines ) );
return $hashref->{'pages'};
}
# MAIN
my $todocount = 1;
my $totalcount = 1;
$namespaces = getnamespaces($lang);
while ( keys %$pagesqueuedbyname > 0 ) {
$pagestodobyname = $pagesqueuedbyname;
$pagesqueuedbyname = {};
my $page;
foreach $page ( keys %{$pagestodobyname} ) {
$page =~ s/_/ /g;
my $pageh = $pagestodobyname->{$page};
if ( !defined $pageh->{'title'} ) {
$pageh->{'title'} = $page;
}
if ( ( defined $pageh->{'ns'} && $pageh->{'ns'} == $CATEGORYNAMESPACE )
|| $page =~ m/^$namespaces->{$CATEGORYNAMESPACE}:/ )
{
$pageh->{'ns'} = $CATEGORYNAMESPACE;
}
if ( !defined $pageh->{'depth'} ) {
$pageh->{'depth'} = 0;
}
if ( $pageh->{'depth'} > $MAXDEPTH ) {
warn "Max depth exceeded for $page\n";
}
else {
if (
(
defined $pageh->{'ns'}
&& $pageh->{'ns'} == $CATEGORYNAMESPACE
)
|| $page =~ m/^$namespaces->{$CATEGORYNAMESPACE}:/
)
{
my $categs = getcateg($page);
my $categ;
foreach $categ (@$categs) {
chomp( $categ->{'title'} );
$categ->{'title'} =~ s/_/ /g;
if ( !defined $categ->{'title'} || $categ->{'title'} eq '' )
{
warn "Empty title\n";
next;
}
if ( !defined $pagesdonebyname->{ $categ->{'title'} }
&& !defined $pagesqueuedbyname->{ $categ->{'title'} }
&& !defined $pagestodobyname->{ $categ->{'title'} } )
{
$pagesqueuedbyname->{ $categ->{'title'} }->{'title'} =
$categ->{'title'};
$pagesqueuedbyname->{ $categ->{'title'} }->{'depth'} =
$pageh->{'depth'} + 1;
if ( defined $categ->{'ns'} ) {
$pagesqueuedbyname->{ $categ->{'title'} }->{'ns'} =
$categ->{'ns'};
}
print STDERR ' ' x length("$todocount/$totalcount"),
"\r";
$todocount++;
$totalcount++;
print STDERR "$todocount/$totalcount\r";
}
}
}
}
push @{ $pagesdonebynamespace->{ $pageh->{'ns'} } }, $pageh;
$pagesdonebyname->{$page} = $pageh;
print STDERR ' ' x length("$todocount/$totalcount"), "\r";
$todocount--;
print STDERR "$todocount/$totalcount\r";
}
}
print STDERR "\n";
my $namespacecount = {};
my $pageh;
if ( $#{ $pagesdonebynamespace->{$IMAGENAMESPACE} } >= 0 ) {
foreach $pageh ( sort { $a->{'title'} cmp $b->{'title'} }
@{ $pagesdonebynamespace->{$IMAGENAMESPACE} } )
{
my $talk = $pageh->{'title'};
if ( $talk =~ m/^$namespaces->{$IMAGENAMESPACE}:/ ) {
$talk =~
s/^$namespaces->{$IMAGENAMESPACE}:/$namespaces->{ ( $IMAGENAMESPACE + 1 ) }:/;
}
else {
$talk =
$namespaces->{ ( $IMAGENAMESPACE + 1 ) } . ':'
. $pageh->{'title'};
}
print '[['
. $pageh->{'title'}
. '|thumb|right|100px|[[:'
. $pageh->{'title'}
. ']] <sup><small>([['
. $talk . '|'
. $namespaces->{ ( $IMAGENAMESPACE + 1 ) }
. ']])</small></sup>]]' . "\n";
delete $pagesdonebyname->{ $pageh->{'title'} };
$namespacecount->{$IMAGENAMESPACE}++;
}
}
print "\n== " . $namespaces->{$CATEGORYNAMESPACE} . " ==\n\n";
if ( $#{ $pagesdonebynamespace->{$CATEGORYNAMESPACE} } >= 0 ) {
foreach $pageh ( sort { $a->{'title'} cmp $b->{'title'} }
@{ $pagesdonebynamespace->{$CATEGORYNAMESPACE} } )
{
my $talk = $pageh->{'title'};
if ( $talk =~ m/^$namespaces->{$CATEGORYNAMESPACE}:/ ) {
$talk =~
s/^$namespaces->{$CATEGORYNAMESPACE}:/$namespaces->{ ( $CATEGORYNAMESPACE + 1 ) }:/;
}
else {
$talk =
$namespaces->{ ( $CATEGORYNAMESPACE + 1 ) } . ':'
. $pageh->{'title'};
}
print '* [[:'
. $pageh->{'title'}
. ']] <sup><small>([['
. $talk . '|'
. $namespaces->{ ( $CATEGORYNAMESPACE + 1 ) }
. ']])</small></sup>' . "\n";
delete $pagesdonebyname->{ $pageh->{'title'} };
$namespacecount->{$CATEGORYNAMESPACE}++;
}
}
my $namespace;
foreach $namespace ( sort keys %{$pagesdonebynamespace} ) {
if ( $namespace eq $IMAGENAMESPACE
|| $namespace eq $CATEGORYNAMESPACE )
{
next;
}
if ( $#{ $pagesdonebynamespace->{$namespace} } < 0 ) {
next;
}
print "\n== "
. (
( $namespaces->{$namespace} ne '' )
? $namespaces->{$namespace}
: 'Article'
)
. " ==\n\n";
foreach $pageh ( sort { $a->{'title'} cmp $b->{'title'} }
@{ $pagesdonebynamespace->{$namespace} } )
{
print '* [[' . $pageh->{'title'} . ']]';
if (
index( lc( $namespaces->{ ( $namespace + 1 ) } ),
lc( $namespaces->{$namespace} ) ) >= 0
)
{
my $talk = $pageh->{'title'};
if ( $talk =~ m/^$namespaces->{$namespace}:/ ) {
$talk =~
s/^$namespaces->{$namespace}:/$namespaces->{ ( $namespace + 1 ) }:/;
}
else {
$talk =
$namespaces->{ ( $namespace + 1 ) } . ':' . $pageh->{'title'};
}
print ' <sup><small>([[' . $talk . '|'
. $namespaces->{ ( $namespace + 1 ) }
. ']])</small></sup>';
}
print "\n";
delete $pagesdonebyname->{ $pageh->{'title'} };
$namespacecount->{$namespace}++;
}
}
print "\n";
print '{{Clr}}';
print "\n";
print "\n== Summary ==\n\n";
foreach $namespace ( sort keys %{$namespacecount} ) {
print '* '
. (
( $namespaces->{$namespace} ne '' )
? $namespaces->{$namespace}
: 'Article'
)
. ': '
. $namespacecount->{$namespace} . "\n";
}