Discussion Modèle:Portail Stargate/Suivi

Un article de Wikipédia, l'encyclopédie libre.

[modifier] Code perl pour générer la page de suivi

#!/usr/bin/perl

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

use warnings;
use strict;

use Unicode::String;

use LWP;
use LWP::UserAgent;

use YAML::Syck;
$YAML::Syck::ImplicitTyping = 0;

my $ua = LWP::UserAgent->new;
$ua->agent("libwww-perl-$LWP::VERSION");

# Unicode::String::utf8($line)->1();

# $pagesqueuedbyname->{'title'}
my $pagesqueuedbyname =
  { Unicode::String::latin1("Catégorie:Stargate") => {} };

my $lang = "fr";

my $pagestodobyname = {};

my $pagesdonebynamespace = {};
my $pagesdonebyname      = {};

my $namespaces        = {};
my $CATEGORYNAMESPACE = 14;
my $TEMPLATENAMESPACE = 10;
my $IMAGENAMESPACE    = 6;

my $MAXDEPTH = 10;

sub getnamespaces {
    my $req =
      HTTP::Request->new( GET => 'http://' . $lang
          . '.wikipedia.org/w/query.php?format=yaml&what=namespaces' );

    my $res = $ua->request($req);

    if ( !$res->is_success ) {
        die "getnamespaces(" . $lang . "): " . $res->status_line . "\n";
    }

    my ($listref) = Load( $res->content )->{meta}->{namespaces};

    my $hashref = {};

    my $elem;
    foreach $elem (@$listref) {
        if ( defined $elem->{'id'} ) {
            if ( defined $elem->{'*'} ) {
                $hashref->{ $elem->{id} } = $elem->{'*'};
            }
            else {
                $hashref->{ $elem->{id} } = '';
            }
        }
    }

    return $hashref;
}

sub getcateg {
    my ($title) = @_;

    my $req =
      HTTP::Request->new( GET => 'http://' . $lang
          . '.wikipedia.org/w/query.php?format=yaml&what=category&cplimit=500&cptitle='
          . $title );

    my $res = $ua->request($req);

    if ( !$res->is_success ) {
        die "getcateg(" . $title . "): " . $res->status_line . "\n";
    }

    my @lines = split( "\n", $res->content );
    my $i;
    foreach $i ( 0 .. $#lines - 1 ) {
        $lines[$i] =~ s/^([^:]*: [^:]+): /$1:_/g;    #bug if ": "
    }

    my ($hashref) = Load( join( "\n", @lines ) );

    return $hashref->{'pages'};
}

# MAIN

my $todocount  = 1;
my $totalcount = 1;

$namespaces = getnamespaces($lang);

while ( keys %$pagesqueuedbyname > 0 ) {
    $pagestodobyname   = $pagesqueuedbyname;
    $pagesqueuedbyname = {};

    my $page;
    foreach $page ( keys %{$pagestodobyname} ) {
        $page =~ s/_/ /g;
        my $pageh = $pagestodobyname->{$page};

        if ( !defined $pageh->{'title'} ) {
            $pageh->{'title'} = $page;
        }

        if ( ( defined $pageh->{'ns'} && $pageh->{'ns'} == $CATEGORYNAMESPACE )
            || $page =~ m/^$namespaces->{$CATEGORYNAMESPACE}:/ )
        {
            $pageh->{'ns'} = $CATEGORYNAMESPACE;
        }

        if ( !defined $pageh->{'depth'} ) {
            $pageh->{'depth'} = 0;
        }

        if ( $pageh->{'depth'} > $MAXDEPTH ) {
            warn "Max depth exceeded for $page\n";
        }
        else {
            if (
                (
                    defined $pageh->{'ns'}
                    && $pageh->{'ns'} == $CATEGORYNAMESPACE
                )
                || $page =~ m/^$namespaces->{$CATEGORYNAMESPACE}:/
              )
            {
                my $categs = getcateg($page);

                my $categ;
                foreach $categ (@$categs) {
                    chomp( $categ->{'title'} );
                    $categ->{'title'} =~ s/_/ /g;

                    if ( !defined $categ->{'title'} || $categ->{'title'} eq '' )
                    {
                        warn "Empty title\n";
                        next;
                    }

                    if (   !defined $pagesdonebyname->{ $categ->{'title'} }
                        && !defined $pagesqueuedbyname->{ $categ->{'title'} }
                        && !defined $pagestodobyname->{ $categ->{'title'} } )
                    {
                        $pagesqueuedbyname->{ $categ->{'title'} }->{'title'} =
                          $categ->{'title'};
                        $pagesqueuedbyname->{ $categ->{'title'} }->{'depth'} =
                          $pageh->{'depth'} + 1;

                        if ( defined $categ->{'ns'} ) {
                            $pagesqueuedbyname->{ $categ->{'title'} }->{'ns'} =
                              $categ->{'ns'};
                        }

                        print STDERR ' ' x length("$todocount/$totalcount"),
                          "\r";
                        $todocount++;
                        $totalcount++;
                        print STDERR "$todocount/$totalcount\r";
                    }
                }
            }
        }

        push @{ $pagesdonebynamespace->{ $pageh->{'ns'} } }, $pageh;
        $pagesdonebyname->{$page} = $pageh;

        print STDERR ' ' x length("$todocount/$totalcount"), "\r";
        $todocount--;
        print STDERR "$todocount/$totalcount\r";
    }
}

print STDERR "\n";

my $namespacecount = {};

my $pageh;
if ( $#{ $pagesdonebynamespace->{$IMAGENAMESPACE} } >= 0 ) {
    foreach $pageh ( sort { $a->{'title'} cmp $b->{'title'} }
        @{ $pagesdonebynamespace->{$IMAGENAMESPACE} } )
    {
        my $talk = $pageh->{'title'};
        if ( $talk =~ m/^$namespaces->{$IMAGENAMESPACE}:/ ) {
            $talk =~
s/^$namespaces->{$IMAGENAMESPACE}:/$namespaces->{ ( $IMAGENAMESPACE + 1 ) }:/;
        }
        else {
            $talk =
                $namespaces->{ ( $IMAGENAMESPACE + 1 ) } . ':'
              . $pageh->{'title'};
        }

        print '[['
          . $pageh->{'title'}
          . '|thumb|right|100px|[[:'
          . $pageh->{'title'}
          . ']] <sup><small>([['
          . $talk . '|'
          . $namespaces->{ ( $IMAGENAMESPACE + 1 ) }
          . ']])</small></sup>]]' . "\n";
        delete $pagesdonebyname->{ $pageh->{'title'} };
        $namespacecount->{$IMAGENAMESPACE}++;
    }
}

print "\n== " . $namespaces->{$CATEGORYNAMESPACE} . " ==\n\n";

if ( $#{ $pagesdonebynamespace->{$CATEGORYNAMESPACE} } >= 0 ) {
    foreach $pageh ( sort { $a->{'title'} cmp $b->{'title'} }
        @{ $pagesdonebynamespace->{$CATEGORYNAMESPACE} } )
    {
        my $talk = $pageh->{'title'};
        if ( $talk =~ m/^$namespaces->{$CATEGORYNAMESPACE}:/ ) {
            $talk =~
s/^$namespaces->{$CATEGORYNAMESPACE}:/$namespaces->{ ( $CATEGORYNAMESPACE + 1 ) }:/;
        }
        else {
            $talk =
                $namespaces->{ ( $CATEGORYNAMESPACE + 1 ) } . ':'
              . $pageh->{'title'};
        }

        print '* [[:'
          . $pageh->{'title'}
          . ']] <sup><small>([['
          . $talk . '|'
          . $namespaces->{ ( $CATEGORYNAMESPACE + 1 ) }
          . ']])</small></sup>' . "\n";
        delete $pagesdonebyname->{ $pageh->{'title'} };
        $namespacecount->{$CATEGORYNAMESPACE}++;
    }
}

my $namespace;
foreach $namespace ( sort keys %{$pagesdonebynamespace} ) {
    if (   $namespace eq $IMAGENAMESPACE
        || $namespace eq $CATEGORYNAMESPACE )
    {
        next;
    }

    if ( $#{ $pagesdonebynamespace->{$namespace} } < 0 ) {
        next;
    }

    print "\n== "
      . (
        ( $namespaces->{$namespace} ne '' )
        ? $namespaces->{$namespace}
        : 'Article'
      )
      . " ==\n\n";

    foreach $pageh ( sort { $a->{'title'} cmp $b->{'title'} }
        @{ $pagesdonebynamespace->{$namespace} } )
    {
        print '* [[' . $pageh->{'title'} . ']]';
        if (
            index( lc( $namespaces->{ ( $namespace + 1 ) } ),
                lc( $namespaces->{$namespace} ) ) >= 0
          )
        {
            my $talk = $pageh->{'title'};
            if ( $talk =~ m/^$namespaces->{$namespace}:/ ) {
                $talk =~
s/^$namespaces->{$namespace}:/$namespaces->{ ( $namespace + 1 ) }:/;
            }
            else {
                $talk =
                  $namespaces->{ ( $namespace + 1 ) } . ':' . $pageh->{'title'};
            }

            print ' <sup><small>([[' . $talk . '|'
              . $namespaces->{ ( $namespace + 1 ) }
              . ']])</small></sup>';
        }
        print "\n";
        delete $pagesdonebyname->{ $pageh->{'title'} };
        $namespacecount->{$namespace}++;
    }
}

print "\n";
print '{{Clr}}';
print "\n";
print "\n== Summary ==\n\n";

foreach $namespace ( sort keys %{$namespacecount} ) {
    print '* '
      . (
        ( $namespaces->{$namespace} ne '' )
        ? $namespaces->{$namespace}
        : 'Article'
      )
      . ': '
      . $namespacecount->{$namespace} . "\n";
}