# Aus Wiktionary, dem freien Wörterbuch  -- stray page header, kept as a comment so the file parses as Perl
#!/usr/bin/perl
# ---------------------------------------------------------------------
# Load the localisation settings from lokalizo.txt.
#
# A setting is recognised anywhere on a line in the form
#     $key = "value";
# where the value is a non-empty run of characters without a double
# quote.  Every recognised key is copied into the package global listed
# below; lines that match no key are ignored.  If a key occurs on more
# than one line, the last occurrence wins.
#
# Dies if lokalizo.txt cannot be opened, because without it neither the
# dump file names nor the language patterns are known.
# ---------------------------------------------------------------------

# Settings key (as spelled in lokalizo.txt) => global that receives it.
my %config_target = (
    'input-artikoloj_lasta'  => \$InputFileLatest,
    'input-artikoloj_antaŭa' => \$InputFileFormer,
    'catPrefix'              => \$catPrefix,
    'langTemp'               => \$langTemp,
    'langExcept'             => \$langExcept,
    'thousandsSeparator'     => \$thSep,
    'text-new-date'          => \$textNewDate,
    'text-old-date'          => \$textOldDate,
    'text-lang'              => \$textLang,
    'text-terms'             => \$textTerms,
    'text-diff'              => \$textDiff,
    'text-total'             => \$textTotal,
    'text-color'             => \$textColor,
    'text-note'              => \$textNote,
    'text-dumps'             => \$textDumps,
    'text-and'               => \$textAnd,
);

# One precompiled pattern per key; \Q...\E keeps the key literal.
my %config_pattern =
    map { $_ => qr/\$\Q$_\E = "([^"]+?)";/ } keys %config_target;

my $config_file = "lokalizo.txt";
open(my $config_fh, '<', $config_file)
    or die "Cannot open settings file '$config_file': $!\n";

while (my $config_line = <$config_fh>) {
    foreach my $key (keys %config_target) {
        if ($config_line =~ $config_pattern{$key}) {
            ${ $config_target{$key} } = $1;
        }
    }
}

close $config_fh;
# ---------------------------------------------------------------------
# count_language_sections($dump_path)
#
# Reads the dump file at $dump_path line by line and counts the lines
# that match the configured language pattern $langTemp.  The first
# capture group of $langTemp is used as the language name.
#
# A line is only considered when all of the following hold:
#   * the most recent <title> does not contain a colon followed by a
#     letter (such titles are treated as non-article pages),
#   * the line lies inside a <text xml:space="preserve"> element,
#   * it is not inside an HTML comment or a <pre> section, and it does
#     not show the template inside <nowiki> tags,
#   * it does not match the exclusion pattern $langExcept (if one is
#     configured).
# At most one match per line is counted.
#
# Differences from the former inline loops (bug fixes):
#   * the article flag is recomputed on every <title>, so it can no
#     longer leak from one page to the next;
#   * a <text> element that opens and closes on the same line has its
#     line scanned and then resets the page state (the old code never
#     reset in that case);
#   * an empty $langExcept is skipped instead of being used as Perl's
#     "repeat last successful pattern" empty regex.
#
# Returns a flat list: the total number of counted lines, followed by
# language => count pairs.  Dies if the dump cannot be opened or if no
# language pattern is configured.
# ---------------------------------------------------------------------
sub count_language_sections {
    my ($dump_path) = @_;

    die "No language pattern (\$langTemp) configured\n"
        unless defined $langTemp && length $langTemp;
    die "No dump file name configured\n"
        unless defined $dump_path && length $dump_path;

    open(my $dump_fh, '<', $dump_path)
        or die "Cannot open dump file '$dump_path': $!\n";

    my $use_except = (defined $langExcept && length $langExcept) ? 1 : 0;

    my %count_for;
    my $total = 0;
    my ($is_article, $in_text, $in_comment, $in_nowiki, $in_pre) = (0) x 5;

    while (my $line = <$dump_fh>) {
        # A new page title decides afresh whether this page is an article.
        if ($line =~ /<title>([^<]+)<\/title>/) {
            $is_article = ($1 !~ /:[A-ZÄÖÜa-zäèéöü]/) ? 1 : 0;
        }

        my $opens_text  = ($line =~ /<text xml:space="preserve">/) ? 1 : 0;
        my $closes_text = ($line =~ /<\/text>/) ? 1 : 0;

        if ($opens_text) {
            $in_text = 1;
        }
        elsif ($closes_text) {
            # The closing line itself is not scanned (as before).
            ($is_article, $in_text, $in_comment, $in_nowiki, $in_pre) = (0) x 5;
        }

        if ($in_text) {
            # Skips multi-line HTML comments; a comment that opens and
            # closes on one line leaves the line eligible for counting.
            $in_comment = 1 if $line =~ /<!--/;
            $in_comment = 0 if $line =~ /-->/;

            # Non-pure article dump workaround: template shown in <nowiki>.
            $in_nowiki =
                ($line =~ /< *nowiki *>[^&]+?\{ *Sprache *\|[^\}]+?\}[^&]+?< *\/ *nowiki *>/)
                ? 1 : 0;

            # Non-pure article dump workaround: <pre> sections.
            $in_pre = 1 if $line =~ /< *pre *>/;
            $in_pre = 0 if $line =~ /< *\/ *pre *>/;
        }

        if ($is_article && $in_text && !$in_comment && !$in_nowiki && !$in_pre) {
            my $excepted = ($use_except && $line =~ /$langExcept/) ? 1 : 0;

            # Looks for only one language template per line.
            if (!$excepted && $line =~ /$langTemp/) {
                $count_for{$1}++;
                $total++;
            }
        }

        # Single-line <text>...</text>: the page ends here, so reset now.
        if ($opens_text && $closes_text) {
            ($is_article, $in_text, $in_comment, $in_nowiki, $in_pre) = (0) x 5;
        }
    }

    close $dump_fh;
    return ($total, %count_for);
}

print "\n\nLingvo = Esperanto (la lingvo internacia!)\n";
print "******************************************\n";
print " Utiligi la lasta dumpaĵo: $InputFileLatest\n";
($termnb_latest, %langnb_latest) = count_language_sections($InputFileLatest);

# Row counter read by the table loop in the report section.
$i = 0;

print " Utiligi la antaŭa dumpaĵo: $InputFileFormer\n";
($termnb_former, %langnb_former) = count_language_sections($InputFileFormer);
# ---------------------------------------------------------------------
# group_thousands($number, $separator)
#
# Returns $number with $separator inserted between every group of three
# digits, counted from the right ("1234567" -> "1.234.567").  A leading
# sign is left untouched.  An undefined number is rendered as 0 and an
# undefined separator as the empty string.
# ---------------------------------------------------------------------
sub group_thousands {
    my ($number, $separator) = @_;
    $number    = 0  unless defined $number;
    $separator = '' unless defined $separator;

    # Zero-width match: after a digit, with a multiple of three digits
    # remaining up to the end of the string.
    $number =~ s/(?<=[0-9])(?=(?:[0-9]{3})+\z)/$separator/g;
    return $number;
}

# ---------------------------------------------------------------------
# Report: write Statistiko-pri-lingvoj.txt, a wiki table with one row
# for the grand total and one row per language found in the latest
# dump, ordered by descending count (ties broken by language name so
# the output is reproducible).  Each row shows the latest count, the
# former count and the difference.  Languages that were only counted in
# the former dump get no row.
# ---------------------------------------------------------------------

# Totals; an undefined total (nothing counted) is shown as 0.
my $total_latest = defined $termnb_latest ? $termnb_latest : 0;
my $total_former = defined $termnb_former ? $termnb_former : 0;
my $total_diff   = $total_latest - $total_former;
my $total_sign   = ($total_diff > 0) ? "+" : "";

my $shown_total_latest = group_thousands($total_latest, $thSep);
my $shown_total_former = group_thousands($total_former, $thSep);
my $shown_total_diff   = group_thousands($total_diff,   $thSep);

my $report_file = "Statistiko-pri-lingvoj.txt";
open(my $report_fh, '>', $report_file)
    or die "Cannot write report file '$report_file': $!\n";

print {$report_fh} "{| align=\"center\" cellpadding=\"3\" rules=\"all\" style=\"border: 1px solid #AAAAAA; border-collapse: collapse; text-align: right;\"\n";
print {$report_fh} "|-\n";
print {$report_fh} "! $textLang\n";
print {$report_fh} "! style=\"text-align: center;\" | $textTerms <sup>[[#noto|(!)]]</sup><br />($textNewDate)\n";
print {$report_fh} "! style=\"text-align: center;\" | $textTerms <sup>[[#noto|(!)]]</sup><br />($textOldDate)\n";
print {$report_fh} "! $textDiff\n";
print {$report_fh} "|-\n";
print {$report_fh} "| \'\'$textTotal\'\' || \'\'$shown_total_latest\'\' || \'\'$shown_total_former\'\' || \'\'$total_sign$shown_total_diff\'\'\n";

my @languages_by_count =
    sort { $langnb_latest{$b} <=> $langnb_latest{$a} or $a cmp $b }
    keys %langnb_latest;

$i = 0;
foreach my $language (@languages_by_count) {
    $i++;

    my $latest = $langnb_latest{$language};
    # A language that is new in the latest dump has no former count.
    my $former = defined $langnb_former{$language} ? $langnb_former{$language} : 0;
    my $diff   = $latest - $former;

    # Decide the sign on the raw number, before any separator is added.
    my $sign = ($diff > 0) ? "+" : "";

    # Every odd row gets the configured background colour.
    my $row_style = ($i % 2) ? " style=\"background-color: $textColor;\"" : "";

    my $shown_latest = group_thousands($latest, $thSep);
    my $shown_former = group_thousands($former, $thSep);
    my $shown_diff   = group_thousands($diff,   $thSep);

    print {$report_fh} "|-$row_style\n";
    print {$report_fh} "| [[:$catPrefix:$language|$language]] || $shown_latest || $shown_former || $sign$shown_diff\n";
}
print {$report_fh} "|}\n\n";

# Dump handles from the scanning section; closing a handle that is not
# open is harmless.
close INL;
close INF;

print {$report_fh} "<div id=\"noto\" name=\"noto\" style=\"margin-top: 1em;\">$textNote</div>\n\n";
print {$report_fh} "'''$textDumps:''' \'\'$InputFileLatest\'\' $textAnd \'\'$InputFileFormer\'\'";

# Buffered write errors only surface at close, so check it.
close($report_fh)
    or die "Cannot finish writing '$report_file': $!\n";

print "******************************************\n";
print "Lingvo = Esperanto (la lingvo internacia!)\n\n\n";