Wiktionary:Statistik/Statistiko-pri-lingvoj.pl

Aus Wiktionary, dem freien Wörterbuch
#!/usr/bin/perl

# Load the settings from lokalizo.txt into the globals used by the rest of
# the script.
#
# A setting is recognised anywhere on a line in the form
#     $name = "value";
# where the value is at least one character long and contains no double
# quote.  Lines matching no known setting are ignored; a setting that never
# appears leaves its variable undefined.
#
# Setting name => global variable that receives the value.
my %setting_target = (
    'input-artikoloj_lasta'  => \$InputFileLatest,
    'input-artikoloj_antaŭa' => \$InputFileFormer,
    'catPrefix'              => \$catPrefix,
    'langTemp'               => \$langTemp,
    'langExcept'             => \$langExcept,
    'thousandsSeparator'     => \$thSep,
    'text-new-date'          => \$textNewDate,
    'text-old-date'          => \$textOldDate,
    'text-lang'              => \$textLang,
    'text-terms'             => \$textTerms,
    'text-diff'              => \$textDiff,
    'text-total'             => \$textTotal,
    'text-color'             => \$textColor,
    'text-note'              => \$textNote,
    'text-dumps'             => \$textDumps,
    'text-and'               => \$textAnd,
);

# One precompiled pattern per setting; \Q...\E keeps the name literal.
my %setting_pattern;
foreach my $name (keys %setting_target) {
    $setting_pattern{$name} = qr/\$\Q$name\E = "([^"]+?)";/;
}

# Without the settings file nothing below can work, so fail loudly instead
# of carrying on with undefined patterns and file names.
open(my $config_fh, '<', 'lokalizo.txt')
    or die "Cannot open lokalizo.txt: $!\n";
while (my $line = <$config_fh>) {
    # Every setting is tried on every line, so several settings on one line
    # are all picked up.
    foreach my $name (keys %setting_target) {
        if ($line =~ $setting_pattern{$name}) {
            ${ $setting_target{$name} } = $1;
        }
    }
}
close $config_fh;

# Opening console banner: two blank lines, the caption, then a rule of
# asterisks.
print join('',
    "\n\n",
    "Lingvo = Esperanto (la lingvo internacia!)\n",
    "******************************************\n",
);

# Open a dump file for reading on the given (glob) filehandle.
#
# Parameters:
#   $handle - glob reference to open, e.g. \*INL
#   $path   - file name of the dump
#
# Dies when no file name is configured or the file cannot be opened.  The
# explicit name check matters: three-argument open() with an undefined name
# would otherwise open an empty anonymous temporary file and "succeed".
sub open_dump {
    my ($handle, $path) = @_;
    die "No dump file name configured\n"
        unless defined $path && length $path;
    open($handle, '<', $path)
        or die "Cannot open dump file '$path': $!\n";
    return;
}

# Count language hits in a dump that is read line by line.
#
# Parameters:
#   $dump_fh   - open filehandle of the dump
#   $lang_re   - compiled $langTemp pattern; the text of its first capture
#                group is the key under which a hit is counted
#   $except_re - compiled $langExcept pattern, or undef for none; a line
#                that matches it is never counted
#
# Returns ($count_by_lang, $total): a hash reference mapping each captured
# key to its number of hits, and the overall number of hits.  At most one
# hit is counted per line.
sub count_language_terms {
    my ($dump_fh, $lang_re, $except_re) = @_;

    my %count_by_lang;
    my $total      = 0;
    my $is_article = 0;    # current page passed the title test
    my $in_text    = 0;    # inside the page's <text> element
    my $in_comment = 0;    # inside an (escaped) HTML comment
    my $in_pre     = 0;    # inside an (escaped) <pre> section

    while (my $line = <$dump_fh>) {
        # A title containing a colon directly followed by a letter is
        # treated as a non-article page (presumably a namespace prefix).
        # The flag is decided afresh for every title so it cannot be
        # inherited from the previous page.
        if ($line =~ /<title>([^<]+)<\/title>/) {
            my $title = $1;
            $is_article = ($title !~ /:[A-ZÄÖÜa-zäèéöü]/) ? 1 : 0;
        }

        my $opens_text  = ($line =~ /<text xml:space="preserve">/) ? 1 : 0;
        my $closes_text = ($line =~ /<\/text>/) ? 1 : 0;
        if ($opens_text) {
            $in_text = 1;
        }
        elsif ($closes_text) {
            # Closing line of a multi-line text: leave the text area before
            # scanning, so this line itself is not examined.
            ($in_comment, $in_pre, $in_text, $is_article) = (0, 0, 0, 0);
        }

        # Set when the line merely quotes a Sprache template between
        # <nowiki> tags; evaluated per line only.
        my $nowiki_sample = 0;
        if ($in_text) {
            # Tracks multi-line HTML comments.  A comment that opens and
            # closes on the same line leaves the flag at 0, so such a line
            # is still scanned.
            if ($line =~ /&lt;!--/) { $in_comment = 1; }
            if ($line =~ /--&gt;/)  { $in_comment = 0; }
            # Workaround for dumps that contain more than plain articles.
            if ($line =~ /&lt; *nowiki *&gt;[^&]+?\{ *Sprache *\|[^\}]+?\}[^&]+?&lt; *\/ *nowiki *&gt;/) {
                $nowiki_sample = 1;
            }
            # Same workaround for <pre> sections.
            if ($line =~ /&lt; *pre *&gt;/)     { $in_pre = 1; }
            if ($line =~ /&lt; *\/ *pre *&gt;/) { $in_pre = 0; }
        }

        if ($is_article && $in_text && !$in_comment && !$nowiki_sample && !$in_pre) {
            # The exception is a plain yes/no: it must not depend on what
            # (if anything) the exception pattern happens to capture.
            my $excepted = (defined $except_re && $line =~ $except_re) ? 1 : 0;
            if (!$excepted && $line =~ $lang_re) {
                $count_by_lang{$1}++;
                $total++;
            }
        }

        # A text that opens and closes on one line never reaches the
        # "elsif" reset above; reset here so its state does not leak into
        # the following page.
        if ($opens_text && $closes_text) {
            ($in_comment, $in_pre, $in_text, $is_article) = (0, 0, 0, 0);
        }
    }

    return (\%count_by_lang, $total);
}

# An empty pattern would make Perl silently reuse the last successfully
# matched regex, so the required pattern must be present and the optional
# one is only used when it is non-empty.
die "No \$langTemp pattern configured\n"
    unless defined $langTemp && length $langTemp;
my $lang_re   = qr/$langTemp/;
my $except_re = (defined $langExcept && length $langExcept)
              ? qr/$langExcept/
              : undef;

# Latest dump.  INL stays open here; it is closed further down the script.
print "  Utiligi la lasta dumpaĵo: $InputFileLatest\n";
open_dump(\*INL, $InputFileLatest);
my ($latest_counts, $latest_total) = count_language_terms(\*INL, $lang_re, $except_re);
%langnb_latest = %{$latest_counts};
$termnb_latest = $latest_total;
$i = 0;

# Former dump, scanned in exactly the same way.  INF is likewise closed
# further down the script.
print "  Utiligi la antaŭa dumpaĵo: $InputFileFormer\n";
open_dump(\*INF, $InputFileFormer);
my ($former_counts, $former_total) = count_language_terms(\*INF, $lang_re, $except_re);
%langnb_former = %{$former_counts};
$termnb_former = $former_total;
$j = 0;

# Return $number as text with $separator inserted between every group of
# three digits, counted from the right (1234567 -> "1.234.567" for ".").
#
# Parameters:
#   $number    - integer to format; undef is treated as 0
#   $separator - text to insert; undef is treated as the empty string
#
# A leading minus sign is kept as is.
sub format_thousands {
    my ($number, $separator) = @_;
    $number    = 0  unless defined $number;
    $separator = '' unless defined $separator;
    # The look-ahead finds every digit that is followed by a whole number
    # of three-digit groups up to the end of the string, so all groups are
    # separated in one pass, not only the last one.
    (my $text = "$number") =~ s/(\d)(?=(?:\d{3})+\z)/$1$separator/g;
    return $text;
}

# Overall totals.  The sign is derived from the numeric difference before
# any formatting takes place.
my $total_latest = defined $termnb_latest ? $termnb_latest : 0;
my $total_former = defined $termnb_former ? $termnb_former : 0;
$termnb_diff = $total_latest - $total_former;
$plusSign    = ($termnb_diff > 0) ? "+" : "";
my $total_latest_text = format_thousands($total_latest, $thSep);
my $total_former_text = format_thousands($total_former, $thSep);
my $total_diff_text   = format_thousands($termnb_diff,  $thSep);

# Write the wiki table to Statistiko-pri-lingvoj.txt.
open(my $out, '>', 'Statistiko-pri-lingvoj.txt')
    or die "Cannot write Statistiko-pri-lingvoj.txt: $!\n";

# Table header and the row with the overall totals.
print {$out} "{| align=\"center\" cellpadding=\"3\" rules=\"all\" style=\"border: 1px solid #AAAAAA; border-collapse: collapse; text-align: right;\"\n";
print {$out} "|-\n";
print {$out} "! $textLang\n";
print {$out} "! style=\"text-align: center;\" | $textTerms&nbsp;<sup>[[#noto|(!)]]</sup><br />($textNewDate)\n";
print {$out} "! style=\"text-align: center;\" | $textTerms&nbsp;<sup>[[#noto|(!)]]</sup><br />($textOldDate)\n";
print {$out} "! $textDiff\n";
print {$out} "|-\n";
print {$out} "| ''${textTotal}'' || ''${total_latest_text}'' || ''${total_former_text}'' || ''${plusSign}${total_diff_text}''\n";

# One row per language found in the latest dump, largest count first; equal
# counts are ordered by name so the output is the same on every run.
# Languages that occur only in the former dump get no row.
my $row = 0;
foreach my $lang (sort { $langnb_latest{$b} <=> $langnb_latest{$a} or $a cmp $b } keys %langnb_latest) {
    $row++;
    my $latest = $langnb_latest{$lang};
    # A language absent from the former dump counts as 0 there.
    my $former = defined $langnb_former{$lang} ? $langnb_former{$lang} : 0;
    my $diff   = $latest - $former;
    my $sign   = ($diff > 0) ? "+" : "";
    # Odd rows (1st, 3rd, ...) get the configured background colour.
    my $color  = ($row % 2) ? " style=\"background-color: $textColor;\"" : "";

    my $latest_text = format_thousands($latest, $thSep);
    my $former_text = format_thousands($former, $thSep);
    my $diff_text   = format_thousands($diff,   $thSep);

    print {$out} "|-$color\n";
    print {$out} "| [[:$catPrefix:$lang|$lang]] || $latest_text || $former_text || ${sign}${diff_text}\n";
}
print {$out} "|}\n\n";

# The dump handles opened earlier in the script are no longer needed.
close INL;
close INF;

# Footnote and the names of the two dumps that were compared.
print {$out} "<div id=\"noto\" name=\"noto\" style=\"margin-top: 1em;\">$textNote</div>\n\n";
print {$out} "'''${textDumps}:''' ''${InputFileLatest}'' ${textAnd} ''${InputFileFormer}''";
# Buffered write errors only surface when the handle is closed.
close($out)
    or die "Cannot finish writing Statistiko-pri-lingvoj.txt: $!\n";

# Closing console banner: the rule of asterisks, the caption, then two
# blank lines.
print join('',
    "******************************************\n",
    "Lingvo = Esperanto (la lingvo internacia!)\n",
    "\n\n",
);