#!/usr/bin/perl -w
# This script is maintained by Rüdiger Beck
# It is Free Software (License GPLv3)
# If you find errors, contact the author
# jeffbeck@web.de  or  jeffbeck@linuxmuster.net

use strict;
use Text::Iconv;

############################################################
# README
############################################################
#
# 1. Adding a name to the correct list:
#     A) Open $file_in_first in emacs (replace):
#         emacs <$file_in_first> 
#     B) Find that name on a Unicode-encoded website 
#        (e.g. Wikipedia -> Edit -> select the name and copy it)
#     C) Insert the name into emacs/file
#     D) Run iconv-all-names to create all other target 
#        encoding files:
#         firstnames.<enc>.txt
#         firstname_errors.<enc>.txt
#
# 2. Adding a name to errors
#     A) Look up the wrong character in a code table,
#        e.g. for ISO 8859-1 (example: E8)
#     B) Write the character to a temporary file and 
#        append a suggestion after a colon.
#        Example: Rene with the wrong accent E8:
#        echo -e "Ren\xE8: Wrong accent on letter e ?" > file
#     C) Check the result with emacs 
#        (Wrong letter should display nicely):
#        emacs file
#     D) Convert this file to a UTF-8 file named uni:
#        iconv --verbose -f 8859_1 -t utf-8 file -o uni
#     E) Add/Append the contents of the file to 
#        firstname_errors.UTF8.txt with emacs or:
#        cat uni >> firstname_errors.UTF8.txt
#     F) Run iconv-all-names to create all other target 
#        encoding files:
#         firstnames.<enc>.txt
#         firstname_errors.<enc>.txt
#

############################################################
# Config
############################################################


# UTF-8 master files; each one is the input of the conversions below.
my $file_in_first       = './firstnames.UTF8.txt';
my $file_error_in_first = './firstname_errors.UTF8.txt';
my $file_in_last        = './lastnames.UTF8.txt';
my $file_error_in_last  = './lastname_errors.UTF8.txt';

# Encodings for which a converted copy of every master file is written.
my @target_encodings = qw(ISO_8859-1 WINDOWS-1252);



############################################################
# Program
############################################################

# Write one converted copy of every UTF-8 master file per target encoding.
# Each job pairs a master file with the prefix of its output files; the
# output name is "<prefix>.<encoding>.txt". Jobs run in this order:
# correct firstnames, firstname errors, correct lastnames, lastname errors.
foreach my $job (
    [ $file_in_first,       "./firstnames"       ],
    [ $file_error_in_first, "./firstname_errors" ],
    [ $file_in_last,        "./lastnames"        ],
    [ $file_error_in_last,  "./lastname_errors"  ],
) {
    my ($source_file, $target_prefix) = @{$job};

    foreach my $encoding (@target_encodings) {
        # converting the UTF8 file to the target encoding
        my $target_name = $target_prefix . "." . $encoding . ".txt";
        convert($source_file, $target_name, $encoding);
    }
}


############################################################
# subs
############################################################

# convert($in, $out, $enc)
#
# Converts the text file $in line by line from UTF8 to the encoding $enc
# and writes the converted lines to $out (an existing $out is overwritten).
#
# Parameters:
#   $in  - path of the input file
#   $out - path of the output file
#   $enc - target encoding name passed to Text::Iconv (e.g. "ISO_8859-1")
#
# Lines starting with '#' are skipped. Every other line is echoed to STDOUT
# together with its conversion result; a line for which the converter
# returns undef is reported as "NOT CONVERTABLE" and is not written to $out.
#
# Dies if $in cannot be opened for reading, or if $out cannot be opened
# for writing or closed cleanly.
sub convert {
    my ($in, $out, $enc) = @_;

    print "###################################################\n";
    print "Convert: $in --> $out\n";
    print "###################################################\n";

    # Three-argument open with lexical handles: characters such as '>' or
    # '|' in a path are never interpreted as an open mode, and the handles
    # are private to this call.
    open(my $in_fh, '<', $in) or die "Error: $! $in not found!";
    open(my $out_fh, '>', $out) or die "Error: $! $out not writable!";

    # The converter depends only on $enc, so one instance serves all lines.
    my $conv = Text::Iconv->new("UTF8", $enc);

    while (my $inline = <$in_fh>) {
        chomp($inline);

        # comment lines of the master file are not converted
        next if $inline =~ m/^#/;

        my $outline = $conv->convert($inline);
        if (defined $outline) {
            print "$inline --> $outline\n";
            print {$out_fh} "$outline\n";
        } else {
            print "$inline --> NOT CONVERTABLE\n";
        }
    }

    close($in_fh);
    # Buffered write errors only surface when the handle is closed.
    close($out_fh) or die "Error: $! closing $out failed!";
    return;
}
