use utf8; sub DoStartup { use utf8; use Getopt::Long; $HBase = "/usr/web/imc/idna"; $ACEPrefix = 'xn--'; $CharlintProg = "/usr/web/imc/idna/charlint.pl -K -E -q " . " -s CharlintK.storage"; $CharlintTempIn = "/usr/web/imc/idna/thecharlinttempinfile"; system("touch $CharlintTempIn"); unless(-w $CharlintTempIn) { &DieOut("Could not write to temp file $CharlintTempIn\n"); } $CharlintTempOut = "/usr/web/imc/idna/thecharlinttempoutfile"; system("touch $CharlintTempOut"); unless(-w $CharlintTempOut) { &DieOut("Could not write to temp file $CharlintTempOut\n"); } $MapFile = 'MapData.txt'; $UnassignedFile = 'UnassignedCharacters.txt'; $ProhibFile = 'ProhibitedCharacters.txt'; $RandALCatFile = 'RandALCat.txt'; $LCatFile = 'LCat.txt'; # Get the command line arguments and check the formats unless(&GetOptions( 'dounass' => \$DoUnass, 'd|debug' => \$Debug, 'writefiles' => \$WriteFiles, 'h|help|?' => \$Help, )) { &DieOut("Error getting the options: $!\n"); } if(defined($WriteFiles)) { open(REALOUT, '>thestdout') or &DieOut("Could not write to thestdout file.\n"); open(REALERR, '>thestderr') or &DieOut("Could not write to thestderr file.\n"); } else { open(REALOUT, '>&STDOUT'); open(REALERR, '>&STDERR'); } binmode STDIN, ":utf8"; @AllInLines = ; $InputPart = join('', @AllInLines); } sub CheckAllASCII { my $CheckIn = shift(@_); my ($i, %ASCIITable, @AllInOrds, $ThisOrd); &DebugOut(" Input to CheckAllASCII:\n "); &DebugHexOut($CheckIn); foreach $i (0 .. 127) { $ASCIITable{$i} = 1; } @AllInOrds = unpack("U*", $CheckIn); foreach $ThisOrd (@AllInOrds) { unless(exists($ASCIITable{$ThisOrd})) { &DebugOut(" Found a non-ASCII character with ord $ThisOrd\n"); return 0; } } &DebugOut(" All characters were ASCII\n"); return 1; } sub DoNamePrep { my $InputPart = shift(@_); my ($MapOutput, $i, $NormalizedOutput, $ProhibFound, $ThisLine, %RandALCatHash, $Low, $High, %LCatHash, $HasRandALCat, $HasLCat, @AllNormOrds); # NameprepWasOK is consciously exported from the subroutine so that # we can see if it passed or failed $NameprepWasOK = 1; # Nameprep mapping step &DebugOut(" Nameprep mapping\n"); $MapOutput = &DoMap($InputPart); # Nameprep normalize step &DebugOut(" Nameprep normalizing\n"); &DebugOut(" Input to normalization:\n "); &DebugHexOut($MapOutput); for($i = 0; $i < length($MapOutput); $i++) { if(ord(substr($MapOutput, $i, 1)) == 0) { &DieOut(" ***Null characters not allowed in normalization in this program\n") } } open(CHARLINTIN, ">:utf8", $CharlintTempIn) or &DieOut("Could not write to $CharlintTempIn\n"); print CHARLINTIN $MapOutput; close(CHARLINTIN); system("$CharlintProg <$CharlintTempIn >$CharlintTempOut"); $NormalizedOutput = ''; open(FOROUT, "<:utf8", $CharlintTempOut) or &DieOut("Weird; couldn't read from the charlint temp file\n"); while() { $NormalizedOutput .= $_ } &DebugOut(" Output of normalization:\n "); &DebugHexOut($NormalizedOutput); # Nameprep prohibit check # If CheckForProhib returns 0, it means that there were no prohibited # characters. &DebugOut(" Nameprep prohibiting\n"); $ProhibFound = &CheckForProhib($NormalizedOutput); if($ProhibFound) { &DebugOut(" ***Nameprep aborting due to prohibited code points.\n"); $NameprepWasOK = 0; return; } # Nameprep BIDI check &DebugOut(" Nameprep BIDI\n"); open(RANDALCAT, $RandALCatFile) or die "Could not read $RandALCatFile during BIDI check.\n"; open(LCAT, $LCatFile) or die "Could not read $LCatFile during BIDI check.\n"; while() { $ThisLine = $_; chomp($ThisLine); if(index($ThisLine, '-') == -1) { $RandALCatHash{hex($ThisLine)} = 1 } else { ($Low, $High) = split('-', $ThisLine); foreach $Val (hex($Low) .. hex($High)) { $RandALCatHash{$Val} = 1 } } } while() { $ThisLine = $_; chomp($ThisLine); if(index($ThisLine, '-') == -1) { $LCatHash{hex($ThisLine)} = 1 } else { ($Low, $High) = split('-', $ThisLine); foreach $Val (hex($Low) .. hex($High)) { $LCatHash{$Val} = 1 } } } # We don't need to do #1 (Section 5.8 characters) because that is # already done in the prohibition checking # Holder for whether or not the string has an RandALCat or LCat chars $HasRandALCat = 0; $HasLCat = 0; @AllNormOrds = unpack("U*", $NormalizedOutput); foreach $ThisOrd (@AllNormOrds) { if(exists($RandALCatHash{$ThisOrd})) { $HasRandALCat = 1; } if(exists($LCatHash{$ThisOrd})) { $HasLCat = 1; } } if($HasRandALCat) { if($HasLCat) { &DebugOut(" ***Nameprep aborting due to the string having " . "both RandALCat and LCat characters\n"); $NameprepWasOK = 0; return; } unless(exists($RandALCatHash{$AllNormOrds[0]})) { &DebugOut(" ***Nameprep aborting due to the string having " . "RandALCat characters, but not starting with one.\n"); $NameprepWasOK = 0; return; } unless(exists($RandALCatHash{$AllNormOrds[$#AllNormOrds]})) { &DebugOut(" ***Nameprep aborting due to the string having " . "RandALCat characters, but not ending with one.\n"); $NameprepWasOK = 0; return; } } else { &DebugOut(" No RandALCat characters found.\n"); } return $NormalizedOutput; } sub DoMap { my $ToBeMap = shift(@_); my ($i, $TheLine, %MapTable, $From, $To, @AllInOrds, $ThisOrd, @OutParts, $Part, $OutMap); &DebugOut(" Input to DoMap:\n "); &DebugHexOut($ToBeMap); open(MAPFILE, $MapFile) or die "Could not read $MapFile during mapping.\n"; while() { $TheLine = $_; chomp($TheLine); ($From, $To, $Reason) = split(/; /, $TheLine); $Reason = ''; # Needed to avoid the -w warning $MapTable{hex($From)} = $To; } @AllInOrds = unpack("U*", $ToBeMap); foreach $ThisOrd (@AllInOrds) { if(exists($MapTable{$ThisOrd})) { # The mapping may be zero, one, or more encoded hex values @OutParts = split(/ /, $MapTable{$ThisOrd}); foreach $Part (@OutParts) { $OutMap .= pack("U*", hex($Part)); } } else { # No map, so just put out the character $OutMap .= pack("U*", ($ThisOrd)); } } &DebugOut(" Output of DoMap:\n "); &DebugHexOut($OutMap); return $OutMap; } sub CheckForProhib { my $ProhibCheckString = shift(@_); my ($TheLine, @InputLines, $InRange, $Left, %ProhibHash, $ThisOrd, $FoundProhib, $Val, $Low, $High, @AllInChars, $i, $ThisChar, $ThisHex); &DebugOut(" Input to CheckForProhib:\n "); &DebugHexOut($ProhibCheckString); # Only add in the unassigned characters if needed. if($DoUnass) { open(UNASSIGNED, $UnassignedFile) or &DieOut("Could not read from $UnassignedFile\n"); while() { $TheLine = $_; chomp($TheLine); push(@InputLines, $TheLine); } } open(PROHIBIN, $ProhibFile) or &DieOut("Could not read from $ProhibFile\n"); # Load the input array while() { $TheLine = $_; chomp($TheLine); next if($TheLine eq ''); # Skip blank lines @SplitIt = split(/; /, $TheLine); push(@InputLines, $SplitIt[0]); } # Make a hash of values from the two files foreach $InRange (@InputLines) { if(index($InRange, '-') == -1) { $ProhibHash{hex($InRange)} = 1 } else { ($Low, $High) = split('-', $InRange); foreach $Val (hex($Low) .. hex($High)) { $ProhibHash{$Val} = 1 } } } # Compare the characters in the input against the values in # the ProhibHash. @AllInOrds = unpack("U*", $ProhibCheckString); foreach $ThisOrd (@AllInOrds) { if(exists($ProhibHash{$ThisOrd})) { $FmtString = '%04lX'; $ThisHex = sprintf($FmtString, $ThisOrd); &DebugOut("Found prohibited character: $ThisHex\n"); $FoundProhib = 1; } } if($FoundProhib) { return 1; } else { &DebugOut(" No prohibited characters found.\n"); return 0; } } sub CheckNonLDH { my $CheckIn = shift(@_); my ($i, @NonLDHVals, %NonLDHTable, @AllInOrds, $ThisOrd); &DebugOut(" Input to CheckNonLDH:\n "); &DebugHexOut($CheckIn); @NonLDHVals = (0 .. 44, 46 .. 47, 58 .. 64, 91 .. 96, 123 .. 127); foreach $i (@NonLDHVals) { $NonLDHTable{$i} = 1; } @AllInOrds = unpack("U*", $CheckIn); foreach $ThisOrd (@AllInOrds) { if( ($ThisOrd < 128) and (exists($NonLDHTable{$ThisOrd})) ) { &DebugOut(" Found a non-LDH ASCII character with ord $ThisOrd\n"); return 1; } } &DebugOut(" All characters were non-ASCII or LDH ASCII\n"); return 0; } sub DebugOut { my $DebugTemp = shift(@_); if($Debug) { print REALERR $DebugTemp; } } sub DieOut { my $DieTemp = shift(@_); &DebugOut($DieTemp); exit; } sub DebugHexOut { my $DebugTemp = shift(@_); my (@UTF8Parts, $ThisUTF8Part); @UTF8Parts = unpack("U*", $DebugTemp); foreach $ThisUTF8Part (@UTF8Parts) { &DebugOut('U+' . sprintf('%04lX', $ThisUTF8Part) . ' '); } &DebugOut("\n"); } 1;