#!/usr/bin/perl

# ToUnicode.pl -- perform the ToUnicode operation of RFC 3490 on one label.
#
# The helper subs used here (DoStartup, DebugOut, DieOut, CheckAllASCII,
# DoNamePrep, CheckNonLDH) are not defined in this file; they are expected
# to come from AllSubs.pl. State is exchanged with them through package
# globals ($Help, $InputPart, $NormalizedOutput, $NameprepWasOK,
# $ACEPrefix), so those variables are deliberately NOT declared with "my".
# NOTE(review): which of those globals DoStartup and DoNamePrep actually set
# is inferred from how they are used below -- confirm against AllSubs.pl.
#
# RFC 3490 says ToUnicode never fails: when a step fails, the original input
# is returned. Every failure path below therefore writes $InputPart to
# REALOUT *before* calling DieOut.
# NOTE(review): DieOut presumably terminates the program -- confirm.

# NOTE(review): this relies on AllSubs.pl being findable through @INC. Since
# Perl 5.26 the current directory is no longer in @INC by default, so this
# may need PERL5LIB / -I or an explicit path on newer perls.
require "AllSubs.pl";
DoStartup();

# Print the usage text and stop when help was requested.
if (defined($Help)) {
    print <<"EOF";
ToUnicode.pl, version 3.1, by Paul Hoffman
This program is provided with no warranty and may not work as expected.
You may freely distribute this program.

Program to perform the steps in ToUnicode in RFC 3490.
The input is UTF-8; the output is UTF-8.

Using the '-writefiles' option causes the STDOUT to be written
to 'thestdout' and the STDERR to be written to 'thestderr'. This
is useful when calling from CGIs.

Using the '-dounass' option causes the program to check against
the unassigned character list when checking for prohibited
characters. Without this option, the check is not done.
EOF
    exit;
}

# Step 1: an all-ASCII label skips Nameprep and goes straight to step 3.
DebugOut("ToUnicode Step 1 (Check for all-ASCII)\n");
$IsAllASCII = CheckAllASCII($InputPart);
if ($IsAllASCII) {
    $NormalizedOutput = $InputPart;
    DebugOut(" Was all ASCII, skipping to step 3\n");
}
else {
    # Step 2: Nameprep. The checks below read $NameprepWasOK and
    # $NormalizedOutput afterwards (presumably set by DoNamePrep).
    DebugOut("ToUnicode Step 2 (Nameprep)\n");
    DoNamePrep($InputPart);
    unless ($NameprepWasOK) {
        # Emit the original input first, like every other failure path;
        # previously this print came after DieOut and was never reached
        # if DieOut exits.
        print REALOUT $InputPart;
        DieOut("***ToUnicode failing in step 2 because Nameprep aborted\n");
    }

    # Optional ToASCII step 3(a): reject non-LDH ASCII characters.
    DebugOut("ToUnicode Step 2--3a (Check for non-LDH ASCII)\n");
    if (CheckNonLDH($NormalizedOutput)) {
        print REALOUT $InputPart;
        DieOut(" ***ToUnicode failing in Step 2--3a.\n");
    }

    # Optional ToASCII step 3(b): reject a leading or trailing hyphen.
    DebugOut("ToUnicode Step 2--3b (Check for leading or trailing hyphen)\n");
    if (substr($NormalizedOutput, 0, 1) eq '-') {
        print REALOUT $InputPart;
        DieOut(" ***ToUnicode failing in Step 2--3b for leading hyphen.\n");
    }
    if (substr($NormalizedOutput, -1, 1) eq '-') {
        print REALOUT $InputPart;
        DieOut(" ***ToUnicode failing in Step 2--3b for trailing hyphen.\n");
    }
    DebugOut(" After Step 2, the string is $NormalizedOutput\n");
}

# Step 3: the label must start with the ACE prefix (compared
# case-insensitively over its first four characters). Keep a copy of the
# prefixed label for the comparison in step 7.
DebugOut("ToUnicode Step 3 (Check for ACE prefix)\n");
if (lc(substr($NormalizedOutput, 0, 4)) eq $ACEPrefix) {
    $SavedSequence = $NormalizedOutput;
}
else {
    print REALOUT $InputPart;
    DieOut(" ***ToUnicode failing in Step 3.\n");
}

DebugOut("ToUnicode Step 4 (Remove ACE prefix)\n");
# ToUnicode steps 4-8 (RFC 3490): strip the ACE prefix, Punycode-decode the
# rest, re-encode the result with ToASCII, and only return the decoded label
# if the round trip reproduces the label saved in step 3.
#
# $NormalizedOutput, $SavedSequence, $InputPart and $HBase are package
# globals set earlier in the program; DebugOut, DieOut and the REALOUT
# handle are not defined in this part of the file.
# NOTE(review): DieOut presumably terminates the program -- confirm.

# Step 4: drop the four-character ACE prefix verified in step 3.
$NormalizedOutput = substr($NormalizedOutput, 4);
DebugOut(" After step 4, the string is $NormalizedOutput\n");

# Step 5: decode with Punycode. The label is handed to the converter through
# a temp file and shell redirection rather than on the command line.
DebugOut("ToUnicode Step 5 (Decode with Punycode)\n");
# Three-argument open with a lexical handle; print and close are checked so
# a short write cannot silently feed a truncated label to the decoder.
open(my $AceTemp, '>', "$HBase/thestdace")
    or DieOut("Could not write thestdace holder");
print {$AceTemp} $NormalizedOutput
    or DieOut("Could not write thestdace holder");
close($AceTemp)
    or DieOut("Could not write thestdace holder");

$PunycodeDecodeCmd = "$HBase/UnicodeConvert.pl -infmt punycode "
    . " -outfmt utf8 -errtmp <$HBase/thestdace";
$PunycodeOut = `$PunycodeDecodeCmd`;
if ($PunycodeOut eq '') {
    # Empty output is treated as a decoder failure. The error text is read
    # from thestdunicodedie (presumably written by UnicodeConvert.pl because
    # of -errtmp -- confirm).
    $ErrMsg = `cat $HBase/thestdunicodedie`;
    print REALOUT $InputPart;
    DieOut("Fatal problem doing Punycode decoding:\n$ErrMsg\n");
}
else {
    DebugOut(" After step 5, the label is $PunycodeOut\n");

    # Decode a second time in U+ notation; the result is only used for the
    # debug message, but an empty result is still treated as fatal.
    $UPlusCmd = "$HBase/UnicodeConvert.pl -infmt punycode "
        . " -outfmt u+ -errtmp <$HBase/thestdace";
    $UPlusOut = `$UPlusCmd`;
    $UPlusOut =~ s/\n/ /g;
    if ($UPlusOut eq '') {
        $ErrMsg = `cat $HBase/thestdunicodedie`;
        print REALOUT $InputPart;
        DieOut("Fatal problem doing U+ decoding:\n$ErrMsg\n");
    }
    else {
        DebugOut(" Also after step 5, the label is $UPlusOut\n");
    }
}
# Saved copy of the step 5 result; this is what step 8 returns.
$Step5Save = $PunycodeOut;

# Step 6: run the decoded label back through ToASCII.pl, again via a temp
# file and shell redirection.
DebugOut("ToUnicode Step 6 (Apply ToASCII)\n");
open(my $UnicodeTemp, '>', "$HBase/thestdunicodestring")
    or DieOut("Could not write thestdunicodestring holder");
print {$UnicodeTemp} $PunycodeOut
    or DieOut("Could not write thestdunicodestring holder");
close($UnicodeTemp)
    or DieOut("Could not write thestdunicodestring holder");

$ToASCIICmd = "$HBase/ToASCII.pl <$HBase/thestdunicodestring";
$CompareSequence = `$ToASCIICmd`;
DebugOut(" After step 6, the compared string is $CompareSequence\n");

# Step 7: the re-encoded label must equal the label saved in step 3,
# ignoring ASCII case; otherwise return the original input.
DebugOut("ToUnicode Step 7 (Case-insensitive compare)\n");
unless (lc($SavedSequence) eq lc($CompareSequence)) {
    print REALOUT $InputPart;
    DieOut(" ***ToUnicode failing in step 7.\n");
}

# Step 8: return the saved copy from step 5.
DebugOut(" After step 8, the label is $Step5Save\n");
print REALOUT $Step5Save;
exit;