mDescription = 'Generate the normalizer data file for Malayalam';
	}

	/**
	 * Declare the database access level for this script.
	 *
	 * @return int Maintenance::DB_NONE (presumably: no database connection
	 *  is needed, since execute() only writes a file -- confirm against the
	 *  Maintenance base class).
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Build the Malayalam normalization table and write it, serialized, to
	 * "$IP/serialized/normalize-ml.ser".
	 *
	 * Each entry maps a space-separated sequence of hexadecimal code points
	 * to its replacement; both sides are converted to UTF-8 strings with
	 * UtfNormal\Utils::hexSequenceToUtf8() before being stored.
	 *
	 * @throws RuntimeException If the data file cannot be written.
	 */
	public function execute() {
		// Source sequence (hex code points) => replacement code point.
		$hexPairs = array(
			# From http://unicode.org/versions/Unicode5.1.0/#Malayalam_Chillu_Characters
			'0D23 0D4D 200D' => '0D7A',
			'0D28 0D4D 200D' => '0D7B',
			'0D30 0D4D 200D' => '0D7C',
			'0D32 0D4D 200D' => '0D7D',
			'0D33 0D4D 200D' => '0D7E',

			# From http://permalink.gmane.org/gmane.science.linguistics.wikipedia.technical/46413
			'0D15 0D4D 200D' => '0D7F',
		);

		// Convert both sides of every mapping from hex notation to UTF-8.
		$pairs = array();
		foreach ( $hexPairs as $hexSource => $hexDest ) {
			$source = UtfNormal\Utils::hexSequenceToUtf8( $hexSource );
			$dest = UtfNormal\Utils::hexSequenceToUtf8( $hexDest );
			$pairs[$source] = $dest;
		}

		global $IP;
		$outputPath = "$IP/serialized/normalize-ml.ser";

		// file_put_contents() returns false on failure (missing directory,
		// permissions, full disk, ...). Without this check the script would
		// report success even though no data file was produced.
		if ( file_put_contents( $outputPath, serialize( $pairs ) ) === false ) {
			throw new RuntimeException( "ml: unable to write $outputPath" );
		}

		echo "ml: " . count( $pairs ) . " pairs written.\n";
	}
}

$maintClass = 'GenerateNormalizerDataMl';
require_once RUN_MAINTENANCE_IF_MAIN;