diff options
Diffstat (limited to 'MLEB/cldr/includes/CLDRParser.php')
-rw-r--r-- | MLEB/cldr/includes/CLDRParser.php | 350 |
1 files changed, 350 insertions, 0 deletions
/**
 * Extract data from cldr XML.
 *
 * @author Niklas Laxström
 * @author Ryan Kaldari
 * @author Santhosh Thottingal
 * @author Sam Reed
 * @copyright Copyright © 2007-2015
 * @license GPL-2.0-or-later
 */
class CLDRParser {
	/**
	 * Extract language names, currency names and symbols, country names and
	 * time unit patterns from one CLDR locale XML file and write them to a
	 * PHP file. Nothing is written when no data at all was found.
	 *
	 * @param string $inputFile filename
	 * @param string $outputFile filename
	 */
	public function parse( $inputFile, $outputFile ) {
		$doc = $this->loadXml( $inputFile );

		$data = [
			'languageNames' => [],
			'currencyNames' => [],
			'currencySymbols' => [],
			'countryNames' => [],
			'timeUnits' => [],
		];

		foreach ( $doc->xpath( '//languages/language' ) as $elem ) {
			$type = (string)$elem['type'];

			// Skip every alternate form and the 'root' entry
			if ( (string)$elem['alt'] !== '' || $type === 'root' ) {
				continue;
			}

			$key = str_replace( '_', '-', strtolower( $type ) );
			$data['languageNames'][$key] = (string)$elem;
		}

		foreach ( $doc->xpath( '//currencies/currency' ) as $elem ) {
			$type = (string)$elem['type'];
			$name = (string)$elem->displayName[0];
			if ( $name === '' ) {
				continue;
			}

			$data['currencyNames'][$type] = $name;

			$symbol = (string)$elem->symbol[0];
			if ( $symbol !== '' ) {
				$data['currencySymbols'][$type] = $symbol;
			}
		}

		foreach ( $doc->xpath( '//territories/territory' ) as $elem ) {
			$type = (string)$elem['type'];
			$alt = (string)$elem['alt'];

			// Only the plain and the 'short' forms are used; a later entry
			// for the same territory overwrites an earlier one.
			if ( $alt !== '' && $alt !== 'short' ) {
				continue;
			}

			// Only two-letter codes are kept, and 'ZZ' is excluded explicitly
			if ( $type === 'ZZ' || !preg_match( '/^[A-Z][A-Z]$/', $type ) ) {
				continue;
			}

			$data['countryNames'][$type] = (string)$elem;
		}

		$durationPrefix = 'duration-';
		foreach ( $doc->xpath( '//units/unitLength' ) as $unitLength ) {
			if ( (string)$unitLength['type'] !== 'long' ) {
				continue;
			}

			foreach ( $unitLength->unit as $elem ) {
				$type = (string)$elem['type'];
				// The prefix is stripped below, so it has to be a real prefix
				// and not merely occur somewhere inside the type.
				if ( strpos( $type, $durationPrefix ) !== 0 ) {
					continue;
				}

				$type = substr( $type, strlen( $durationPrefix ) );
				foreach ( $elem->unitPattern as $pattern ) {
					$data['timeUnits'][$type . '-' . (string)$pattern['count']] = (string)$pattern;
				}
			}
		}

		foreach ( $doc->xpath( '//fields/field' ) as $field ) {
			$fieldType = (string)$field['type'];

			foreach ( $field->relativeTime as $relative ) {
				$type = (string)$relative['type'];
				foreach ( $relative->relativeTimePattern as $pattern ) {
					$data['timeUnits'][$fieldType . '-' . $type
						. '-' . (string)$pattern['count']] = (string)$pattern;
				}
			}
		}

		ksort( $data['timeUnits'] );

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Parse method for the file structure found in common/supplemental/supplementalData.xml
	 *
	 * Writes per-currency fraction attributes and, per region, the list of
	 * currencies that have no 'to' attribute and are not marked tender="false".
	 *
	 * @param string $inputFile
	 * @param string $outputFile
	 */
	public function parse_supplemental( $inputFile, $outputFile ) {
		$doc = $this->loadXml( $inputFile );

		$data = [
			'currencyFractions' => [],
			'localeCurrencies' => [],
		];

		// Pull currency attributes - digits, rounding, and cashRounding.
		// This will tell us how many decimal places make sense to use with any currency,
		// or if the currency is totally non-fractional
		$attributes = [ 'digits', 'rounding', 'cashDigits', 'cashRounding' ];
		foreach ( $doc->xpath( '//currencyData/fractions/info' ) as $elem ) {
			$currency = (string)$elem['iso4217'];
			if ( $currency === '' ) {
				continue;
			}

			foreach ( $attributes as $att ) {
				$attValue = (string)$elem[$att];
				if ( $attValue !== '' ) {
					$data['currencyFractions'][$currency][$att] = $attValue;
				}
			}
		}

		// Pull a map of regions to currencies, in document order.
		foreach ( $doc->xpath( '//currencyData/region' ) as $elem ) {
			$region = (string)$elem['iso3166'];
			if ( $region === '' ) {
				continue;
			}

			foreach ( $elem->currency as $currencynode ) {
				if ( (string)$currencynode['to'] === '' && (string)$currencynode['tender'] !== 'false' ) {
					$data['localeCurrencies'][$region][] = (string)$currencynode['iso4217'];
				}
			}
		}

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Parse method for the currency section in the names files.
	 * This is separate from the regular parse function, because we need all of
	 * the currency locale information, even if mediawiki doesn't support the language.
	 * (For instance: en_AU uses '$' for AUD, not USD, but it's not a supported mediawiki locality)
	 *
	 * Does nothing when $inputDir is not a directory.
	 *
	 * @param string $inputDir - the directory, in which we will parse everything.
	 * @param string $outputFile
	 */
	public function parse_currency_symbols( $inputDir, $outputFile ) {
		if ( !is_dir( $inputDir ) ) {
			return;
		}

		$data = [
			'currencySymbols' => [],
		];

		foreach ( scandir( $inputDir ) as $inputFile ) {
			// Only "<something>.xml"; names such as "en.xml.bak" or a bare
			// ".xml" are ignored.
			if ( strlen( $inputFile ) < 5 || substr( $inputFile, -4 ) !== '.xml' ) {
				continue;
			}

			$doc = $this->loadXml( $inputDir . '/' . $inputFile );

			// Language comes from the <identity> section, falling back to the
			// file name when the section does not provide one.
			$languages = $doc->xpath( '//identity/language/@type' );
			$language = $languages
				? (string)$languages[0]
				: pathinfo( $inputFile, PATHINFO_FILENAME );

			// The <territory> element is optional
			$territories = $doc->xpath( '//identity/territory/@type' );
			$territory = $territories ? (string)$territories[0] : 'DEFAULT';

			foreach ( $doc->xpath( '//currencies/currency' ) as $elem ) {
				$symbol = (string)$elem->symbol[0];
				if ( $symbol !== '' ) {
					$data['currencySymbols'][(string)$elem['type']][$language][$territory] = $symbol;
				}
			}
		}

		// now massage the data somewhat. It's pretty blown up at this point.

		/**
		 * Part 1: Stop blowing up on defaults.
		 * Defaults apparently come in many forms. Listed below in order of scope
		 * (widest to narrowest)
		 * 1) The ISO code itself, in the absence of any other defaults
		 * 2) The 'root' language file definition
		 * 3) Language with no locality - locality will come in as 'DEFAULT'
		 *
		 * Intended behavior:
		 * From narrowest scope to widest, collapse the defaults
		 */
		foreach ( $data['currencySymbols'] as $currency => $language ) {
			// get the currency default symbol. This will either be defined in the
			// 'root' language file, or taken from the ISO code.
			$default = $language['root']['DEFAULT'] ?? $currency;

			foreach ( $language as $lang => $territories ) {
				if ( !is_array( $territories ) ) {
					continue;
				}

				if ( count( $territories ) === 1 && array_key_exists( 'DEFAULT', $territories ) ) {
					// A language with nothing but a default needs one value only,
					// and none at all when it merely repeats the currency default.
					if ( $territories['DEFAULT'] === $default && $lang !== 'root' ) {
						unset( $data['currencySymbols'][$currency][$lang] );
					} else {
						$data['currencySymbols'][$currency][$lang] = $territories['DEFAULT'];
					}
				} else {
					ksort( $data['currencySymbols'][$currency][$lang] );
				}
			}

			ksort( $data['currencySymbols'][$currency] );
		}

		ksort( $data['currencySymbols'] );

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Read a file and parse it as XML.
	 *
	 * @param string $file
	 * @return SimpleXMLElement
	 * @throws RuntimeException when the file cannot be read
	 */
	private function loadXml( $file ) {
		$contents = file_get_contents( $file );
		if ( $contents === false ) {
			throw new RuntimeException( "Unable to read CLDR input file: $file" );
		}

		return new SimpleXMLElement( $contents );
	}

	/**
	 * savephp builds properly formatted php source defining one variable per
	 * non-empty entry of $data and writes it to $location. Nothing is written
	 * when every entry is empty.
	 *
	 * var_export is deliberately not used: its spacing was considered ugly.
	 *
	 * @param array $data The variable names and values we want defined in the php output
	 * @param string $location File location to write
	 */
	protected function savephp( $data, $location ) {
		// Don't output empty arrays
		$data = array_filter( $data, static function ( $values ) {
			return count( $values ) > 0;
		} );

		if ( !$data ) {
			return;
		}

		$output = "<?php\n";
		foreach ( $data as $varname => $values ) {
			$isAssoc = $this->isAssoc( $values );

			$output .= "\n\$$varname = [\n";
			foreach ( $values as $key => $value ) {
				if ( is_array( $value ) ) {
					$output .= $this->makePrettyArrayOuts( $isAssoc ? $key : null, $value, 1 );
				} else {
					$prefix = $isAssoc ? $this->formatKey( $key ) : '';
					$output .= "\t$prefix" . $this->quote( $value ) . ",\n";
				}
			}
			$output .= "];\n";
		}

		file_put_contents( $location, $output );
	}

	/**
	 * Render a nested array as indented php source, ending in ",\n".
	 * Arrays that end up with no content produce an empty string.
	 *
	 * @param string|null $key Use null to omit outputting the key
	 * @param array $value
	 * @param int $level Number of tabs in front of the opening bracket
	 * @return string
	 */
	protected function makePrettyArrayOuts( $key, $value, $level = 1 ) {
		$isAssoc = $this->isAssoc( $value );
		$tabs = str_repeat( "\t", $level );

		$body = '';
		foreach ( $value as $subkey => $subvalue ) {
			if ( is_array( $subvalue ) ) {
				$body .= $this->makePrettyArrayOuts( $isAssoc ? $subkey : null, $subvalue, $level + 1 );
			} else {
				$prefix = $isAssoc ? $this->formatKey( $subkey ) : '';
				$body .= "$tabs\t$prefix" . $this->quote( $subvalue ) . ",\n";
			}
		}

		if ( $body === '' ) {
			return '';
		}

		$prefix = $key !== null ? $this->formatKey( $key ) : '';
		return "$tabs$prefix" . "[\n$body$tabs],\n";
	}

	/**
	 * Render an array key followed by " => ".
	 *
	 * Only keys that are canonical decimal integers are written bare. Anything
	 * else is quoted, because a bare '001', '1.5' or '1e3' would be read back
	 * by PHP as a different key.
	 *
	 * @param string|int $key
	 * @return string
	 */
	protected function formatKey( $key ) {
		$key = (string)$key;
		if ( $key === (string)(int)$key ) {
			return "$key => ";
		}

		return $this->quote( $key ) . ' => ';
	}

	/**
	 * Render a scalar as a single-quoted php string literal.
	 *
	 * Both the quote and the backslash are escaped, so that the literal
	 * evaluates back to exactly the given text.
	 *
	 * @param string|int $value
	 * @return string
	 */
	private function quote( $value ) {
		return "'" . addcslashes( (string)$value, "'\\" ) . "'";
	}

	/**
	 * Checks if array is associative or sequential.
	 *
	 * @param array $arr
	 * @return bool False when the keys are exactly 0..n-1 in order, or when
	 *  the array is empty; true otherwise
	 */
	protected function isAssoc( array $arr ) {
		if ( $arr === [] ) {
			return false;
		}

		return array_keys( $arr ) !== range( 0, count( $arr ) - 1 );
	}
}