summaryrefslogtreecommitdiff
blob: 745d7553ae71dd6342417809f79ff6be8849de03 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
<?php

/**
 * Maintenance script that extracts data from CLDR XML files into PHP files.
 *
 * @author Niklas Laxström
 * @author Ryan Kaldari
 * @author Santhosh Thottingal
 * @author Sam Reed
 * @copyright Copyright © 2007-2015
 * @license GPL-2.0-or-later
 */

// Locate the MediaWiki installation: prefer the MW_INSTALL_PATH environment
// variable and otherwise fall back to two directories above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
if ( $IP === false ) {
	$dir = __DIR__;
	$IP = "$dir/../..";
}
require_once "$IP/maintenance/Maintenance.php";

/**
 * Maintenance script that feeds CLDR XML files to CLDRParser.
 *
 * Three kinds of input are processed: one XML file per language code returned
 * by Language::fetchLanguageNames() (plus 'pt-pt'), the supplemental data
 * file, and the currency symbol data. Output paths are built below the
 * configured output directory.
 */
class CLDRRebuild extends Maintenance {

	/**
	 * Register the script description, its two optional parameters and the
	 * extension it requires.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Extract data from CLDR XML' );
		// Both defaults are resolved against this script's directory (__DIR__)
		// in execute(), not against the caller's working directory.
		$this->addOption(
			'datadir',
			'Directory containing CLDR data. ' .
				'Default is core/common/main below the directory of this script',
			/* required */ false,
			/* param */ true
		);
		$this->addOption(
			'outputdir',
			'Output directory. Default is the directory of this script',
			/* required */ false,
			/* param */ true
		);

		$this->requireExtension( 'CLDR' );
	}

	/**
	 * Run the conversion: per-language name files first, then the
	 * supplemental data, then the currency symbols.
	 */
	public function execute() {
		$dir = __DIR__;

		$dataDir = $this->getOption( 'datadir', "$dir/core/common/main" );
		$outputDir = $this->getOption( 'outputdir', $dir );

		if ( !file_exists( $dataDir ) ) {
			$this->error( "CLDR data not found at $dataDir\n", 1 );
			// error() is asked to exit with status 1. Return as well, so that
			// nothing below runs against a missing directory should the call
			// ever come back instead of terminating the script.
			return;
		}

		// Get an array of all MediaWiki languages ( $wgLanguageNames + $wgExtraLanguageNames )
		$languages = Language::fetchLanguageNames();
		# hack to get pt-pt too
		$languages['pt-pt'] = 'Foo';
		ksort( $languages );

		// Only the codes are needed; the language names are not used.
		foreach ( array_keys( $languages ) as $code ) {
			$codeCLDR = $this->getCldrFileCode( $code );
			$input = "$dataDir/$codeCLDR.xml";

			// If the file exists, parse it, otherwise display an error
			if ( file_exists( $input ) ) {
				$outputFileName = Language::getFileName( 'CldrNames', getRealCode( $code ), '.php' );
				$p = new CLDRParser();
				$p->parse( $input, "$outputDir/CldrNames/$outputFileName" );
			} else {
				$this->output( "File $input not found\n" );
			}
		}

		// Now parse out what we want from the supplemental file
		$this->output( "Parsing Supplemental Data...\n" );
		// argh! If the data directory defaulted to something slightly more
		// general in the CLDR dump, this wouldn't have to be this way.
		$input = "$dataDir/../supplemental/supplementalData.xml";
		if ( file_exists( $input ) ) {
			$p = new CLDRParser();
			$p->parse_supplemental( $input, "$outputDir/CldrSupplemental/Supplemental.php" );
		} else {
			$this->output( "File $input not found\n" );
		}
		$this->output( "Done parsing supplemental data.\n" );

		$this->output( "Parsing Currency Symbol Data...\n" );
		$p = new CLDRParser();
		$p->parse_currency_symbols( $dataDir, "$outputDir/CldrCurrency/Symbols.php" );
		$this->output( "Done parsing currency symbols.\n" );
	}

	/**
	 * Build the base name of the CLDR input file for a language code.
	 *
	 * The code is split on '-'. A code with a single part is returned
	 * unchanged. Otherwise a four-letter second part gets its first letter
	 * upper-cased, a two-letter second part is moved to the third position,
	 * a two-letter third part is upper-cased, and the parts are joined
	 * with '_'. For example 'pt-br' becomes 'pt_BR' and 'kk-cyrl' becomes
	 * 'kk_Cyrl'.
	 *
	 * @param string $code Language code with '-' separated parts
	 * @return string Code with '_' separated parts
	 */
	private function getCldrFileCode( $code ) {
		$codeParts = explode( '-', $code );
		if ( count( $codeParts ) <= 1 ) {
			return $code;
		}

		// ISO 15924 alpha-4 script code
		if ( strlen( $codeParts[1] ) === 4 ) {
			$codeParts[1] = ucfirst( $codeParts[1] );
		}

		// ISO 3166-1 alpha-2 country code: move it to index 2 so that the
		// upper-casing below applies to it as well.
		if ( strlen( $codeParts[1] ) === 2 ) {
			$codeParts[2] = $codeParts[1];
			unset( $codeParts[1] );
		}
		if ( isset( $codeParts[2] ) && strlen( $codeParts[2] ) === 2 ) {
			$codeParts[2] = strtoupper( $codeParts[2] );
		}

		return implode( '_', $codeParts );
	}
}

/**
 * Translate a language code into the code used for the MediaWiki
 * localisation; per the original note these are the same as the fallback.
 *
 * Codes without an entry in the table below are returned unchanged.
 *
 * @param string $code
 * @return string
 */
function getRealCode( $code ) {
	// Input code => code to use instead.
	$overrides = [
		'kk' => 'kk-cyrl',
		'ku' => 'ku-latn',
		'sr' => 'sr-ec',
		'tg' => 'tg-cyrl',
		'zh' => 'zh-hans',
		'pt' => 'pt-br',
		'pt-pt' => 'pt',
		'az-arab' => 'azb',
	];

	return isset( $overrides[$code] ) ? $overrides[$code] : $code;
}

// Store the name of the class defined in this file in $maintClass, then
// include the file named by the RUN_MAINTENANCE_IF_MAIN constant. That
// constant is defined outside this file; presumably the included file runs
// $maintClass when this script is the entry point — confirm against core.
$maintClass = CLDRRebuild::class;
require_once RUN_MAINTENANCE_IF_MAIN;