summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Evans <grknight@gentoo.org>2020-10-02 15:24:06 -0400
committerBrian Evans <grknight@gentoo.org>2020-10-02 15:24:06 -0400
commit60dd5fd95847643eab04ce173f0774c9c584e795 (patch)
tree52299ac4e3c5c69df75997bfd7d62b71ef9e0089 /MLEB/Translate/ttmserver
parentUpdate Widgets to 1.35 (diff)
downloadextensions-60dd5fd95847643eab04ce173f0774c9c584e795.tar.gz
extensions-60dd5fd95847643eab04ce173f0774c9c584e795.tar.bz2
extensions-60dd5fd95847643eab04ce173f0774c9c584e795.zip
Update MLEB to 2020.07
Signed-off-by: Brian Evans <grknight@gentoo.org>
Diffstat (limited to 'MLEB/Translate/ttmserver')
-rw-r--r--MLEB/Translate/ttmserver/CrossLanguageTranslationSearchQuery.php110
-rw-r--r--MLEB/Translate/ttmserver/DatabaseTTMServer.php18
-rw-r--r--MLEB/Translate/ttmserver/ElasticSearchTTMServer.php258
-rw-r--r--MLEB/Translate/ttmserver/FakeTTMServer.php5
-rw-r--r--MLEB/Translate/ttmserver/FuzzyLikeThis.php1
-rw-r--r--MLEB/Translate/ttmserver/Interfaces.php16
-rw-r--r--MLEB/Translate/ttmserver/SolrTTMServer.php445
-rw-r--r--MLEB/Translate/ttmserver/TTMServer.php34
-rw-r--r--MLEB/Translate/ttmserver/TTMServerMessageUpdateJob.php6
-rw-r--r--MLEB/Translate/ttmserver/schema.xml45
10 files changed, 268 insertions, 670 deletions
diff --git a/MLEB/Translate/ttmserver/CrossLanguageTranslationSearchQuery.php b/MLEB/Translate/ttmserver/CrossLanguageTranslationSearchQuery.php
index ba620e40..6a6d0226 100644
--- a/MLEB/Translate/ttmserver/CrossLanguageTranslationSearchQuery.php
+++ b/MLEB/Translate/ttmserver/CrossLanguageTranslationSearchQuery.php
@@ -4,13 +4,13 @@
* @since 2015.08
*/
class CrossLanguageTranslationSearchQuery {
- /** @var TTMServer */
+ /** @var SearchableTTMServer */
protected $server;
/** @var array */
protected $params;
- /** @var ResultSet */
+ /** @var \Elastica\ResultSet */
protected $resultset;
/** @var int */
@@ -25,56 +25,74 @@ class CrossLanguageTranslationSearchQuery {
public function getDocuments() {
$documents = [];
- $total = $start = 0;
- $queryString = $this->params['query'];
$offset = $this->params['offset'];
$limit = $this->params['limit'];
- $size = 1000;
$options = $this->params;
- $options['limit'] = $size;
$options['language'] = $this->params['sourcelanguage'];
- do {
- $options['offset'] = $start;
- $this->resultset = $this->server->search( $queryString, $options, $this->hl );
-
- list( $results, $offsets ) = $this->extractMessages(
- $this->resultset,
- $offset,
- $limit
- );
- $offset = $offsets['start'] + $offsets['left'] - $offsets['total'];
- $limit = $limit - $offsets['left'];
- $total = $total + $offsets['total'];
+ // Use a bigger limit than what was requested, since we are likely to throw away many
+ // results in the local filtering step at extractMessages
+ $options['limit'] = $limit * 10;
+ // TODO: the real offset should be communicated to the frontend. It currently assumes
+ // next offset is current offset + limit and previous one is current offset - limit.
+ // It might be difficult to fix scrolling results backwards. For now we handle offset
+ // locally.
+ $options['offset'] = 0;
+
+ // @phan-suppress-next-line PhanUndeclaredMethod
+ $search = $this->server->createSearch( $this->params['query'], $options, $this->hl );
+ $scroll = $search->scroll( '5s' );
+
+ // Used for aggregations. Only the first scroll response has them.
+ $this->resultset = null;
+
+ foreach ( $scroll as $resultSet ) {
+ if ( !$this->resultset ) {
+ $this->resultset = $resultSet;
+ $this->total = $resultSet->getTotalHits();
+ }
+ $results = $this->extractMessages( $resultSet->getDocuments() );
$documents = array_merge( $documents, $results );
- $start = $start + $size;
- } while (
- $offsets['start'] + $offsets['left'] >= $offsets['total'] &&
- $this->resultset->getTotalHits() > $start
- );
- $this->total = $total;
+
+ $count = count( $documents );
+
+ if ( $count >= $offset + $limit ) {
+ break;
+ }
+ }
+
+ if ( !$this->resultset ) {
+ // No hits for documents, just set the result set.
+ $this->resultset = $scroll->current();
+ $this->total = $scroll->current()->getTotalHits();
+ }
+
+ // clear was introduced in Elastica 5.3.1, but Elastica extension uses 5.3.0
+ if ( is_callable( [ $scroll, 'clear' ] ) ) {
+ $scroll->clear();
+ }
+ $documents = array_slice( $documents, $offset, $limit );
return $documents;
}
/**
- * Extract messages from the resultset and build message definitions.
+ * Extract messages from the documents and build message definitions.
* Create a message collection from the definitions in the target language.
* Filter the message collection to get filtered messages.
* Slice messages according to limit and offset given.
- * @param ResultSet $resultset
- * @param int $offset
- * @param int $limit
- * @return array
+ * @param \Elastica\Document[] $documents
+ * @return array[]
*/
- protected function extractMessages( $resultset, $offset, $limit ) {
- $messages = $documents = $ret = [];
+ protected function extractMessages( $documents ) {
+ $messages = $ret = [];
$language = $this->params['language'];
- foreach ( $resultset->getResults() as $document ) {
+ foreach ( $documents as $document ) {
$data = $document->getData();
+ // @phan-suppress-next-line PhanUndeclaredMethod
if ( !$this->server->isLocalSuggestion( $data ) ) {
continue;
}
@@ -103,32 +121,26 @@ class CrossLanguageTranslationSearchQuery {
$collection->filter( $filter, false );
}
- $total = count( $collection );
- $offset = $collection->slice( $offset, $limit );
- $left = count( $collection );
-
- $offsets = [
- 'start' => $offset[2],
- 'left' => $left,
- 'total' => $total,
- ];
-
if ( $filter === 'translated' || $filter === 'fuzzy' ) {
$collection->loadTranslations();
}
- foreach ( $collection->keys() as $mkey => $title ) {
- $documents[$mkey]['content'] = $messages[$mkey];
+ foreach ( $collection->keys() as $mkey => $titleValue ) {
+ $title = Title::newFromLinkTarget( $titleValue );
+
+ $result = [];
+ $result['content'] = $messages[$mkey];
if ( $filter === 'translated' || $filter === 'fuzzy' ) {
- $documents[$mkey]['content'] = $collection[$mkey]->translation();
+ $result['content'] = $collection[$mkey]->translation();
}
$handle = new MessageHandle( $title );
- $documents[$mkey]['localid'] = $handle->getTitleForBase()->getPrefixedText();
- $documents[$mkey]['language'] = $language;
- $ret[] = $documents[$mkey];
+ $result['localid'] = $handle->getTitleForBase()->getPrefixedText();
+ $result['language'] = $language;
+
+ $ret[] = $result;
}
- return [ $ret, $offsets ];
+ return $ret;
}
/**
diff --git a/MLEB/Translate/ttmserver/DatabaseTTMServer.php b/MLEB/Translate/ttmserver/DatabaseTTMServer.php
index 74d2360d..c172c34a 100644
--- a/MLEB/Translate/ttmserver/DatabaseTTMServer.php
+++ b/MLEB/Translate/ttmserver/DatabaseTTMServer.php
@@ -9,6 +9,7 @@
* @ingroup TTMServer
*/
+use MediaWiki\MediaWikiServices;
use Wikimedia\Rdbms\DBQueryError;
/**
@@ -147,9 +148,10 @@ class DatabaseTTMServer extends TTMServer implements WritableTTMServer, Readable
$dbw->delete( 'translate_tms', '*', __METHOD__ );
$dbw->delete( 'translate_tmt', '*', __METHOD__ );
$dbw->delete( 'translate_tmf', '*', __METHOD__ );
+ // @phan-suppress-next-line PhanUndeclaredMethod
$table = $dbw->tableName( 'translate_tmf' );
try {
- $dbw->query( "DROP INDEX tmf_text ON $table" );
+ $dbw->query( "DROP INDEX tmf_text ON $table", __METHOD__ );
} catch ( DBQueryError $e ) {
// Perhaps the script was aborted before it got
// chance to add the index back.
@@ -167,7 +169,8 @@ class DatabaseTTMServer extends TTMServer implements WritableTTMServer, Readable
$context = Title::makeTitle( $handle->getTitle()->getNamespace(), $handle->getKey() );
$this->sids[$key] = $this->insertSource( $context, $language, $text );
}
- wfWaitForSlaves( 10 );
+ $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+ $lbFactory->waitForReplication( [ 'ifWritesSince' => 10 ] );
}
public function batchInsertTranslations( array $batch ) {
@@ -183,7 +186,8 @@ class DatabaseTTMServer extends TTMServer implements WritableTTMServer, Readable
$dbw = $this->getDB( DB_MASTER );
$dbw->insert( 'translate_tmt', $rows, __METHOD__ );
- wfWaitForSlaves( 10 );
+ $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+ $lbFactory->waitForReplication( [ 'ifWritesSince' => 10 ] );
}
public function endBatch() {
@@ -191,8 +195,9 @@ class DatabaseTTMServer extends TTMServer implements WritableTTMServer, Readable
public function endBootstrap() {
$dbw = $this->getDB( DB_MASTER );
+ // @phan-suppress-next-line PhanUndeclaredMethod
$table = $dbw->tableName( 'translate_tmf' );
- $dbw->query( "CREATE FULLTEXT INDEX tmf_text ON $table (tmf_text)" );
+ $dbw->query( "CREATE FULLTEXT INDEX tmf_text ON $table (tmf_text)", __METHOD__ );
}
/* Reading interface */
@@ -271,7 +276,7 @@ class DatabaseTTMServer extends TTMServer implements WritableTTMServer, Readable
'context' => $row->tms_context,
'location' => $row->tms_context . '/' . $targetLanguage,
'quality' => $quality,
- 'wiki' => isset( $row->tms_wiki ) ? $row->tms_wiki : wfWikiID(),
+ 'wiki' => $row->tms_wiki ?? wfWikiID(),
];
}
}
@@ -279,4 +284,7 @@ class DatabaseTTMServer extends TTMServer implements WritableTTMServer, Readable
return $results;
}
+
+ public function setDoReIndex() {
+ }
}
diff --git a/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php b/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php
index a1a01c0e..1a98c8cc 100644
--- a/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php
+++ b/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php
@@ -17,39 +17,33 @@ use MediaWiki\Logger\LoggerFactory;
*/
class ElasticSearchTTMServer
extends TTMServer
- implements ReadableTTMServer, WritableTTMServer, SearchableTTMserver
+ implements ReadableTTMServer, WritableTTMServer, SearchableTTMServer
{
/**
- * @const int number of documents that will be loaded and deleted in a
- * single operation
- */
- const BULK_DELETE_CHUNK_SIZE = 100;
-
- /**
* @const int in case a write operation fails during a batch process
* this constant controls the number of times we will retry the same
* operation.
*/
- const BULK_INDEX_RETRY_ATTEMPTS = 5;
+ private const BULK_INDEX_RETRY_ATTEMPTS = 5;
/**
* @const int time (seconds) to wait for the index to ready before
* starting to index. Since we wait for index status it can be relatively
* long especially if some nodes are restarted.
*/
- const WAIT_UNTIL_READY_TIMEOUT = 3600;
+ private const WAIT_UNTIL_READY_TIMEOUT = 3600;
/**
* Flag in the frozen index that indicates that all indices
* are frozen (useful only when this service shares the cluster with
* CirrusSearch)
*/
- const ALL_INDEXES_FROZEN_NAME = 'freeze_everything';
+ protected const ALL_INDEXES_FROZEN_NAME = 'freeze_everything';
/**
* Type used in the frozen index
*/
- const FROZEN_TYPE = 'frozen';
+ protected const FROZEN_TYPE = 'frozen';
/**
* @var \Elastica\Client
@@ -101,36 +95,13 @@ class ElasticSearchTTMServer
$fuzzyQuery->addFields( [ 'content' ] );
$boostQuery = new \Elastica\Query\FunctionScore();
- if ( $this->useWikimediaExtraPlugin() ) {
- $boostQuery->addFunction(
- 'levenshtein_distance_score',
- [
- 'text' => $text,
- 'field' => 'content'
- ]
- );
- } else {
- // TODO: should we remove this code block the extra
- // plugin is now mandatory and we will never use the
- // groovy script.
- if ( $this->isElastica5() ) {
- $scriptClass = \Elastica\Script\Script::class;
- } else {
- $scriptClass = \Elastica\Script::class;
- }
-
- $groovyScript =
-<<<GROOVY
-import org.apache.lucene.search.spell.*
-new LevensteinDistance().getDistance(srctxt, _source['content'])
-GROOVY;
- $script = new $scriptClass(
- $groovyScript,
- [ 'srctxt' => $text ],
- $scriptClass::LANG_GROOVY
- );
- $boostQuery->addScriptScoreFunction( $script );
- }
+ $boostQuery->addFunction(
+ 'levenshtein_distance_score',
+ [
+ 'text' => $text,
+ 'field' => 'content'
+ ]
+ );
$boostQuery->setBoostMode( \Elastica\Query\FunctionScore::BOOST_MODE_REPLACE );
// Wrap the fuzzy query so it can be used as a filter.
@@ -161,7 +132,7 @@ GROOVY;
$query->setFrom( 0 );
$query->setSize( $sizeFirst );
$query->setParam( '_source', [ 'content' ] );
- $cutoff = isset( $this->config['cutoff'] ) ? $this->config['cutoff'] : 0.65;
+ $cutoff = $this->config['cutoff'] ?? 0.65;
$query->setParam( 'min_score', $cutoff );
$query->setSort( [ '_score', '_uid' ] );
@@ -204,6 +175,7 @@ GROOVY;
}
// After the first query, the smallest score is the new threshold.
+ // @phan-suppress-next-line PhanPossiblyUndeclaredVariable
$query->setParam( 'min_score', $score );
$query->setFrom( $query->getParam( 'size' ) + $query->getParam( 'from' ) );
$query->setSize( $sizeSecond );
@@ -258,6 +230,14 @@ GROOVY;
/* Write functions */
+ /**
+ * Add / update translations.
+ *
+ * @param MessageHandle $handle
+ * @param ?string $targetText
+ * @throws \RuntimeException
+ * @return bool
+ */
public function update( MessageHandle $handle, $targetText ) {
if ( !$handle->isValid() || $handle->getCode() === '' ) {
return false;
@@ -278,14 +258,11 @@ GROOVY;
// Do not delete definitions, because the translations are attached to that
if ( $handle->getCode() !== $sourceLanguage ) {
$localid = $handle->getTitleForBase()->getPrefixedText();
-
- $boolQuery = new \Elastica\Query\BoolQuery();
- $boolQuery->addFilter( new Elastica\Query\Term( [ 'wiki' => wfWikiID() ] ) );
- $boolQuery->addFilter( new Elastica\Query\Term( [ 'language' => $handle->getCode() ] ) );
- $boolQuery->addFilter( new Elastica\Query\Term( [ 'localid' => $localid ] ) );
-
- $query = new \Elastica\Query( $boolQuery );
- $this->deleteByQuery( $this->getType(), $query );
+ $this->deleteByQuery( $this->getType(), Elastica\Query::create(
+ ( new \Elastica\Query\BoolQuery() )
+ ->addFilter( new Elastica\Query\Term( [ 'wiki' => wfWikiID() ] ) )
+ ->addFilter( new Elastica\Query\Term( [ 'language' => $handle->getCode() ] ) )
+ ->addFilter( new Elastica\Query\Term( [ 'localid' => $localid ] ) ) ) );
}
// If translation was made fuzzy, we do not need to add anything
@@ -376,6 +353,11 @@ GROOVY;
$type->getIndex()->create( $indexSettings, $rebuild );
}
+ /**
+ * Begin the bootstrap process.
+ *
+ * @throws \RuntimeException
+ */
public function beginBootstrap() {
$type = $this->getType();
if ( $this->updateMapping ) {
@@ -388,43 +370,32 @@ GROOVY;
$settings = $type->getIndex()->getSettings();
$settings->setRefreshInterval( '-1' );
- $term = new Elastica\Query\Term();
- $term->setTerm( 'wiki', wfWikiID() );
- $query = new \Elastica\Query( $term );
- $this->deleteByQuery( $type, $query );
+ $this->deleteByQuery( $this->getType(), \Elastica\Query::create(
+ ( new Elastica\Query\Term() )->setTerm( 'wiki', wfWikiID() ) ) );
$mapping = new \Elastica\Type\Mapping();
$mapping->setType( $type );
-
- $keywordType = [ 'type' => 'string', 'index' => 'not_analyzed' ];
- $textType = 'string';
- if ( $this->isElastica5() ) {
- $keywordType = [ 'type' => 'keyword' ];
- $textType = 'text';
- }
$mapping->setProperties( [
- 'wiki' => $keywordType,
- 'localid' => $keywordType,
- 'uri' => $keywordType,
- 'language' => $keywordType,
- 'group' => $keywordType,
+ 'wiki' => [ 'type' => 'keyword' ],
+ 'localid' => [ 'type' => 'keyword' ],
+ 'uri' => [ 'type' => 'keyword' ],
+ 'language' => [ 'type' => 'keyword' ],
+ 'group' => [ 'type' => 'keyword' ],
'content' => [
- 'type' => $textType,
+ 'type' => 'text',
'fields' => [
'content' => [
- 'type' => $textType,
- 'index' => 'analyzed',
+ 'type' => 'text',
'term_vector' => 'yes'
],
'prefix_complete' => [
- 'type' => $textType,
+ 'type' => 'text',
'analyzer' => 'prefix',
'search_analyzer' => 'standard',
'term_vector' => 'yes'
],
'case_sensitive' => [
- 'type' => $textType,
- 'index' => 'analyzed',
+ 'type' => 'text',
'analyzer' => 'casesensitive',
'term_vector' => 'yes'
]
@@ -440,6 +411,10 @@ GROOVY;
// I hate the rule that forbids {}
}
+ /**
+ * @param array[] $batch
+ * @phan-param array<int,array{0:MessageHandle,1:string,2:string}> $batch
+ */
public function batchInsertDefinitions( array $batch ) {
$lb = new LinkBatch();
foreach ( $batch as $data ) {
@@ -478,11 +453,7 @@ GROOVY;
public function endBootstrap() {
$index = $this->getType()->getIndex();
$index->refresh();
- if ( $this->isElastica5() ) {
- $index->forcemerge();
- } else {
- $index->optimize();
- }
+ $index->forcemerge();
$index->getSettings()->setRefreshInterval( '5s' );
}
@@ -508,11 +479,7 @@ GROOVY;
* @return string
*/
private function getIndexName() {
- if ( isset( $this->config['index'] ) ) {
- return $this->config['index'];
- } else {
- return 'ttmserver';
- }
+ return $this->config['index'] ?? 'ttmserver';
}
public function getType() {
@@ -522,15 +489,17 @@ GROOVY;
}
protected function getShardCount() {
- return isset( $this->config['shards'] ) ? $this->config['shards'] : 1;
+ return $this->config['shards'] ?? 1;
}
protected function getReplicaCount() {
- return isset( $this->config['replicas'] ) ? $this->config['replicas'] : '0-2';
+ return $this->config['replicas'] ?? '0-2';
}
/**
* Get index health
+ * TODO: Remove this code in the future as we drop support for
+ * older versions of the Elastica extension.
*
* @param string $indexName
* @return array the index health status
@@ -539,7 +508,7 @@ GROOVY;
$path = "_cluster/health/$indexName";
$response = $this->getClient()->request( $path );
if ( $response->hasError() ) {
- throw new \Exception( "Error while fetching index health status: ". $response->getError() );
+ throw new \Exception( "Error while fetching index health status: " . $response->getError() );
}
return $response->getData();
}
@@ -551,6 +520,8 @@ GROOVY;
* CirrusSearch/includes/Maintenance/ConfigUtils.php. Ideally we'd
* like to make these utility methods available in the Elastica
* extension, but this one requires some refactoring in cirrus first.
+ * TODO: Remove this code in the future as we drop support for
+ * older versions of the Elastica extension.
*
* @param string $indexName
* @param int $timeout
@@ -561,7 +532,7 @@ GROOVY;
while ( ( $startTime + $timeout ) > time() ) {
try {
$response = $this->getIndexHealth( $indexName );
- $status = isset( $response['status'] ) ? $response['status'] : 'unknown';
+ $status = $response['status'] ?? 'unknown';
if ( $status === 'green' ) {
$this->logOutput( "\tGreen!" );
return true;
@@ -576,10 +547,29 @@ GROOVY;
}
protected function waitUntilReady() {
+ if ( method_exists( MWElasticUtils::class, 'waitForGreen' ) ) {
+ $statuses = MWElasticUtils::waitForGreen(
+ $this->getClient(),
+ $this->getIndexName(),
+ self::WAIT_UNTIL_READY_TIMEOUT );
+ $this->logOutput( "Waiting for the index to go green..." );
+ foreach ( $statuses as $message ) {
+ $this->logOutput( $message );
+ }
+
+ if ( !$statuses->getReturn() ) {
+ die( "Timeout! Please check server logs for {$this->getIndexName()}." );
+ }
+
+ return;
+ }
+
+ // TODO: This code can be removed in the future as we drop support for
+ // older versions of the Elastica extension.
$indexName = $this->getType()->getIndex()->getName();
$this->logOutput( "Waiting for the index to go green..." );
if ( !$this->waitForGreen( $indexName, self::WAIT_UNTIL_READY_TIMEOUT ) ) {
- die( "Timeout! Please check server logs for {$this->getIndex()->getName()}." );
+ die( "Timeout! Please check server logs for {$this->getIndexName()}." );
}
}
@@ -596,9 +586,9 @@ GROOVY;
/**
* Force the update of index mappings
- * @since 2015.03
+ * @inheritDoc
*/
- public function doMappingUpdate() {
+ public function setDoReIndex() {
$this->updateMapping = true;
}
@@ -678,9 +668,9 @@ GROOVY;
* @param string $queryString
* @param array $opts
* @param array $highlight
- * @return array
+ * @return \Elastica\Search
*/
- public function search( $queryString, $opts, $highlight ) {
+ public function createSearch( $queryString, $opts, $highlight ) {
$query = new \Elastica\Query();
list( $searchQuery, $highlights ) = $this->parseQueryString( $queryString, $opts );
@@ -723,6 +713,9 @@ GROOVY;
// Check that we have at least one filter to avoid invalid query errors.
if ( $language !== '' || $group !== '' ) {
+ // TODO: This seems wrong, but perhaps for aggregation purposes?
+ // should make $search a must clause and use the bool query
+ // as main.
$query->setPostFilter( $filters );
}
@@ -734,15 +727,34 @@ GROOVY;
'fields' => $highlights,
] );
+ return $this->getType()->getIndex()->createSearch( $query );
+ }
+
+ /**
+ * Search interface
+ * @param string $queryString
+ * @param array $opts
+ * @param array $highlight
+ * @throws TTMServerException
+ * @return \Elastica\ResultSet
+ */
+ public function search( $queryString, $opts, $highlight ) {
+ $search = $this->createSearch( $queryString, $opts, $highlight );
+
try {
- return $this->getType()->getIndex()->search( $query );
+ return $search->search();
} catch ( \Elastica\Exception\ExceptionInterface $e ) {
throw new TTMServerException( $e->getMessage() );
}
}
+ /**
+ * @param \Elastica\ResultSet $resultset
+ * @return array
+ */
public function getFacets( $resultset ) {
$aggs = $resultset->getAggregations();
+ '@phan-var array[][][] $aggs';
$ret = [
'language' => [],
@@ -758,10 +770,18 @@ GROOVY;
return $ret;
}
+ /**
+ * @param \Elastica\ResultSet $resultset
+ * @return int
+ */
public function getTotalHits( $resultset ) {
return $resultset->getTotalHits();
}
+ /**
+ * @param \Elastica\ResultSet $resultset
+ * @return array
+ */
public function getDocuments( $resultset ) {
$ret = [];
foreach ( $resultset->getResults() as $document ) {
@@ -782,13 +802,30 @@ GROOVY;
/**
* Delete docs by query by using the scroll API.
+ * TODO: Elastica\Index::deleteByQuery() ? was removed
+ * in 2.x and returned in 5.x.
*
* @param \Elastica\Type $type the source index
* @param \Elastica\Query $query the query
+ * @throws \RuntimeException
*/
private function deleteByQuery( \Elastica\Type $type, \Elastica\Query $query ) {
- $retryAttempts = self::BULK_INDEX_RETRY_ATTEMPTS;
+ if ( method_exists( MWElasticUtils::class, 'deleteByQuery' ) ) {
+ try {
+ MWElasticUtils::deleteByQuery( $type->getIndex(), $query, /* $allowConflicts = */ true );
+ } catch ( \Exception $e ) {
+ LoggerFactory::getInstance( 'ElasticSearchTTMServer' )->error(
+ 'Problem encountered during deletion.',
+ [ 'exception' => $e ]
+ );
+ throw new \RuntimeException( "Problem encountered during deletion.\n" . $e );
+ }
+ return;
+ }
+ // TODO: This code can be removed in the future as we drop support for
+ // older versions of the Elastica extension.
+ $retryAttempts = self::BULK_INDEX_RETRY_ATTEMPTS;
$search = new \Elastica\Search( $this->getClient() );
$search->setQuery( $query );
$search->addType( $type );
@@ -817,12 +854,27 @@ GROOVY;
* @return bool
*/
public function isFrozen() {
+ if ( method_exists( MWElasticUtils::class, 'isFrozen' ) ) {
+ try {
+ return MWElasticUtils::isFrozen( $this->getClient() );
+ } catch ( \Exception $e ) {
+ LoggerFactory::getInstance( 'ElasticSearchTTMServer' )->warning(
+ 'Problem encountered while checking the frozen index.',
+ [ 'exception' => $e ]
+ );
+ return false;
+ }
+ }
+
+ // TODO: This code can be removed in the future as we drop support for
+ // older versions of the Elastica extension.
if ( !isset( $this->config['frozen_index'] ) ) {
return false;
}
$frozenIndex = $this->config['frozen_index'];
$indices = [ static::ALL_INDEXES_FROZEN_NAME, $this->getIndexName() ];
- $ids = new \Elastica\Query\Ids( null, $indices );
+ $ids = ( new \Elastica\Query\Ids() )
+ ->setIds( $indices );
try {
$resp = $this->getClient()
@@ -830,12 +882,8 @@ GROOVY;
->getType( static::FROZEN_TYPE )
->search( \Elastica\Query::create( $ids ) );
- if ( $resp->count() === 0 ) {
- return false;
- } else {
- return true;
- }
- } catch ( Exception $e ) {
+ return $resp->count() !== 0;
+ } catch ( \Exception $e ) {
LoggerFactory::getInstance( 'ElasticSearchTTMServer' )->warning(
'Problem encountered while checking the frozen index.',
[ 'exception' => $e ]
@@ -843,14 +891,4 @@ GROOVY;
return false;
}
}
-
- /**
- * @return bool true if running with Elastica 5+
- */
- private function isElastica5() {
- // Sadly Elastica does not seem to expose its version so we
- // check the inexistence of a class that was removed in the
- // version 5
- return !class_exists( \Elastica\Script::class );
- }
}
diff --git a/MLEB/Translate/ttmserver/FakeTTMServer.php b/MLEB/Translate/ttmserver/FakeTTMServer.php
index 16715592..9a35822e 100644
--- a/MLEB/Translate/ttmserver/FakeTTMServer.php
+++ b/MLEB/Translate/ttmserver/FakeTTMServer.php
@@ -22,7 +22,7 @@ class FakeTTMServer implements ReadableTTMServer, WritableTTMServer {
}
public function isLocalSuggestion( array $suggestion ) {
- false;
+ return false;
}
public function expandLocation( array $suggestion ) {
@@ -57,4 +57,7 @@ class FakeTTMServer implements ReadableTTMServer, WritableTTMServer {
public function isFrozen() {
return false;
}
+
+ public function setDoReIndex() {
+ }
}
diff --git a/MLEB/Translate/ttmserver/FuzzyLikeThis.php b/MLEB/Translate/ttmserver/FuzzyLikeThis.php
index 143b3222..82798584 100644
--- a/MLEB/Translate/ttmserver/FuzzyLikeThis.php
+++ b/MLEB/Translate/ttmserver/FuzzyLikeThis.php
@@ -199,6 +199,7 @@ class FuzzyLikeThis extends \Elastica\Query\AbstractQuery {
* @see \Elastica\Query\AbstractQuery::toArray()
*/
public function toArray() {
+ $args = [];
if ( !empty( $this->_fields ) ) {
$args['fields'] = $this->_fields;
}
diff --git a/MLEB/Translate/ttmserver/Interfaces.php b/MLEB/Translate/ttmserver/Interfaces.php
index 1f8cb20e..133f4ae4 100644
--- a/MLEB/Translate/ttmserver/Interfaces.php
+++ b/MLEB/Translate/ttmserver/Interfaces.php
@@ -33,7 +33,7 @@ interface ReadableTTMServer {
* Determines if the suggestion returned by this TTMServer comes
* from this wiki or any other wiki.
* @param array $suggestion
- * @return Bool
+ * @return bool
*/
public function isLocalSuggestion( array $suggestion );
@@ -41,7 +41,7 @@ interface ReadableTTMServer {
* Given suggestion returned by this TTMServer, constructs fully
* qualified URL to the location of the translation.
* @param array $suggestion
- * @return String URL
+ * @return string URL
*/
public function expandLocation( array $suggestion );
}
@@ -116,6 +116,12 @@ interface WritableTTMServer {
* @return bool true if the service is frozen
*/
public function isFrozen();
+
+ /**
+ * Instruct the service to fully wipe the index and start from scratch.
+ * @since 2020.01
+ */
+ public function setDoReIndex();
}
/**
@@ -135,19 +141,19 @@ interface SearchableTTMServer {
public function search( $queryString, $opts, $highlight );
/**
- * @param stdClass $resultset
+ * @param mixed $resultset
* @return array[]
*/
public function getFacets( $resultset );
/**
- * @param stdClass $resultset
+ * @param mixed $resultset
* @return int
*/
public function getTotalHits( $resultset );
/**
- * @param stdClass $resultset
+ * @param mixed $resultset
* @return array[]
*/
public function getDocuments( $resultset );
diff --git a/MLEB/Translate/ttmserver/SolrTTMServer.php b/MLEB/Translate/ttmserver/SolrTTMServer.php
deleted file mode 100644
index bb6c244c..00000000
--- a/MLEB/Translate/ttmserver/SolrTTMServer.php
+++ /dev/null
@@ -1,445 +0,0 @@
-<?php
-/**
- * TTMServer - The Translate extension translation memory interface
- *
- * @file
- * @author Niklas Laxström
- * @copyright Copyright © 2012-2013, Niklas Laxström
- * @license GPL-2.0-or-later
- * @ingroup TTMServer
- */
-
-/**
- * TTMServer backed based on Solr instance. Depends on Solarium.
- * @since 2012-06-27
- * @ingroup TTMServer
- * @deprecated 1.27. Will be removed in 1.29.
- */
-class SolrTTMServer
- extends TTMServer
- implements ReadableTTMServer, SearchableTTMServer, WritableTTMServer
-{
- /**
- * In case auto-commit is not enabled, or even if it is, tell solr to
- * commit before this time has passed, in milliseconds.
- */
- const COMMIT_WITHIN = 5000;
-
- protected $client;
-
- /**
- * Reference to the maintenance script to relay logging output.
- */
- protected $logger;
-
- public function __construct( $config ) {
- wfDeprecated( __METHOD__, '1.24' );
-
- parent::__construct( $config );
-
- if ( isset( $config['config'] ) ) {
- $this->client = new Solarium_Client( $config['config'] );
- } else {
- $this->client = new Solarium_Client();
- }
- }
-
- public function isLocalSuggestion( array $suggestion ) {
- return $suggestion['wiki'] === wfWikiID();
- }
-
- public function expandLocation( array $suggestion ) {
- return $suggestion['uri'];
- }
-
- public function query( $sourceLanguage, $targetLanguage, $text ) {
- try {
- return $this->doQuery( $sourceLanguage, $targetLanguage, $text );
- } catch ( Solarium_Exception $e ) {
- throw new TranslationHelperException( 'Solarium exception: ' . $e );
- }
- }
-
- /// @see ReadableTTMServer::query
- protected function doQuery( $sourceLanguage, $targetLanguage, $text ) {
- /* Two query system:
- * 1) Find all strings in source language that match text
- * 2) Do another query for translations for those strings
- */
- // For now impose a length limit on query string to avoid doing
- // very slow queries. Magic number.
- if ( strlen( $text ) > 789 ) {
- return [];
- }
-
- $query = $this->client->createSelect();
- $query->setFields( [ 'globalid', 'content', 'score' ] );
-
- /* The interface usually displays three best candidates. These might
- * come from more than three matches, if the translation is the same.
- * This might not find all suggestions, if the top N best matching
- * source texts don't have translations, but worse matches do. We
- * could loop with start parameter to fetch more until we have enough
- * suggestions or the quality drops below the cutoff point. */
- $query->setRows( 25 );
-
- /* Our string can contain all kind of nasty characters, so we need
- * escape them with great pain. */
- $helper = $query->getHelper();
- $dist = $helper->escapePhrase( $text );
- // "edit" could also be ngram of other algorithm
- $dist = "strdist($dist,content,edit)";
- /* Note how we need to escape twice here, first the string for strdist
- * and then the strdist call itself for the query. And of course every-
- * thing will be URL encoded once sent over the line. */
- $query->setQuery( '_val_:%P1%', [ $dist ] );
-
- /* Filter queries are supposed to be efficient as they are separately
- * cached, but I haven't done any benchmarks. */
- $query->createFilterQuery( 'lang' )
- ->setQuery( 'language:%P1%', [ $sourceLanguage ] );
-
- $resultset = $this->client->select( $query );
-
- /* This query is doing two unrelated things:
- * 1) Collect the message contents and scores so that they can
- * be accessed later for the translations we found.
- * 2) Build the query string for the query that fetches the
- * translations.
- * This code is a bit uglier than I'd like it to be, since there
- * there is no field that globally identifies a message (message
- * definition and translations). */
- $contents = $scores = [];
- $queryString = '';
- foreach ( $resultset as $doc ) {
- $sourceId = preg_replace( '~/[^/]+$~', '', $doc->globalid );
- $contents[$sourceId] = $doc->content;
- $scores[$sourceId] = $doc->score;
-
- $globalid = $helper->escapePhrase( "$sourceId/$targetLanguage" );
- $queryString .= "globalid:$globalid ";
- }
-
- // Second query to fetch available translations
- $fetchQuery = $this->client->createSelect();
- $fetchQuery->setFields( [ 'wiki', 'uri', 'content', 'messageid', 'globalid' ] );
- // This come in random order, so have to fetch all and sort
- $fetchQuery->setRows( 25 );
- $fetchQuery->setQuery( $queryString );
- // With AND we would not find anything, obviously.
- $fetchQuery->setQueryDefaultOperator( Solarium_Query_Select::QUERY_OPERATOR_OR );
-
- $translations = $this->client->select( $fetchQuery );
-
- $suggestions = [];
- foreach ( $translations as $doc ) {
- /* Construct the matching source id */
- $sourceId = preg_replace( '~/[^/]+$~', '', $doc->globalid );
-
- /* Unfortunately we cannot do this on the search server,
- * because score is not a real field and thus cannot be
- * used in a filter query. */
- $quality = $scores[$sourceId];
- if ( $quality < $this->config['cutoff'] ) {
- continue;
- }
-
- $suggestions[] = [
- 'source' => $contents[$sourceId],
- 'target' => $doc->content,
- 'context' => $doc->messageid,
- 'quality' => $quality,
- 'wiki' => $doc->wiki,
- 'location' => $doc->messageid . '/' . $targetLanguage,
- 'uri' => $doc->uri,
- ];
- }
-
- /* Like mentioned above, we get results in random order. Sort them
- * now to have best matches first as expected by callers. */
- uasort( $suggestions, function ( $a, $b ) {
- if ( $a['quality'] === $b['quality'] ) {
- return 0;
- }
-
- return ( $a['quality'] < $b['quality'] ) ? 1 : -1;
- } );
-
- return $suggestions;
- }
-
- /* Write functions */
-
- public function update( MessageHandle $handle, $targetText ) {
- if ( $handle->getCode() === '' ) {
- return false;
- }
-
- /* There are various different cases here:
- * [new or updated] [fuzzy|non-fuzzy] [translation|definition]
- * 1) We don't distinguish between new or updated here.
- * 2) Delete old translation, but not definition
- * 3) Insert new translation or definition, if non-fuzzy
- * The definition should never be fuzzied anyway.
- *
- * These only apply to known messages.
- */
-
- $update = $this->client->createUpdate();
- $title = $handle->getTitle();
-
- $doDelete = true;
- $sourceLanguage = '';
- if ( $handle->isValid() ) {
- $sourceLanguage = $handle->getGroup()->getSourceLanguage();
- if ( $handle->getCode() === $sourceLanguage ) {
- $doDelete = false;
- }
- }
-
- if ( $doDelete ) {
- $base = Title::makeTitle( $title->getNamespace(), $handle->getKey() );
- $conds = [
- 'wiki' => wfWikiID(),
- 'language' => $handle->getCode(),
- 'messageid' => $base->getPrefixedText(),
- ];
- foreach ( $conds as $key => &$value ) {
- $value = "$key:" . $update->getHelper()->escapePhrase( $value );
- }
- $update->addDeleteQuery( implode( ' AND ', $conds ) );
- }
-
- if ( $targetText !== null ) {
- if ( $handle->isValid() ) {
- // Of the message definition page
- $targetTitle = $handle->getTitle();
- $sourceTitle = Title::makeTitle(
- $targetTitle->getNamespace(),
- $handle->getKey() . '/' . $sourceLanguage
- );
- $revId = (int)$sourceTitle->getLatestRevID();
- /* Note: in some cases the source page might not exist, in this case
- * we use 0 as message version identifier, to differentiate them from
- * orphan messages */
- } else {
- $revId = 'orphan';
- }
-
- $doc = $this->createDocument( $handle, $targetText, $revId );
- // Add document and commit within X seconds.
- $update->addDocument( $doc, null, self::COMMIT_WITHIN );
- }
-
- try {
- $this->client->update( $update );
- } catch ( Solarium_Exception $e ) {
- error_log( 'SolrTTMServer update-write failed' );
-
- return false;
- }
-
- return true;
- }
-
- /**
- * @see schema.xml
- * @param MessageHandle $handle
- * @param string $text
- * @param int $revId
- * @return Solarium_Document_ReadWrite
- */
- protected function createDocument( MessageHandle $handle, $text, $revId ) {
- $language = $handle->getCode();
- $translationTitle = $handle->getTitle();
-
- $title = Title::makeTitle( $handle->getTitle()->getNamespace(), $handle->getKey() );
- $wiki = wfWikiID();
- $messageid = $title->getPrefixedText();
- $globalid = "$wiki-$messageid-$revId/$language";
-
- $doc = new Solarium_Document_ReadWrite();
- $doc->wiki = $wiki;
- $doc->uri = $translationTitle->getCanonicalURL();
- $doc->messageid = $messageid;
- $doc->globalid = $globalid;
-
- $doc->language = $language;
- $doc->content = $text;
- $doc->setField( 'group', $handle->getGroupIds() );
-
- return $doc;
- }
-
- public function beginBootstrap() {
- $update = $this->client->createUpdate();
- $query = 'wiki:' . $update->getHelper()->escapePhrase( wfWikiID() );
- $update->addDeleteQuery( $query );
- $update->addCommit();
- $this->client->update( $update );
- }
-
- public function beginBatch() {
- // I hate the rule that forbids {}
- }
-
- public function batchInsertDefinitions( array $batch ) {
- $lb = new LinkBatch();
- foreach ( $batch as $data ) {
- $lb->addObj( $data[0]->getTitle() );
- }
- $lb->execute();
-
- $this->batchInsertTranslations( $batch );
- }
-
- public function batchInsertTranslations( array $batch ) {
- $update = $this->client->createUpdate();
- foreach ( $batch as $key => $data ) {
- list( $handle, $sourceLanguage, $text ) = $data;
- $revId = $handle->getTitleForLanguage( $sourceLanguage )->getLatestRevID();
- $doc = $this->createDocument( $handle, $text, $revId );
- // Add document and commit within X seconds.
- $update->addDocument( $doc, null, self::COMMIT_WITHIN );
- }
-
- $retries = 5;
-
- while ( $retries-- > 0 ) {
- try {
- $this->client->update( $update );
- break;
- } catch ( Solarium_Client_HttpException $e ) {
- if ( $retries === 0 ) {
- throw $e;
- } else {
- $c = get_class( $e );
- $msg = $e->getMessage();
- $this->logOutput( "Batch failed ($c: $msg), trying again in 10 seconds" );
- sleep( 10 );
- }
- }
- }
- }
-
- public function endBatch() {
- $update = $this->client->createUpdate();
- $this->client->update( $update );
- }
-
- public function endBootstrap() {
- $update = $this->client->createUpdate();
- $update->addCommit();
- $update->addOptimize();
- $this->client->update( $update );
- }
-
- public function getSolarium() {
- return $this->client;
- }
-
- public function setLogger( $logger ) {
- $this->logger = $logger;
- }
-
- // Can it get any uglier?
- protected function logOutput( $text ) {
- if ( $this->logger ) {
- $this->logger->statusLine( "$text\n" );
- }
- }
-
- /**
- * Search interface
- * @param string $queryString
- * @param array $opts
- * @param array $highlight
- * @return array
- */
- public function search( $queryString, $opts, $highlight ) {
- $client = $this->getSolarium();
-
- $query = $client->createSelect();
- $dismax = $query->getDisMax();
- $dismax->setQueryParser( 'edismax' );
- $query->setQuery( $queryString );
- $query->setRows( $opts['limit'] );
- $query->setStart( $opts['offset'] );
-
- list( $pre, $post ) = $highlight;
- $hl = $query->getHighlighting();
- $hl->setFields( 'text' );
- $hl->setSimplePrefix( $pre );
- $hl->setSimplePostfix( $post );
- $hl->setMaxAnalyzedChars( '5000' );
- $hl->setFragSize( '5000' );
- $hl->setSnippets( 1 );
-
- $languageFilter = $opts['language'];
- if ( $languageFilter !== '' ) {
- $query->createFilterQuery( 'languageFilter' )
- ->setQuery( 'language:%P1%', [ $languageFilter ] )
- ->addTag( 'filter' );
- }
-
- $groupFilter = $opts['group'];
- if ( $groupFilter !== '' ) {
- $query->createFilterQuery( 'groupFilter' )
- ->setQuery( 'group:%P1%', [ $groupFilter ] )
- ->addTag( 'filter' );
- }
-
- $facetSet = $query->getFacetSet();
-
- $language = $facetSet->createFacetField( 'language' );
- $language->setField( 'language' );
- $language->setMinCount( 1 );
- $language->addExclude( 'filter' );
-
- $group = $facetSet->createFacetField( 'group' );
- $group->setField( 'group' );
- $group->setMinCount( 1 );
- $group->setMissing( true );
- $group->addExclude( 'filter' );
-
- try {
- return $client->select( $query );
- } catch ( Solarium_Client_HttpException $e ) {
- throw new TTMServer( $e->getMessage() );
- }
- }
-
- public function getFacets( $resultset ) {
- return [
- 'language' => iterator_to_array( $resultset->getFacetSet()->getFacet( 'language' ) ),
- 'group' => iterator_to_array( $resultset->getFacetSet()->getFacet( 'group' ) ),
- ];
- }
-
- public function getTotalHits( $resultset ) {
- return $resultset->getNumFound();
- }
-
- public function getDocuments( $resultset ) {
- $highlighting = $resultset->getHighlighting();
- $ret = [];
- foreach ( $resultset as $document ) {
- $fields = iterator_to_array( $document );
- // Compatibility mapping
- $fields['localid'] = $fields['messageid'];
-
- $hdoc = $highlighting->getResult( $document->globalid );
- $text = $hdoc->getField( 'text' );
- if ( $text === [] ) {
- $text = $document->text;
- } else {
- $text = $text[0];
- }
-
- $fields['content'] = $text;
- $ret[] = $fields;
- }
-
- return $ret;
- }
-}
diff --git a/MLEB/Translate/ttmserver/TTMServer.php b/MLEB/Translate/ttmserver/TTMServer.php
index 98724869..2a7f0900 100644
--- a/MLEB/Translate/ttmserver/TTMServer.php
+++ b/MLEB/Translate/ttmserver/TTMServer.php
@@ -15,13 +15,22 @@
* @ingroup TTMServer
*/
class TTMServer {
+ /** @var array */
protected $config;
- protected function __construct( $config ) {
+ /**
+ * @param array $config
+ */
+ protected function __construct( array $config ) {
$this->config = $config;
}
- public static function factory( $config ) {
+ /**
+ * @param array $config
+ * @return TTMServer|null
+ * @throws MWException
+ */
+ public static function factory( array $config ) {
if ( isset( $config['class'] ) ) {
$class = $config['class'];
@@ -38,7 +47,7 @@ class TTMServer {
}
}
- throw new MWEXception( 'TTMServer with no type' );
+ throw new MWException( 'TTMServer with no type' );
}
/**
@@ -60,12 +69,21 @@ class TTMServer {
return new FakeTTMServer();
}
+ /**
+ * @param array[] $suggestions
+ * @return array[]
+ */
public static function sortSuggestions( array $suggestions ) {
usort( $suggestions, [ __CLASS__, 'qualitySort' ] );
return $suggestions;
}
+ /**
+ * @param array $a
+ * @param array $b
+ * @return int
+ */
protected static function qualitySort( $a, $b ) {
list( $c, $d ) = [ $a['quality'], $b['quality'] ];
if ( $c === $d ) {
@@ -137,15 +155,17 @@ class TTMServer {
/**
* Called from TranslateEditAddons::onSave
* @param MessageHandle $handle
- * @param string $text
- * @param bool $fuzzy
*/
- public static function onChange( MessageHandle $handle, $text, $fuzzy ) {
+ public static function onChange( MessageHandle $handle ) {
$job = TTMServerMessageUpdateJob::newJob( $handle, 'refresh' );
JobQueueGroup::singleton()->push( $job );
}
- public static function onGroupChange( MessageHandle $handle, $old, $new ) {
+ /**
+ * @param MessageHandle $handle
+ * @param array $old
+ */
+ public static function onGroupChange( MessageHandle $handle, $old ) {
if ( $old === [] ) {
// Don't bother for newly added messages
return;
diff --git a/MLEB/Translate/ttmserver/TTMServerMessageUpdateJob.php b/MLEB/Translate/ttmserver/TTMServerMessageUpdateJob.php
index 7a6a91d7..2d59813e 100644
--- a/MLEB/Translate/ttmserver/TTMServerMessageUpdateJob.php
+++ b/MLEB/Translate/ttmserver/TTMServerMessageUpdateJob.php
@@ -34,20 +34,20 @@ class TTMServerMessageUpdateJob extends Job {
* Number of *retries* allowed, 4 means we attempt
* to run the job 5 times (1 initial attempt + 4 retries).
*/
- const MAX_ERROR_RETRY = 4;
+ protected const MAX_ERROR_RETRY = 4;
/**
* Constant used by backoffDelay().
* With 7 the cumulative delay between the first and last attempt is
* between 8 and 33 minutes.
*/
- const WRITE_BACKOFF_EXPONENT = 7;
+ protected const WRITE_BACKOFF_EXPONENT = 7;
/**
* The maximum amount of time jobs delayed due to frozen services can remain
* in the job queue.
*/
- const DROP_DELAYED_JOBS_AFTER = 86400; // 60 * 60 * 24 * 1;
+ public const DROP_DELAYED_JOBS_AFTER = 86400; // 60 * 60 * 24 * 1;
/**
* @param MessageHandle $handle
diff --git a/MLEB/Translate/ttmserver/schema.xml b/MLEB/Translate/ttmserver/schema.xml
deleted file mode 100644
index 0ed2f047..00000000
--- a/MLEB/Translate/ttmserver/schema.xml
+++ /dev/null
@@ -1,45 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!-- This is schema file for TTMServer using Solr as backend -->
-<schema name="ttmserver" version="1.5">
- <types>
- <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
- <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
- <fieldType name="tint" class="solr.TrieIntField" precisionStep="50" positionIncrementGap="0"/>
- <!-- Our input can basically be in any language, so we use either
- language agnostic processing or something that can adapt to
- the language in question. -->
- <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
- <analyzer>
- <!-- Consider using solr.ICUTokenizerFactory -->
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- Consider using solr.ICUNormalizer2FilterFactory -->
- <filter class="solr.LowerCaseFilterFactory"/>
- </analyzer>
- </fieldType>
- </types>
-
- <fields>
- <field name="_version_" type="long" indexed="true" stored="true" />
-
- <!-- If multiple wikis are using the same server, this will tell which one
- owns this document. Maps to MediaWiki wfWikiId(). -->
- <field name="wiki" type="string" indexed="true" stored="true" required="true" />
- <!-- Title::getPrefixedText() of the message definition page. -->
- <field name="messageid" type="string" indexed="true" stored="true" required="true" />
- <!-- Consists of concatenation of wiki and messageid. -->
- <field name="globalid" type="string" indexed="true" stored="true" required="true" />
- <!-- URL or something to the translation in the wiki. -->
- <field name="uri" type="string" indexed="true" stored="true" required="true" />
-
- <!-- FACETs: Language and groups. -->
- <field name="language" type="string" indexed="true" stored="true" required="true" />
- <field name="group" multiValued="true" indexed="true" stored="true" type="string" />
-
- <field name="content" type="string" indexed="true" stored="true" required="true" />
-
- <field name="text" type="text_ws" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
- <copyField source="content" dest="text"/>
- </fields>
- <defaultSearchField>text</defaultSearchField>
- <uniqueKey>globalid</uniqueKey>
-</schema>