summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'Translate/src/MessageGroupProcessing/CsvTranslationImporter.php')
-rw-r--r--Translate/src/MessageGroupProcessing/CsvTranslationImporter.php249
1 files changed, 249 insertions, 0 deletions
diff --git a/Translate/src/MessageGroupProcessing/CsvTranslationImporter.php b/Translate/src/MessageGroupProcessing/CsvTranslationImporter.php
new file mode 100644
index 00000000..83a9f9a8
--- /dev/null
+++ b/Translate/src/MessageGroupProcessing/CsvTranslationImporter.php
@@ -0,0 +1,249 @@
+<?php
+declare( strict_types = 1 );
+
+namespace MediaWiki\Extension\Translate\MessageGroupProcessing;
+
+use CommentStoreComment;
+use ContentHandler;
+use MediaWiki\Page\WikiPageFactory;
+use MediaWiki\Permissions\Authority;
+use MediaWiki\Revision\SlotRecord;
+use MessageHandle;
+use SplFileObject;
+use Status;
+use Title;
+use TranslateUtils;
+
+/**
+ * Parse, validate and import translations from a CSV file
+ * @since 2022.06
+ * @license GPL-2.0-or-later
+ * @author Abijeet Patro
+ */
+class CsvTranslationImporter {
+ /** @var WikiPageFactory Used to obtain the page updater for each translation page being saved */
+ private $wikiPageFactory;
+
+ public function __construct( WikiPageFactory $wikiPageFactory ) {
+ $this->wikiPageFactory = $wikiPageFactory;
+ }
+
+ /**
+  * Parse and validate the CSV file.
+  *
+  * The first line of the file must be the header; it is validated by
+  * getLanguagesFromHeader. Every following non-empty row must start with the
+  * title of a valid message that belongs to a message group. The column for the
+  * group's source language is never included in the returned translations.
+  *
+  * @param string $csvFilePath Path of the CSV file to read
+  * @return Status Good status whose value is a list of
+  *  [ 'messageTitle' => string, 'translations' => [ languageCode => string|null ] ]
+  *  entries; otherwise a fatal status describing the file, header or row problems.
+  */
+ public function parseFile( string $csvFilePath ): Status {
+     // is_file() is already false for a missing path; is_readable() backs the
+     // "is not readable" part of the message instead of letting SplFileObject throw.
+     if ( !is_file( $csvFilePath ) || !is_readable( $csvFilePath ) ) {
+         return Status::newFatal(
+             "CSV file path '$csvFilePath' does not exist, is not readable or is not a file"
+         );
+     }
+
+     $indexedLanguageCodes = [];
+     $currentRowCount = -1;
+     $importData = [];
+     $invalidRows = [
+         'emptyTitleRows' => [],
+         'invalidTitleRows' => [],
+         'groupNotFoundRows' => []
+     ];
+
+     $csvFileContent = new SplFileObject( $csvFilePath, 'r' );
+     while ( !$csvFileContent->eof() ) {
+         // Increment the row count at the beginning since we have a bunch of jump statements
+         // at various places
+         ++$currentRowCount;
+
+         $csvRow = $csvFileContent->fgetcsv();
+         if ( !is_array( $csvRow ) ) {
+             // fgetcsv() returns false when the row cannot be read; report it rather than
+             // passing a non-array to the array-typed helpers below.
+             return Status::newFatal(
+                 'Unable to read row ' . ( $currentRowCount + 1 ) . " of CSV file '$csvFilePath'"
+             );
+         }
+
+         if ( $this->isCsvRowEmpty( $csvRow ) ) {
+             continue;
+         }
+
+         if ( $currentRowCount === 0 ) {
+             // Validate the header
+             $status = $this->getLanguagesFromHeader( $csvRow );
+             if ( !$status->isGood() ) {
+                 return $status;
+             }
+             /** @var int[] Column index keyed by lower-cased language code */
+             $indexedLanguageCodes = $status->getValue();
+             continue;
+         }
+
+         // Compare against '' explicitly so that a title such as '0' is not reported as empty
+         $messageTitle = trim( $csvRow[0] ?? '' );
+         if ( $messageTitle === '' ) {
+             $invalidRows['emptyTitleRows'][] = $currentRowCount + 1;
+             continue;
+         }
+
+         $handle = $this->getMessageHandleIfValid( $messageTitle );
+         if ( $handle === null ) {
+             $invalidRows['invalidTitleRows'][] = $currentRowCount + 1;
+             continue;
+         }
+
+         // Ensure that the group is present
+         $group = $handle->getGroup();
+         if ( !$group ) {
+             $invalidRows['groupNotFoundRows'][] = $currentRowCount + 1;
+             continue;
+         }
+
+         $sourceLanguage = $group->getSourceLanguage();
+
+         $rowData = [ 'translations' => [], 'messageTitle' => $messageTitle ];
+         foreach ( $indexedLanguageCodes as $languageCode => $index ) {
+             if ( $sourceLanguage === $languageCode ) {
+                 continue;
+             }
+
+             // Rows may be shorter than the header; missing cells become null
+             $rowData['translations'][$languageCode] = $csvRow[$index] ?? null;
+         }
+         $importData[] = $rowData;
+     }
+
+     // Collect every category of row error so the user sees all problems at once
+     $status = new Status();
+     if ( $invalidRows['emptyTitleRows'] ) {
+         $status->fatal(
+             'Empty message titles found on row(s): ' . implode( ',', $invalidRows['emptyTitleRows'] )
+         );
+     }
+
+     if ( $invalidRows['invalidTitleRows'] ) {
+         $status->fatal(
+             'Invalid message title(s) found on row(s): ' . implode( ',', $invalidRows['invalidTitleRows'] )
+         );
+     }
+
+     if ( $invalidRows['groupNotFoundRows'] ) {
+         $status->fatal(
+             'Group not found for message(s) on row(s) ' . implode( ',', $invalidRows['groupNotFoundRows'] )
+         );
+     }
+
+     if ( !$status->isGood() ) {
+         return $status;
+     }
+
+     return Status::newGood( $importData );
+ }
+
+ /**
+  * Import the data returned from the parseFile method.
+  *
+  * Each non-empty translation is saved as a revision of the page returned by
+  * MessageHandle::getTitleForLanguage for its language code.
+  *
+  * @param array[] $messagesWithTranslations Entries with 'messageTitle' and 'translations' keys
+  * @param Authority $authority Performer handed to the page updater
+  * @param string $comment Edit summary used for every saved revision
+  * @param callable|null $progressReporter Invoked once per message with the message title,
+  *  the statuses of the translations saved for it, the total number of messages and the
+  *  number of messages processed so far
+  * @return Status Untouched new Status when every save was OK; otherwise the failed
+  *  statuses are merged in and also set as the value, keyed by prefixed page title
+  */
+ public function importData(
+     array $messagesWithTranslations,
+     Authority $authority,
+     string $comment,
+     ?callable $progressReporter = null
+ ): Status {
+     $editSummary = CommentStoreComment::newUnsavedComment( $comment );
+     $totalMessages = count( $messagesWithTranslations );
+     $processedMessages = 0;
+     $failuresByPage = [];
+
+     foreach ( $messagesWithTranslations as $entry ) {
+         $messageTitle = Title::newFromText( $entry['messageTitle'] );
+         $handle = new MessageHandle( $messageTitle );
+         $savedStatuses = [];
+
+         foreach ( $entry['translations'] as $languageCode => $text ) {
+             // Nothing to save for cells that are missing or contain only whitespace
+             $hasText = $text !== null && trim( $text ) !== '';
+             if ( !$hasText ) {
+                 continue;
+             }
+
+             $targetTitle = $handle->getTitleForLanguage( $languageCode );
+
+             $page = $this->wikiPageFactory->newFromTitle( $targetTitle );
+             $updater = $page->newPageUpdater( $authority );
+             $updater->setContent(
+                 SlotRecord::MAIN,
+                 ContentHandler::makeContent( $text, $targetTitle )
+             );
+             $updater->setFlags( EDIT_FORCE_BOT );
+             $updater->saveRevision( $editSummary );
+
+             $saveStatus = $updater->getStatus();
+             $savedStatuses[] = $saveStatus;
+             if ( !$saveStatus->isOK() ) {
+                 $failuresByPage[ $targetTitle->getPrefixedText() ] = $saveStatus;
+             }
+         }
+
+         ++$processedMessages;
+         if ( $progressReporter !== null ) {
+             call_user_func(
+                 $progressReporter,
+                 $messageTitle,
+                 $savedStatuses,
+                 $totalMessages,
+                 $processedMessages
+             );
+         }
+     }
+
+     $importStatus = new Status();
+     foreach ( $failuresByPage as $failure ) {
+         $importStatus->merge( $failure );
+     }
+
+     if ( $failuresByPage !== [] ) {
+         $importStatus->setResult( false, $failuresByPage );
+     }
+
+     return $importStatus;
+ }
+
+ /**
+  * Read the language codes from the CSV header row.
+  *
+  * @param array $csvHeader Cells of the header row
+  * @return Status Good status whose value maps each lower-cased language code to the
+  *  position of its column in the row; fatal when there are fewer than two columns,
+  *  no language columns, or any unsupported language code.
+  */
+ private function getLanguagesFromHeader( array $csvHeader ): Status {
+     if ( count( $csvHeader ) < 2 ) {
+         return Status::newFatal(
+             'CSV has < 2 columns. Assuming that there are no languages to import'
+         );
+     }
+
+     // First two columns are message title and definition
+     $firstLanguageColumn = 2;
+     $headerLanguages = array_values( array_slice( $csvHeader, $firstLanguageColumn ) );
+     if ( count( $headerLanguages ) === 0 ) {
+         return Status::newFatal( 'No languages found for import' );
+     }
+
+     $columnByLanguage = [];
+     $rejectedCodes = [];
+     foreach ( $headerLanguages as $offset => $rawCode ) {
+         // Language codes maybe in upper case, convert to lower case for further use.
+         $normalizedCode = strtolower( $rawCode );
+         if ( TranslateUtils::isSupportedLanguageCode( $normalizedCode ) ) {
+             $columnByLanguage[ $normalizedCode ] = $firstLanguageColumn + $offset;
+         } else {
+             // Report the code exactly as it was written in the file
+             $rejectedCodes[] = $rawCode;
+         }
+     }
+
+     if ( $rejectedCodes !== [] ) {
+         return Status::newFatal(
+             'Invalid language codes detected in CSV header: ' . implode( ', ', $rejectedCodes )
+         );
+     }
+
+     return Status::newGood( $columnByLanguage );
+ }
+
+ /**
+  * Build a MessageHandle for the given title text.
+  *
+  * @param string $messageTitle Title text taken from the CSV row
+  * @return MessageHandle|null The handle when the text yields a Title and the handle's
+  *  isValid() check passes, null otherwise
+  */
+ private function getMessageHandleIfValid( string $messageTitle ): ?MessageHandle {
+     $parsedTitle = Title::newFromText( $messageTitle );
+     if ( $parsedTitle !== null ) {
+         $candidate = new MessageHandle( $parsedTitle );
+         if ( $candidate->isValid() ) {
+             return $candidate;
+         }
+     }
+
+     return null;
+ }
+
+ /**
+  * Tell whether a parsed CSV row represents a blank line.
+  *
+  * @param array $csvRow Cells of one row
+  * @return bool True when the row has exactly one cell and that cell is null or
+  *  contains only whitespace
+  */
+ private function isCsvRowEmpty( array $csvRow ): bool {
+     if ( count( $csvRow ) !== 1 ) {
+         return false;
+     }
+
+     $onlyCell = $csvRow[0];
+
+     return $onlyCell === null || trim( $onlyCell ) === '';
+ }
+}