Skip to content

Commit

Permalink
Introduce 'delta-pull' option to the third party sync command (#10)
Browse files Browse the repository at this point in the history
Introduce a delta-pull option to the third party sync command. When enabled, text unit imports will only occur for batches that contain updates. The implementation stores the checksum of a translated file pulled from a third party provider for a locale; on a subsequent sync, it compares the checksum of the current file with the stored checksum.

If the checksums match, no changes have occurred and processing exits early for that locale.
  • Loading branch information
maallen authored and aurambaj committed Sep 2, 2023
1 parent ce803c9 commit fc08864
Show file tree
Hide file tree
Showing 11 changed files with 607 additions and 47 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package com.box.l10n.mojito.entity;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.ForeignKey;
import javax.persistence.Index;
import javax.persistence.JoinColumn;
import javax.persistence.ManyToOne;
import javax.persistence.Table;

/**
 * Entity that stores the checksum of a translated file downloaded via a third party sync.
 *
 * <p>The table carries a unique index over {@code repository_id, locale_id, file_name}, so at most
 * one checksum row exists per repository / locale / file name combination.
 */
@Entity
@Table(
    name = "third_party_sync_file_checksum",
    indexes = {
      @Index(
          name = "I__TPS_FILE_CHECKSUM__REPO_ID__LOCALE_ID__FILE_NAME",
          columnList = "repository_id, locale_id, file_name",
          unique = true),
    })
public class ThirdPartyFileChecksum extends AuditableEntity {

  /** Repository the checksummed file belongs to; mandatory association. */
  @ManyToOne(optional = false)
  @JoinColumn(
      name = "repository_id",
      foreignKey = @ForeignKey(name = "FK__TPS_FILE_CHECKSUM__REPO__ID"))
  private Repository repository;

  /** Name of the file the checksum was computed for. */
  @Column(name = "file_name")
  private String fileName;

  /** Locale of the translated file; mandatory association. */
  @ManyToOne(optional = false)
  @JoinColumn(
      name = "locale_id",
      foreignKey = @ForeignKey(name = "FK__TPS_FILE_CHECKSUM__LOCALE__ID"))
  private Locale locale;

  /**
   * Stored checksum value. NOTE(review): presumably an MD5 digest of the file content; the
   * computation happens outside this class, confirm against the caller.
   */
  @Column(name = "md5")
  private String md5;

  /** No-argument constructor, as required for JPA entities. */
  public ThirdPartyFileChecksum() {}

  /**
   * Creates a fully populated checksum record.
   *
   * @param repository repository the file belongs to
   * @param fileName name of the file
   * @param locale locale of the translated file
   * @param md5 checksum value to store
   */
  public ThirdPartyFileChecksum(Repository repository, String fileName, Locale locale, String md5) {
    this.md5 = md5;
    this.locale = locale;
    this.fileName = fileName;
    this.repository = repository;
  }

  /** @return the repository the file belongs to */
  public Repository getRepository() {
    return this.repository;
  }

  /** @param repository the repository the file belongs to */
  public void setRepository(Repository repository) {
    this.repository = repository;
  }

  /** @return the name of the file */
  public String getFileName() {
    return this.fileName;
  }

  /** @param fileName the name of the file */
  public void setFileName(String fileName) {
    this.fileName = fileName;
  }

  /** @return the locale of the translated file */
  public Locale getLocale() {
    return this.locale;
  }

  /** @param locale the locale of the translated file */
  public void setLocale(Locale locale) {
    this.locale = locale;
  }

  /** @return the stored checksum value */
  public String getMd5() {
    return this.md5;
  }

  /** @param checksum the checksum value to store */
  public void setMd5(String checksum) {
    this.md5 = checksum;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.box.l10n.mojito.service.thirdparty;

import com.box.l10n.mojito.entity.Locale;
import com.box.l10n.mojito.entity.Repository;
import com.box.l10n.mojito.entity.ThirdPartyFileChecksum;
import java.util.Optional;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.rest.core.annotation.RepositoryRestResource;

/**
 * Spring Data JPA repository for {@link ThirdPartyFileChecksum} entities.
 *
 * <p>Marked {@code exported = false}, so it is not published as a Spring Data REST resource.
 */
@RepositoryRestResource(exported = false)
public interface ThirdPartyFileChecksumRepository
    extends JpaRepository<ThirdPartyFileChecksum, Long> {

  /**
   * Finds the checksum record matching the given repository, file name and locale.
   *
   * @param repository repository the file belongs to
   * @param fileName name of the file
   * @param locale locale of the translated file
   * @return the matching record, or an empty {@link Optional} when none is stored
   */
  Optional<ThirdPartyFileChecksum> findByRepositoryAndFileNameAndLocale(
      Repository repository, String fileName, Locale locale);

  /**
   * Finds a checksum record by its identifier.
   *
   * @param thirdPartyFileChecksumId identifier of the record
   * @return the matching record, or an empty {@link Optional} when none exists
   */
  Optional<ThirdPartyFileChecksum> findById(Long thirdPartyFileChecksumId);
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.box.l10n.mojito.service.thirdparty;

import static com.box.l10n.mojito.android.strings.AndroidPluralQuantity.MANY;
import static com.box.l10n.mojito.service.thirdparty.ThirdPartyTMSUtils.isFileEqualToPreviousRun;
import static com.box.l10n.mojito.service.thirdparty.smartling.SmartlingFileUtils.getOutputSourceFile;
import static com.box.l10n.mojito.service.thirdparty.smartling.SmartlingFileUtils.getOutputTargetFile;
import static com.box.l10n.mojito.service.thirdparty.smartling.SmartlingFileUtils.isPluralFile;
Expand Down Expand Up @@ -98,6 +99,8 @@ public class ThirdPartyTMSSmartling implements ThirdPartyTMS {

private final MeterRegistry meterRegistry;

private final ThirdPartyFileChecksumRepository thirdPartyFileChecksumRepository;

private final Set<String> supportedImageExtensions =
Sets.newHashSet("png", "jpg", "jpeg", "gif", "tiff");

Expand All @@ -115,7 +118,8 @@ public ThirdPartyTMSSmartling(
ThirdPartyTMSSmartlingWithJson thirdPartyTMSSmartlingWithJson,
ThirdPartyTMSSmartlingGlossary thirdPartyTMSSmartlingGlossary,
AssetTextUnitToTMTextUnitRepository assetTextUnitToTMTextUnitRepository,
MeterRegistry meterRegistry) {
MeterRegistry meterRegistry,
ThirdPartyFileChecksumRepository thirdPartyFileChecksumRepository) {
this(
smartlingClient,
textUnitSearcher,
Expand All @@ -126,7 +130,8 @@ public ThirdPartyTMSSmartling(
thirdPartyTMSSmartlingGlossary,
assetTextUnitToTMTextUnitRepository,
DEFAULT_BATCH_SIZE,
meterRegistry);
meterRegistry,
thirdPartyFileChecksumRepository);
}

public ThirdPartyTMSSmartling(
Expand All @@ -139,7 +144,8 @@ public ThirdPartyTMSSmartling(
ThirdPartyTMSSmartlingGlossary thirdPartyTMSSmartlingGlossary,
AssetTextUnitToTMTextUnitRepository assetTextUnitToTMTextUnitRepository,
int batchSize,
MeterRegistry meterRegistry) {
MeterRegistry meterRegistry,
ThirdPartyFileChecksumRepository thirdPartyFileChecksumRepository) {
this.smartlingClient = smartlingClient;
this.assetPathAndTextUnitNameKeys = assetPathAndTextUnitNameKeys;
this.textUnitBatchImporterService = textUnitBatchImporterService;
Expand All @@ -150,6 +156,7 @@ public ThirdPartyTMSSmartling(
this.thirdPartyTMSSmartlingGlossary = thirdPartyTMSSmartlingGlossary;
this.assetTextUnitToTMTextUnitRepository = assetTextUnitToTMTextUnitRepository;
this.meterRegistry = meterRegistry;
this.thirdPartyFileChecksumRepository = thirdPartyFileChecksumRepository;
}

@Override
Expand Down Expand Up @@ -529,14 +536,21 @@ public void pull(
String skipAssetsWithPathPattern,
List<String> optionList) {

SmartlingOptions options = SmartlingOptions.parseList(optionList);

meterRegistry
.timer("SmartlingSync.pull", Tags.of("repository", repository.getName()))
.timer(
"SmartlingSync.pull",
Tags.of(
"repository",
repository.getName(),
"deltaPull",
Boolean.toString(options.isDeltaPull())))
.record(
() -> {
SmartlingOptions options = SmartlingOptions.parseList(optionList);

if (options.isJsonSync()) {
thirdPartyTMSSmartlingWithJson.pull(repository, projectId, localeMapping);
thirdPartyTMSSmartlingWithJson.pull(
repository, projectId, localeMapping, options.isDeltaPull());
return;
}

Expand Down Expand Up @@ -602,7 +616,12 @@ private void processPullBatch(
.timer(
"SmartlingSync.processPullBatch",
Tags.of(
"repository", repository.getName(), "locale", locale.getLocale().getBcp47Tag()))
"repository",
repository.getName(),
"locale",
locale.getLocale().getBcp47Tag(),
"deltaPull",
Boolean.toString(options.isDeltaPull())))
.record(
() -> {
String localeTag = locale.getLocale().getBcp47Tag();
Expand Down Expand Up @@ -648,6 +667,23 @@ private void processPullBatch(
new SmartlingClientException(
"Error with download from Smartling, file content string is not present."));

if (options.isDeltaPull()
&& isFileEqualToPreviousRun(
thirdPartyFileChecksumRepository,
repository,
locale.getLocale(),
fileName,
fileContent,
meterRegistry)) {
logger.info(
"Checksum match for "
+ fileName
+ " in locale "
+ localeTag
+ ", skipping text unit import.");
return;
}

List<TextUnitDTO> textUnits;

try {
Expand Down Expand Up @@ -893,16 +929,17 @@ private Stream<List<TextUnitDTO>> partitionSingulars(
String localeTag,
String skipTextUnitsWithPattern,
String skipAssetsWithPathPattern) {
return partitionedStream(
TextUnitSearcherParameters parameters =
baseParams(
repositoryId,
localeTag,
skipTextUnitsWithPattern,
skipAssetsWithPathPattern,
true,
true,
null),
textUnitSearcher::search);
null);
parameters.setOrderByTextUnitID(true);
return partitionedStream(parameters, textUnitSearcher::search);
}

private Stream<List<TextUnitDTO>> partitionSingulars(
Expand All @@ -911,7 +948,7 @@ private Stream<List<TextUnitDTO>> partitionSingulars(
String skipTextUnitsWithPattern,
String skipAssetsWithPathPattern,
String includeTextUnitWithPattern) {
return partitionedStream(
TextUnitSearcherParameters parameters =
baseParams(
repositoryId,
localeTag,
Expand All @@ -920,25 +957,27 @@ private Stream<List<TextUnitDTO>> partitionSingulars(
true,
true,
null,
includeTextUnitWithPattern),
textUnitSearcher::search);
includeTextUnitWithPattern);
parameters.setOrderByTextUnitID(true);
return partitionedStream(parameters, textUnitSearcher::search);
}

private Stream<List<TextUnitDTO>> partitionPlurals(
Long repositoryId,
String localeTag,
String skipTextUnitsWithPattern,
String skipAssetsWithPathPattern) {
return partitionedStream(
TextUnitSearcherParameters parameters =
baseParams(
repositoryId,
localeTag,
skipTextUnitsWithPattern,
skipAssetsWithPathPattern,
false,
false,
"%"),
textUnitSearcher::search);
"%");
parameters.setOrderByTextUnitID(true);
return partitionedStream(parameters, textUnitSearcher::search);
}

private Stream<List<TextUnitDTO>> partitionPlurals(
Expand All @@ -961,7 +1000,7 @@ private Stream<List<TextUnitDTO>> partitionPlurals(
.collect(Collectors.toList()));
}

return partitionedStream(
TextUnitSearcherParameters parameters =
baseParams(
repositoryId,
localeTag,
Expand All @@ -970,8 +1009,11 @@ private Stream<List<TextUnitDTO>> partitionPlurals(
false,
false,
"%",
includeTextUnitsWithPattern),
searchFunction);
includeTextUnitsWithPattern);

parameters.setOrderByTextUnitID(true);

return partitionedStream(parameters, searchFunction);
}

private Stream<List<TextUnitDTO>> partitionedStream(
Expand Down Expand Up @@ -1037,7 +1079,6 @@ private TextUnitSearcherParameters baseParams(
result.setPluralFormsExcluded(pluralFormsExcluded);
result.setSkipTextUnitWithPattern(skipTextUnitsWithPattern);
result.setSkipAssetPathWithPattern(skipAssetsWithPathPattern);

if (!Strings.isNullOrEmpty(pluralFormOther)) {
result.setPluralFormOther(pluralFormOther);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.box.l10n.mojito.service.thirdparty;

import static com.box.l10n.mojito.service.thirdparty.ThirdPartyTMSUtils.isFileEqualToPreviousRun;
import static com.box.l10n.mojito.service.thirdparty.smartling.SmartlingFileUtils.isPluralFile;

import com.box.l10n.mojito.entity.Repository;
Expand Down Expand Up @@ -58,6 +59,8 @@ public class ThirdPartyTMSSmartlingWithJson {

MeterRegistry meterRegistry;

ThirdPartyFileChecksumRepository thirdPartyFileChecksumRepository;

int batchSize = 5000;

public ThirdPartyTMSSmartlingWithJson(
Expand All @@ -66,13 +69,15 @@ public ThirdPartyTMSSmartlingWithJson(
TextUnitSearcher textUnitSearcher,
TextUnitBatchImporterService textUnitBatchImporterService,
SmartlingJsonKeys smartlingJsonKeys,
MeterRegistry meterRegistry) {
MeterRegistry meterRegistry,
ThirdPartyFileChecksumRepository thirdPartyFileChecksumRepository) {
this.smartlingClient = smartlingClient;
this.smartlingJsonConverter = smartlingJsonConverter;
this.textUnitSearcher = textUnitSearcher;
this.textUnitBatchImporterService = textUnitBatchImporterService;
this.smartlingJsonKeys = smartlingJsonKeys;
this.meterRegistry = meterRegistry;
this.thirdPartyFileChecksumRepository = thirdPartyFileChecksumRepository;
}

void push(
Expand Down Expand Up @@ -187,7 +192,11 @@ void removeFileForBatchNumberGreaterOrEquals(
.block());
}

void pull(Repository repository, String projectId, Map<String, String> localeMapping) {
void pull(
Repository repository,
String projectId,
Map<String, String> localeMapping,
boolean isDeltaPull) {

List<File> repositoryFilesFromProject = getRepositoryFilesFromProject(repository, projectId);

Expand All @@ -201,6 +210,23 @@ void pull(Repository repository, String projectId, Map<String, String> localeMap
String localizedFileContent =
getLocalizedFileContent(projectId, file, smartlingLocale, false);

if (isDeltaPull
&& isFileEqualToPreviousRun(
thirdPartyFileChecksumRepository,
repository,
repositoryLocale.getLocale(),
file.getFileUri(),
localizedFileContent,
meterRegistry)) {
logger.info(
"Checksum match for "
+ file.getFileUri()
+ " in locale "
+ repositoryLocale.getLocale().getBcp47Tag()
+ ", skipping text unit import.");
return;
}

ImmutableList<TextUnitDTO> textUnitDTOS =
smartlingJsonConverter.jsonStringToTextUnitDTOs(
localizedFileContent, TextUnitDTO::setTarget);
Expand Down Expand Up @@ -339,6 +365,7 @@ PageFetcherOffsetAndLimitSplitIterator<TextUnitDTO> getSourceTextUnitIterator(
parameters.setOffset(offset);
parameters.setLimit(limit);
parameters.setPluralFormsFiltered(true);
parameters.setOrderByTextUnitID(true);
List<TextUnitDTO> search = textUnitSearcher.search(parameters);
return search;
},
Expand Down
Loading

0 comments on commit fc08864

Please sign in to comment.