-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
validator-core - refactor data normalization to support MultiTable (add an AutoFeatureType factory), FileConverter : disable legacy FixGML (refs #231)
- Loading branch information
Showing
13 changed files
with
487 additions
and
150 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
175 changes: 175 additions & 0 deletions
175
validator-core/src/main/java/fr/ign/validator/normalize/DocumentNormalizer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
package fr.ign.validator.normalize; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.List; | ||
|
||
import org.apache.commons.io.FileUtils; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.apache.logging.log4j.Marker; | ||
import org.apache.logging.log4j.MarkerManager; | ||
|
||
import fr.ign.validator.Context; | ||
import fr.ign.validator.data.Document; | ||
import fr.ign.validator.data.DocumentFile; | ||
import fr.ign.validator.model.FeatureType; | ||
import fr.ign.validator.model.FileModel; | ||
import fr.ign.validator.model.file.MetadataModel; | ||
import fr.ign.validator.model.file.MultiTableModel; | ||
import fr.ign.validator.model.file.PdfModel; | ||
import fr.ign.validator.model.file.TableModel; | ||
import fr.ign.validator.tools.AutoFeatureType; | ||
import fr.ign.validator.tools.MultiTableReader; | ||
|
||
/** | ||
* Creates DATA and METADATA directories in the validation directory : | ||
* | ||
* <ul> | ||
* <li>Tables are normalized according to FeatureType as | ||
* DATA/{fileModel.name}.csv</li> | ||
* <li>Tables are normalized according to FeatureType as | ||
* DATA/{fileModel.name}/{tableName}.csv</li> | ||
* <li>PDF are copied to DATA directory</li> | ||
* <li>Metadata are copied to METADATA directory</li> | ||
* <li>Directories are ignored</li> | ||
* </ul> | ||
* | ||
* Note that DATA and METADATA corresponds to the structure of an EaaS delivery | ||
* (former geoportal datastore). | ||
* | ||
* @author MBorne | ||
* | ||
*/ | ||
public class DocumentNormalizer { | ||
public static final Logger log = LogManager.getRootLogger(); | ||
public static final Marker MARKER = MarkerManager.getMarker("NormalizePostProcess"); | ||
|
||
/** | ||
* Normalize document files. | ||
* | ||
* @param context | ||
*/ | ||
public void normalize(Context context, Document document) throws IOException { | ||
log.info(MARKER, "Create normalized files in {} ...", context.getDataDirectory()); | ||
|
||
/* | ||
* Create a normalized CSV file for each FileModel. | ||
*/ | ||
List<FileModel> fileModels = document.getDocumentModel().getFileModels(); | ||
for (FileModel fileModel : fileModels) { | ||
// Retrieve document files corresponding to the FileModel | ||
List<DocumentFile> documentFiles = document.getDocumentFilesByModel(fileModel); | ||
|
||
if (fileModel instanceof TableModel) { | ||
normalizeTable(context, (TableModel) fileModel, documentFiles); | ||
} else if (fileModel instanceof MultiTableModel) { | ||
normalizeMultiTable(context, (MultiTableModel) fileModel, documentFiles); | ||
} else if (fileModel instanceof PdfModel) { | ||
createFlatCopyInTargetDirectory(fileModel, documentFiles, context.getDataDirectory()); | ||
} else if (fileModel instanceof MetadataModel) { | ||
createFlatCopyInTargetDirectory(fileModel, documentFiles, context.getMetadataDirectory()); | ||
} | ||
} | ||
|
||
log.info(MARKER, "Create normalized files in {} : completed.", context.getDataDirectory()); | ||
} | ||
|
||
/** | ||
* Convert documentFiles in a normalized DATA/{fileModel.name}.csv file. | ||
* | ||
* @param context | ||
* @param fileModel | ||
* @param documentFiles | ||
* @throws IOException | ||
*/ | ||
private void normalizeTable(Context context, TableModel fileModel, List<DocumentFile> documentFiles) | ||
throws IOException { | ||
FeatureType featureType = fileModel.getFeatureType(); | ||
if (featureType == null) { | ||
log.warn(MARKER, "Skip {} (no FeatureType provided)", fileModel.getName()); | ||
return; | ||
} | ||
|
||
File csvFile = new File(context.getDataDirectory(), fileModel.getName() + ".csv"); | ||
log.warn(MARKER, "Create {} (no FeatureType provided)", fileModel.getName()); | ||
TableNormalizer normalizer = new TableNormalizer(context, featureType, csvFile); | ||
for (DocumentFile documentFile : documentFiles) { | ||
log.info(MARKER, "Append {} to CSV file {}...", documentFile.getPath(), csvFile); | ||
normalizer.append(documentFile.getPath()); | ||
} | ||
normalizer.close(); | ||
} | ||
|
||
/** | ||
* Convert documentFiles in a normalized DATA/{fileModel.name}.{tableName}.csv | ||
* file. | ||
* | ||
* @param context | ||
* @param fileModel | ||
* @param documentFiles | ||
* @throws IOException | ||
*/ | ||
private void normalizeMultiTable(Context context, MultiTableModel fileModel, List<DocumentFile> documentFiles) | ||
throws IOException { | ||
if (documentFiles.isEmpty() && documentFiles.size() > 1) { | ||
log.warn( | ||
MARKER, "{} - skipped (found {} files, normalization not supported for MultiTable)", | ||
fileModel.getName(), | ||
documentFiles.size() | ||
); | ||
return; | ||
} | ||
DocumentFile documentFile = documentFiles.get(0); | ||
MultiTableReader reader = MultiTableReader.createMultiTableReader(documentFile.getPath()); | ||
for (String tableName : reader.getTableNames()) { | ||
/* | ||
* Retrieve source path for CSV converted table. | ||
*/ | ||
File sourceFile = reader.getTablePath(tableName); | ||
/* | ||
* Detected FeatureType from CSV. | ||
* | ||
* TODO allow user to provide featureTypes. | ||
*/ | ||
FeatureType featureType = AutoFeatureType.createFeatureTypeFromTable(sourceFile); | ||
|
||
/* | ||
* Prepare output directory for the FileModel DATA/{fileModel.name} | ||
*/ | ||
File outputDir = new File(context.getDataDirectory(), fileModel.getName()); | ||
if (!outputDir.exists()) { | ||
outputDir.mkdirs(); | ||
} | ||
/* | ||
* Create normalized CSV file. | ||
*/ | ||
File outputFile = new File(outputDir, tableName + ".csv"); | ||
TableNormalizer normalizer = new TableNormalizer(context, featureType, outputFile); | ||
normalizer.append(sourceFile); | ||
normalizer.close(); | ||
} | ||
} | ||
|
||
/** | ||
* Copy files to targetDirectory without original hierarchy. | ||
* | ||
* @param documentFiles | ||
* @param targetDirectory | ||
* @throws IOException | ||
*/ | ||
private void createFlatCopyInTargetDirectory( | ||
FileModel fileModel, | ||
List<DocumentFile> documentFiles, | ||
File targetDirectory) throws IOException { | ||
|
||
log.warn(MARKER, "{} - Copy {} files to {} ...", fileModel.getName(), fileModel.getType(), targetDirectory); | ||
for (DocumentFile documentFile : documentFiles) { | ||
File srcFile = documentFile.getPath(); | ||
File destFile = new File(targetDirectory, srcFile.getName()); | ||
log.info(MARKER, "Copy {} to {}...", srcFile, destFile); | ||
FileUtils.copyFile(srcFile, destFile); | ||
} | ||
} | ||
|
||
} |
Oops, something went wrong.