diff --git a/validator-core/src/main/java/fr/ign/validator/Context.java b/validator-core/src/main/java/fr/ign/validator/Context.java index 5f362d01..a4b080c1 100644 --- a/validator-core/src/main/java/fr/ign/validator/Context.java +++ b/validator-core/src/main/java/fr/ign/validator/Context.java @@ -631,7 +631,11 @@ public void setOutputProjection(Projection outputProjection) { * @return */ public File getDataDirectory() { - return new File(validationDirectory, getCurrentDirectory().getName() + "/DATA"); + File result = new File(validationDirectory, getCurrentDirectory().getName() + "/DATA"); + if (!result.exists()) { + result.mkdirs(); + } + return result; } /** @@ -640,7 +644,11 @@ public File getDataDirectory() { * @return */ public File getMetadataDirectory() { - return new File(validationDirectory, getCurrentDirectory().getName() + "/METADATA"); + File result = new File(validationDirectory, getCurrentDirectory().getName() + "/METADATA"); + if (!result.exists()) { + result.mkdirs(); + } + return result; } /** diff --git a/validator-core/src/main/java/fr/ign/validator/info/DocumentInfoExtractor.java b/validator-core/src/main/java/fr/ign/validator/info/DocumentInfoExtractor.java index e6aee5d4..3beed4e2 100644 --- a/validator-core/src/main/java/fr/ign/validator/info/DocumentInfoExtractor.java +++ b/validator-core/src/main/java/fr/ign/validator/info/DocumentInfoExtractor.java @@ -3,6 +3,7 @@ import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.util.Collection; import java.util.List; import org.apache.logging.log4j.LogManager; @@ -25,8 +26,10 @@ import fr.ign.validator.metadata.gmd.MetadataISO19115; import fr.ign.validator.model.FileModel; import fr.ign.validator.model.file.MetadataModel; +import fr.ign.validator.model.file.MultiTableModel; import fr.ign.validator.model.file.TableModel; import fr.ign.validator.tools.EnvelopeUtils; +import fr.ign.validator.tools.FileUtils; import fr.ign.validator.tools.TableReader; 
/** @@ -41,6 +44,17 @@ public class DocumentInfoExtractor { public static final Logger log = LogManager.getRootLogger(); public static final Marker MARKER = MarkerManager.getMarker("DocumentInfoExtractor"); + /** + * Stats about a given table. + * + * @author mborne + * + */ + private class TableStats { + Envelope boundingBox = new Envelope(); + int totalFeatures = 0; + } + /** * Gets informations on directory * @@ -82,46 +96,94 @@ private void parseDocumentFiles(Context context, Document document, DocumentInfo documentFileInfo.setName(documentFile.getPath().getName()); documentFileInfo.setPath(context.relativize(documentFile.getPath())); if (fileModel instanceof TableModel) { - parseTable(context, fileModel, documentFileInfo); + parseTable(context, (TableModel) fileModel, documentFileInfo); + } else if (fileModel instanceof MultiTableModel) { + parseTables(context, (MultiTableModel) fileModel, documentFileInfo); } documentInfo.addFile(documentFileInfo); } } /** - * Retreive boundingBox and featureCount from normalized file + * Retrieve boundingBox and featureCount from normalized file * * @param context * @param fileModel * @param documentFileInfo */ - private void parseTable(Context context, FileModel fileModel, DocumentFileInfo documentFileInfo) { + private void parseTable(Context context, TableModel fileModel, DocumentFileInfo documentFileInfo) { File csvFile = new File(context.getDataDirectory(), fileModel.getName() + ".csv"); + TableStats stats = getTableStatsFromNormalizedCSV(csvFile); + if (stats != null) { + documentFileInfo.setTotalFeatures(stats.totalFeatures); + documentFileInfo.setBoundingBox(stats.boundingBox); + } + } - Envelope boundingBox = new Envelope(); - int totalFeatures = 0; + /** + * Retrieve boundingBox and featureCount from normalized file + * + * @param context + * @param fileModel + * @param documentFileInfo + */ + private void parseTables(Context context, MultiTableModel fileModel, DocumentFileInfo documentFileInfo) { + File 
csvDirectory = new File(context.getDataDirectory(), fileModel.getName()); + + // stats for all tables + TableStats result = new TableStats(); + + String[] extensions = { + "csv" + }; + Collection csvFiles = FileUtils.listFilesAndDirs(csvDirectory, extensions); + if (csvFiles.isEmpty()) { + log.warn(MARKER, "normalized CSV files for {} not found", fileModel.getName()); + return; + } + + for (File csvFile : csvFiles) { + TableStats tableStats = getTableStatsFromNormalizedCSV(csvFile); + if (tableStats == null) { + continue; + } + // TODO save stats for each table in multi_table. + result.totalFeatures += tableStats.totalFeatures; + result.boundingBox.expandToInclude(tableStats.boundingBox); + } + + documentFileInfo.setTotalFeatures(result.totalFeatures); + documentFileInfo.setBoundingBox(result.boundingBox); + } + + /** + * Get {@link TableStats} from a normalized CSV file. + * + * @param csvFile + * @return + */ + private TableStats getTableStatsFromNormalizedCSV(File csvFile) { + TableStats result = new TableStats(); try { TableReader reader = TableReader.createTableReader(csvFile, StandardCharsets.UTF_8); - // retreive geometry column + int indexWktColumn = reader.findColumn("WKT"); while (reader.hasNext()) { String[] row = reader.next(); // count features - totalFeatures++; + result.totalFeatures++; // compute bounding box if (indexWktColumn >= 0) { String wkt = row[indexWktColumn]; - boundingBox.expandToInclude(EnvelopeUtils.getEnvelope(wkt)); + result.boundingBox.expandToInclude(EnvelopeUtils.getEnvelope(wkt)); } } } catch (IOException e) { - log.error(MARKER, "Fail to extract infos from " + fileModel.getName() + ".csv"); - return; + log.error(MARKER, "fail to compute stats for {}", csvFile); + return null; } - - documentFileInfo.setTotalFeatures(totalFeatures); - documentFileInfo.setBoundingBox(boundingBox); + return result; } /** diff --git a/validator-core/src/main/java/fr/ign/validator/normalize/DocumentNormalizer.java 
b/validator-core/src/main/java/fr/ign/validator/normalize/DocumentNormalizer.java new file mode 100644 index 00000000..df6d796f --- /dev/null +++ b/validator-core/src/main/java/fr/ign/validator/normalize/DocumentNormalizer.java @@ -0,0 +1,175 @@ +package fr.ign.validator.normalize; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.apache.commons.io.FileUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.Marker; +import org.apache.logging.log4j.MarkerManager; + +import fr.ign.validator.Context; +import fr.ign.validator.data.Document; +import fr.ign.validator.data.DocumentFile; +import fr.ign.validator.model.FeatureType; +import fr.ign.validator.model.FileModel; +import fr.ign.validator.model.file.MetadataModel; +import fr.ign.validator.model.file.MultiTableModel; +import fr.ign.validator.model.file.PdfModel; +import fr.ign.validator.model.file.TableModel; +import fr.ign.validator.tools.AutoFeatureType; +import fr.ign.validator.tools.MultiTableReader; + +/** + * Creates DATA and METADATA directories in the validation directory : + * + * + * + * Note that DATA and METADATA corresponds to the structure of an EaaS delivery + * (former geoportal datastore). + * + * @author MBorne + * + */ +public class DocumentNormalizer { + public static final Logger log = LogManager.getRootLogger(); + public static final Marker MARKER = MarkerManager.getMarker("NormalizePostProcess"); + + /** + * Normalize document files. + * + * @param context + */ + public void normalize(Context context, Document document) throws IOException { + log.info(MARKER, "Create normalized files in {} ...", context.getDataDirectory()); + + /* + * Create a normalized CSV file for each FileModel. 
+         */
+        List<FileModel> fileModels = document.getDocumentModel().getFileModels();
+        for (FileModel fileModel : fileModels) {
+            // Retrieve document files corresponding to the FileModel
+            List<DocumentFile> documentFiles = document.getDocumentFilesByModel(fileModel);
+
+            if (fileModel instanceof TableModel) {
+                normalizeTable(context, (TableModel) fileModel, documentFiles);
+            } else if (fileModel instanceof MultiTableModel) {
+                normalizeMultiTable(context, (MultiTableModel) fileModel, documentFiles);
+            } else if (fileModel instanceof PdfModel) {
+                createFlatCopyInTargetDirectory(fileModel, documentFiles, context.getDataDirectory());
+            } else if (fileModel instanceof MetadataModel) {
+                createFlatCopyInTargetDirectory(fileModel, documentFiles, context.getMetadataDirectory());
+            }
+        }
+
+        log.info(MARKER, "Create normalized files in {} : completed.", context.getDataDirectory());
+    }
+
+    /**
+     * Convert documentFiles in a normalized DATA/{fileModel.name}.csv file.
+     *
+     * @param context       validation context (gives the DATA directory, passed to the normalizer)
+     * @param fileModel     table model providing the FeatureType and the CSV base name
+     * @param documentFiles files appended, in iteration order, to the single output CSV
+     * @throws IOException propagated from {@link TableNormalizer} creation or append
+     */
+    private void normalizeTable(Context context, TableModel fileModel, List<DocumentFile> documentFiles)
+        throws IOException {
+        FeatureType featureType = fileModel.getFeatureType();
+        if (featureType == null) {
+            log.warn(MARKER, "Skip {} (no FeatureType provided)", fileModel.getName());
+            return;
+        }
+
+        File csvFile = new File(context.getDataDirectory(), fileModel.getName() + ".csv");
+        log.info(MARKER, "Create {}...", csvFile);
+        try (TableNormalizer normalizer = new TableNormalizer(context, featureType, csvFile)) {
+            for (DocumentFile documentFile : documentFiles) {
+                log.info(MARKER, "Append {} to CSV file {}...", documentFile.getPath(), csvFile);
+                normalizer.append(documentFile.getPath());
+            }
+        } // try-with-resources: the normalizer is closed even when append fails
+    }
+
+    /**
+     * Convert documentFiles in a normalized DATA/{fileModel.name}/{tableName}.csv
+     * file.
+     *
+     * @param context       validation context (gives the DATA directory, passed to the normalizer)
+     * @param fileModel     multi-table model naming the DATA/{fileModel.name} output directory
+     * @param documentFiles files matched to the model; normalization runs only for exactly one
+     * @throws IOException propagated from FeatureType detection or {@link TableNormalizer}
+     */
+    private void normalizeMultiTable(Context context, MultiTableModel fileModel, List<DocumentFile> documentFiles)
+        throws IOException {
+        if (documentFiles.size() != 1) { // zero or several files: get(0) below would fail or drop data
+            log.warn(
+                MARKER, "{} - skipped (found {} files, normalization not supported for MultiTable)",
+                fileModel.getName(),
+                documentFiles.size()
+            );
+            return;
+        }
+        DocumentFile documentFile = documentFiles.get(0);
+        MultiTableReader reader = MultiTableReader.createMultiTableReader(documentFile.getPath());
+        for (String tableName : reader.getTableNames()) {
+            /*
+             * Retrieve source path for CSV converted table.
+             */
+            File sourceFile = reader.getTablePath(tableName);
+            /*
+             * Detected FeatureType from CSV.
+             *
+             * TODO allow user to provide featureTypes.
+             */
+            FeatureType featureType = AutoFeatureType.createFeatureTypeFromTable(sourceFile);
+
+            /*
+             * Prepare output directory for the FileModel DATA/{fileModel.name}
+             */
+            File outputDir = new File(context.getDataDirectory(), fileModel.getName());
+            if (!outputDir.exists()) {
+                outputDir.mkdirs();
+            }
+            /*
+             * Create normalized CSV file (closed even when append fails).
+             */
+            File outputFile = new File(outputDir, tableName + ".csv");
+            try (TableNormalizer normalizer = new TableNormalizer(context, featureType, outputFile)) {
+                normalizer.append(sourceFile);
+            }
+        }
+    }
+
+    /**
+     * Copy files to targetDirectory without original hierarchy.
+ * + * @param documentFiles + * @param targetDirectory + * @throws IOException + */ + private void createFlatCopyInTargetDirectory( + FileModel fileModel, + List documentFiles, + File targetDirectory) throws IOException { + + log.warn(MARKER, "{} - Copy {} files to {} ...", fileModel.getName(), fileModel.getType(), targetDirectory); + for (DocumentFile documentFile : documentFiles) { + File srcFile = documentFile.getPath(); + File destFile = new File(targetDirectory, srcFile.getName()); + log.info(MARKER, "Copy {} to {}...", srcFile, destFile); + FileUtils.copyFile(srcFile, destFile); + } + } + +} diff --git a/validator-core/src/main/java/fr/ign/validator/normalize/CSVNormalizer.java b/validator-core/src/main/java/fr/ign/validator/normalize/TableNormalizer.java similarity index 78% rename from validator-core/src/main/java/fr/ign/validator/normalize/CSVNormalizer.java rename to validator-core/src/main/java/fr/ign/validator/normalize/TableNormalizer.java index eb650072..3cf819c3 100644 --- a/validator-core/src/main/java/fr/ign/validator/normalize/CSVNormalizer.java +++ b/validator-core/src/main/java/fr/ign/validator/normalize/TableNormalizer.java @@ -26,12 +26,12 @@ /** * - * Normalize CSV file according to FeatureType + * Normalize table file producing a CSV according to FeatureType columns. * * @author MBorne * */ -public class CSVNormalizer implements Closeable { +public class TableNormalizer implements Closeable { public static final Logger log = LogManager.getRootLogger(); public static final Marker MARKER = MarkerManager.getMarker("CSVNormalizer"); @@ -55,7 +55,15 @@ public class CSVNormalizer implements Closeable { */ private CSVPrinter printer; - public CSVNormalizer(Context context, FeatureType featureType, File targetFile) throws IOException { + /** + * Create table normalizer with a given FeatureType. 
+ * + * @param context + * @param featureType + * @param targetFile + * @throws IOException + */ + public TableNormalizer(Context context, FeatureType featureType, File targetFile) throws IOException { this.context = context; this.featureType = featureType; @@ -91,9 +99,10 @@ public CSVNormalizer(Context context, FeatureType featureType, File targetFile) * Append rows corresponding to a document file * * @param documentFile + * @throws IOException * @throws Exception */ - public void append(File csvFile) throws Exception { + public void append(File csvFile) throws IOException { TableReader reader = TableReader.createTableReader( csvFile, context.getEncoding() @@ -111,7 +120,9 @@ public void append(File csvFile) throws Exception { if (position < 0) { continue; } - // binding + /* + * bind value to the expected type + */ AttributeType attribute = featureType.getAttribute(position); Object bindedValue = null; try { @@ -121,12 +132,15 @@ public void append(File csvFile) throws Exception { } } catch (IllegalArgumentException e) { log.warn( - MARKER, "{}.{} : {} transformé en valeur nulle (type non valide)", inputRow[i], - featureType.getName(), attribute.getName() + MARKER, "{}.{} : {} converted to null (bad type).", + inputRow[i], + featureType.getName(), + attribute.getName() ); } - // formatting + // format binded value to get normalized output. String outputValue = attribute.formatObject(bindedValue); + // apply string fixer to remove bad chars. 
outputValue = context.getStringFixer().transform(outputValue); outputRow[position] = outputValue; } @@ -135,8 +149,12 @@ public void append(File csvFile) throws Exception { } @Override - public void close() throws IOException { - printer.close(); + public void close() { + try { + printer.close(); + } catch (IOException e) { + throw new RuntimeException("fail to close CSV printer", e); + } } } diff --git a/validator-core/src/main/java/fr/ign/validator/process/NormalizePostProcess.java b/validator-core/src/main/java/fr/ign/validator/process/NormalizePostProcess.java index 5d453b66..abb7d4df 100644 --- a/validator-core/src/main/java/fr/ign/validator/process/NormalizePostProcess.java +++ b/validator-core/src/main/java/fr/ign/validator/process/NormalizePostProcess.java @@ -1,9 +1,5 @@ package fr.ign.validator.process; -import java.io.File; -import java.util.List; - -import org.apache.commons.io.FileUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Marker; @@ -12,25 +8,11 @@ import fr.ign.validator.Context; import fr.ign.validator.ValidatorListener; import fr.ign.validator.data.Document; -import fr.ign.validator.data.DocumentFile; -import fr.ign.validator.model.FeatureType; -import fr.ign.validator.model.FileModel; -import fr.ign.validator.model.file.MetadataModel; -import fr.ign.validator.model.file.PdfModel; -import fr.ign.validator.model.file.TableModel; -import fr.ign.validator.normalize.CSVNormalizer; +import fr.ign.validator.normalize.DocumentNormalizer; /** - * - * Creates DATA and METADATA directories in the validation directory : - * - *
<ul>
- * <li>Tables are normalized as csv files according to corresponding FeatureType
- * in DATA directory</li>
- * <li>PDF are copied to DATA directory</li>
- * <li>Metadata are copied to METADATA directory</li>
- * <li>Directories are ignored</li>
- * </ul>
+ * Invoke {@link DocumentNormalizer} to normalize input data in validation + * directory. * * @author MBorne * @@ -41,12 +23,12 @@ public class NormalizePostProcess implements ValidatorListener { @Override public void beforeMatching(Context context, Document document) throws Exception { - + // nothing to do } @Override public void beforeValidate(Context context, Document document) throws Exception { - + // nothing to do } @Override @@ -55,61 +37,10 @@ public void afterValidate(Context context, Document document) throws Exception { log.info(MARKER, "Skipped as normalize is disabled (use --normalize)"); return; } - /* - * Creating DATA directory - */ - File dataDirectory = context.getDataDirectory(); - if (!dataDirectory.exists()) { - dataDirectory.mkdirs(); - } - - /* - * Creating METADATA directory - */ - File metadataDirectory = context.getMetadataDirectory(); - if (!metadataDirectory.exists()) { - metadataDirectory.mkdirs(); - } - - log.info(MARKER, "Create normalized files in {}", dataDirectory); - - /* - * Create a normalized CSV file for each FileModel. 
- */ - List fileModels = document.getDocumentModel().getFileModels(); - for (FileModel fileModel : fileModels) { - if (fileModel instanceof TableModel) { - FeatureType featureType = fileModel.getFeatureType(); - if (featureType == null) { - continue; - } - File csvFile = new File(dataDirectory, fileModel.getName() + ".csv"); - CSVNormalizer normalizer = new CSVNormalizer(context, featureType, csvFile); - List documentFiles = document.getDocumentFilesByModel(fileModel); - for (DocumentFile documentFile : documentFiles) { - log.info(MARKER, "Append {} to CSV file {}...", documentFile.getPath(), csvFile); - normalizer.append(documentFile.getPath()); - } - normalizer.close(); - } else if (fileModel instanceof PdfModel) { - List documentFiles = document.getDocumentFilesByModel(fileModel); - for (DocumentFile documentFile : documentFiles) { - File srcFile = documentFile.getPath(); - File destFile = new File(dataDirectory, srcFile.getName()); - log.info(MARKER, "Copy {} to {}...", srcFile, destFile); - FileUtils.copyFile(srcFile, destFile); - } - } else if (fileModel instanceof MetadataModel) { - List documentFiles = document.getDocumentFilesByModel(fileModel); - for (DocumentFile documentFile : documentFiles) { - File srcFile = documentFile.getPath(); - File destFile = new File(metadataDirectory, srcFile.getName()); - log.info(MARKER, "Copy {} to {}...", srcFile, destFile); - FileUtils.copyFile(srcFile, destFile); - } - } - } - + log.info(MARKER, "Normalize input data..."); + DocumentNormalizer normalizer = new DocumentNormalizer(); + normalizer.normalize(context, document); + log.info(MARKER, "Normalize input data : completed..."); } } diff --git a/validator-core/src/main/java/fr/ign/validator/process/RemovePreviousFilesPreProcess.java b/validator-core/src/main/java/fr/ign/validator/process/RemovePreviousFilesPreProcess.java index 8284f33f..22a25fa7 100644 --- a/validator-core/src/main/java/fr/ign/validator/process/RemovePreviousFilesPreProcess.java +++ 
b/validator-core/src/main/java/fr/ign/validator/process/RemovePreviousFilesPreProcess.java @@ -3,7 +3,6 @@ import java.io.File; import java.util.Collection; -import org.apache.commons.io.FileUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Marker; @@ -12,6 +11,8 @@ import fr.ign.validator.Context; import fr.ign.validator.ValidatorListener; import fr.ign.validator.data.Document; +import fr.ign.validator.tools.FileUtils; +import fr.ign.validator.tools.MultiTableReader; import fr.ign.validator.tools.TableReader; /** @@ -26,27 +27,61 @@ public class RemovePreviousFilesPreProcess implements ValidatorListener { public static final Logger log = LogManager.getRootLogger(); public static final Marker MARKER = MarkerManager.getMarker("RemovePreviousFilesPreProcess"); + /** + * Run process on document directory. + * + * @param documentPath + */ + public void run(File documentPath) { + log.info(MARKER, "Remove temp files from {} ...", documentPath); + + Collection files = FileUtils.listFilesAndDirs(documentPath, null); + for (File file : files) { + if (isMultiTableReaderFile(file)) { + log.info(MARKER, "Remove MultiTableReader directory {}...", file); + org.apache.commons.io.FileUtils.deleteQuietly(file); + } else if (isTableReaderFile(file)) { + log.info(MARKER, "Remove TableReader file {}...", file); + org.apache.commons.io.FileUtils.deleteQuietly(file); + } + } + + log.info(MARKER, "Remove temp files from {} : completed.", documentPath); + } + + /** + * Is generated by {@link MultiTableReader}? + * + * @param file + * @return + */ + private boolean isMultiTableReaderFile(File file) { + return file.isDirectory() && file.getName().endsWith(MultiTableReader.TMP_EXTENSION); + } + + /** + * Is generated by {@link TableReader}? 
+ * + * @param file + * @return + */ + private boolean isTableReaderFile(File file) { + return (!file.isDirectory()) && file.getName().endsWith(TableReader.TMP_EXTENSION); + } + @Override public void beforeMatching(Context context, Document document) throws Exception { - log.info(MARKER, "Remove files from previous execution..."); - String[] extensions = new String[] { - TableReader.TMP_EXTENSION - }; - Collection tempFiles = FileUtils.listFiles(document.getDocumentPath(), extensions, true); - for (File tempFile : tempFiles) { - log.info(MARKER, "Remove file {}...", tempFile.getAbsolutePath()); - tempFile.delete(); - } + run(document.getDocumentPath()); } @Override public void beforeValidate(Context context, Document document) throws Exception { - + // nothing to do } @Override public void afterValidate(Context context, Document document) throws Exception { - + // nothing to do } } diff --git a/validator-core/src/main/java/fr/ign/validator/tools/AutoFeatureType.java b/validator-core/src/main/java/fr/ign/validator/tools/AutoFeatureType.java new file mode 100644 index 00000000..05b3ced6 --- /dev/null +++ b/validator-core/src/main/java/fr/ign/validator/tools/AutoFeatureType.java @@ -0,0 +1,53 @@ +package fr.ign.validator.tools; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import org.apache.commons.io.FilenameUtils; + +import fr.ign.validator.model.AttributeType; +import fr.ign.validator.model.FeatureType; +import fr.ign.validator.model.type.GeometryType; +import fr.ign.validator.model.type.StringType; + +/** + * Dirty way to create minimal {@link FeatureType} reading data from table in + * order to allow geometry validation when no model is provided for data. + * + * @author MBorne + * + */ +public class AutoFeatureType { + + private AutoFeatureType() { + // helper class grouping static helpers + } + + /** + * Create a minimal {@link FeatureType} reading data from a CSV generated by + * ogr2ogr : + *
    + *
  • WKT column is defined as {@link GeometryType}
  • + *
  • other fields are defined as {@link StringType}
  • + *
+ * + * @param path + * @return + * @throws IOException + */ + public static FeatureType createFeatureTypeFromTable(File path) throws IOException { + FeatureType result = new FeatureType(); + result.setName(FilenameUtils.getBaseName(path.getName())); + + TableReader reader = TableReader.createTableReader(path, StandardCharsets.UTF_8); + for (String attributeName : reader.getHeader()) { + AttributeType attribute = attributeName.equalsIgnoreCase("WKT") ? new GeometryType() : new StringType(); + attribute.setName(attributeName); + result.addAttribute(attribute); + } + + return result; + } + +} diff --git a/validator-core/src/main/java/fr/ign/validator/tools/FileConverter.java b/validator-core/src/main/java/fr/ign/validator/tools/FileConverter.java index 87453611..d93b325b 100644 --- a/validator-core/src/main/java/fr/ign/validator/tools/FileConverter.java +++ b/validator-core/src/main/java/fr/ign/validator/tools/FileConverter.java @@ -20,7 +20,6 @@ import org.apache.logging.log4j.Marker; import org.apache.logging.log4j.MarkerManager; -import fr.ign.validator.tools.internal.FixGML; import fr.ign.validator.tools.ogr.OgrVersion; /** @@ -83,7 +82,7 @@ public OgrVersion getVersion() { /** * Convert a source file with a given sourceCharset to an UTF-8 encoded CSV - * target + * target. * * @param source * @param target @@ -95,30 +94,33 @@ public void convertToCSV(File source, File target, Charset sourceCharset) throws if (target.exists()) { target.delete(); } - String sourceExtension = FilenameUtils.getExtension(source.getName()).toLowerCase(); + /* - * patch on GML files + * Prepare command arguments. */ - if (sourceExtension.equals("gml")) { - fixGML(source); - } + String[] args = getArguments(source, target, DRIVER_CSV); + Map envs = new HashMap<>(); + /* - * Removing cpg + * Remove CPG files as they may contains non portable values such as system. 
*/ CompanionFileUtils.removeCompanionFile(source, "cpg"); CompanionFileUtils.removeCompanionFile(source, "CPG"); - String[] args = getArguments(source, target, DRIVER_CSV); - Map envs = new HashMap<>(); - + /* + * Configure charset for shapefiles + */ + String sourceExtension = FilenameUtils.getExtension(source.getName()).toLowerCase(); if (sourceExtension.equals("dbf") || sourceExtension.equals("shp")) { envs.put("SHAPE_ENCODING", toEncoding(sourceCharset)); } runCommand(args, envs); + /* - * Controls that output file is created + * Ensure that output file is created */ if (!target.exists()) { + // TODO throw IOException instead. log.error(MARKER, "Impossible de créer le fichier de sortie {}", target.getName()); createFalseCSV(target); } @@ -132,12 +134,9 @@ public void convertToCSV(File source, File target, Charset sourceCharset) throws */ public void convertToShapefile(File source, File target) throws IOException { log.info(MARKER, "{} => {} (gdal {})...", source, target, version); - if (FilenameUtils.getExtension(source.getName()).toLowerCase().equals("gml")) { - fixGML(source); - } String[] args = getArguments(source, target, DRIVER_SHAPEFILE); - Map envs = new HashMap(); + Map envs = new HashMap<>(); envs.put("SHAPE_ENCODING", ENCODING_LATIN1); runCommand(args, envs); /* @@ -254,7 +253,7 @@ private void createFalseCSV(File target) throws IOException { * @return */ private String[] getArguments(File source, File target, String driver) { - List arguments = new ArrayList(); + List arguments = new ArrayList<>(); arguments.add(ogr2ogrPath); // Otherwise, some ogr2ogr versions transforms 01 to 1... @@ -381,16 +380,4 @@ private String commandToString(String[] args) { return message; } - /** - * ogr2ogr ignores self-closing tags. 
They are changed to empty tags - * - * @param source - * @throws IOException - */ - private void fixGML(File source) throws IOException { - File backupedFile = new File(source.getPath() + ".backup"); - source.renameTo(backupedFile); - FixGML.replaceAutoclosedByEmpty(backupedFile, source); - } - } diff --git a/validator-core/src/main/java/fr/ign/validator/tools/MultiTableReader.java b/validator-core/src/main/java/fr/ign/validator/tools/MultiTableReader.java index c3522f44..7bf9dd3a 100644 --- a/validator-core/src/main/java/fr/ign/validator/tools/MultiTableReader.java +++ b/validator-core/src/main/java/fr/ign/validator/tools/MultiTableReader.java @@ -36,12 +36,16 @@ public class MultiTableReader { private File csvDirectory; /** + * Create a {@link MultiTableReader} with a source gmlPath and a csvDirectory + * produced by an ogr2ogr conversion. * + * @param gmlPath + * @param csvDirectory */ - private MultiTableReader(File path, File csvDirectory) { + private MultiTableReader(File gmlPath, File csvDirectory) { assert csvDirectory.exists(); if (!csvDirectory.isDirectory()) { - throw new RuntimeException("fail to read " + path + "(" + csvDirectory + " is not a directory)"); + throw new RuntimeException("fail to read " + gmlPath + "(" + csvDirectory + " is not a directory)"); } this.csvDirectory = csvDirectory; } @@ -71,10 +75,20 @@ public List getTableNames() { * @throws IOException */ public TableReader getTableReader(String tableName) throws IOException { - File tablePath = new File(csvDirectory, tableName + ".csv"); + File tablePath = getTablePath(tableName); return new TableReader(tablePath, StandardCharsets.UTF_8); } + /** + * Get path to the table converted to CSV. 
+ * + * @param tableName + * @return + */ + public File getTablePath(String tableName) { + return new File(csvDirectory, tableName + ".csv"); + } + /** * Create a multiple table reader converting * diff --git a/validator-core/src/main/java/fr/ign/validator/tools/internal/FixGML.java b/validator-core/src/main/java/fr/ign/validator/tools/internal/FixGML.java index 271b7c72..c2656956 100644 --- a/validator-core/src/main/java/fr/ign/validator/tools/internal/FixGML.java +++ b/validator-core/src/main/java/fr/ign/validator/tools/internal/FixGML.java @@ -18,6 +18,7 @@ * @author MBorne * */ +@Deprecated public class FixGML { /** diff --git a/validator-core/src/test/java/fr/ign/validator/normalize/CSVNormalizerTest.java b/validator-core/src/test/java/fr/ign/validator/normalize/TableNormalizerTest.java similarity index 95% rename from validator-core/src/test/java/fr/ign/validator/normalize/CSVNormalizerTest.java rename to validator-core/src/test/java/fr/ign/validator/normalize/TableNormalizerTest.java index 17311470..94cbbc8f 100644 --- a/validator-core/src/test/java/fr/ign/validator/normalize/CSVNormalizerTest.java +++ b/validator-core/src/test/java/fr/ign/validator/normalize/TableNormalizerTest.java @@ -20,7 +20,7 @@ import fr.ign.validator.report.InMemoryReportBuilder; import fr.ign.validator.tools.ResourceHelper; -public class CSVNormalizerTest { +public class TableNormalizerTest { @Rule public TemporaryFolder folder = new TemporaryFolder(); @@ -59,7 +59,7 @@ public void testNormalise() throws Exception { Assert.assertNotNull(fileModel); File targetFile = new File(document.getDocumentPath(), "adresse_normalized.csv"); - CSVNormalizer csvNormalizer = new CSVNormalizer(context, fileModel.getFeatureType(), targetFile); + TableNormalizer csvNormalizer = new TableNormalizer(context, fileModel.getFeatureType(), targetFile); File csvFile1 = new File(document.getDocumentPath(), "adresse_1.csv"); Assert.assertTrue(csvFile1.exists()); diff --git 
a/validator-core/src/test/java/fr/ign/validator/tools/AutoFeatureTypeTest.java b/validator-core/src/test/java/fr/ign/validator/tools/AutoFeatureTypeTest.java new file mode 100644 index 00000000..7c1044be --- /dev/null +++ b/validator-core/src/test/java/fr/ign/validator/tools/AutoFeatureTypeTest.java @@ -0,0 +1,53 @@ +package fr.ign.validator.tools; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; + +import org.junit.Test; + +import fr.ign.validator.model.AttributeType; +import fr.ign.validator.model.FeatureType; +import fr.ign.validator.model.type.GeometryType; +import fr.ign.validator.model.type.StringType; + +public class AutoFeatureTypeTest { + + @Test + public void testCreateFromTAB() throws IOException { + File path = ResourceHelper.getResourceFile(getClass(), "/data/tab_utf8/PRESCRIPTION_PCT.tab"); + FeatureType featureType = AutoFeatureType.createFeatureTypeFromTable(path); + assertEquals("PRESCRIPTION_PCT", featureType.getName()); + + /* ensure that WKT attribute is recognized as a WKT */ + { + AttributeType attribute = featureType.getAttribute("WKT"); + assertNotNull(attribute); + assertTrue(attribute instanceof GeometryType); + } + + /* ensure that other attributes are recognized as string */ + String[] otherNames = new String[] { + "LIBELLE", + "TXT", + "TYPEPSC", + "NOMFIC", + "URLFIC", + "INSEE", + "DATAPPRO", + "DATVALID" + }; + for (String otherName : otherNames) { + AttributeType attribute = featureType.getAttribute(otherName); + assertNotNull(attribute); + assertTrue(attribute instanceof StringType); + } + + /* check attribute count */ + assertEquals(otherNames.length + 1, featureType.getAttributeCount()); + } + +} diff --git a/validator-core/src/test/java/fr/ign/validator/tools/MultiTableReaderTest.java b/validator-core/src/test/java/fr/ign/validator/tools/MultiTableReaderTest.java index 68c2a980..70867818 
100644 --- a/validator-core/src/test/java/fr/ign/validator/tools/MultiTableReaderTest.java +++ b/validator-core/src/test/java/fr/ign/validator/tools/MultiTableReaderTest.java @@ -18,7 +18,7 @@ public void testReadPcrsLyon01() throws IOException { ); MultiTableReader reader = MultiTableReader.createMultiTableReader(srcFile); List tableNames = reader.getTableNames(); - assertEquals(6, tableNames.size()); + assertEquals(11, tableNames.size()); // HabillageLignesPCRS { TableReader tableReader = reader.getTableReader("HabillageLignesPCRS");