From c648fd2bcd6ff4dfaa39681c7e314c2924ec72c4 Mon Sep 17 00:00:00 2001 From: baunsgaard Date: Wed, 19 Jul 2023 23:23:35 +0200 Subject: [PATCH] [SYSTEMDS-3135] Reader Generation for Custom Text Formats This commit suppresses all the warnings from the newly merged IOGen. There are many instances in the IOGen framework where methods are never called or ArrayLists are allocated via new ArrayList[1]. This is correct Java code, but not ideal. A future TODO is to fix these. --- .../sysds/runtime/iogen/FormatIdentifyer.java | 55 +++++++++++-------- .../sysds/runtime/iogen/GenerateReader.java | 4 +- .../apache/sysds/runtime/iogen/RawIndex.java | 2 + .../runtime/iogen/ReaderMappingIndex.java | 1 + .../apache/sysds/runtime/iogen/TextTrie.java | 5 +- .../iogen/template/FrameGenerateReader.java | 2 +- .../iogen/template/MatrixGenerateReader.java | 2 - .../iogen/GenerateReaderFrameTest.java | 5 +- 8 files changed, 46 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/iogen/FormatIdentifyer.java b/src/main/java/org/apache/sysds/runtime/iogen/FormatIdentifyer.java index 86721ec786e..aa02ad37fca 100644 --- a/src/main/java/org/apache/sysds/runtime/iogen/FormatIdentifyer.java +++ b/src/main/java/org/apache/sysds/runtime/iogen/FormatIdentifyer.java @@ -19,7 +19,6 @@ package org.apache.sysds.runtime.iogen; -import org.apache.spark.sql.sources.In; import org.apache.sysds.hops.OptimizerUtils; import org.apache.sysds.lops.Lop; import org.apache.sysds.runtime.frame.data.FrameBlock; @@ -65,6 +64,7 @@ public FormatIdentifyer(String raw, FrameBlock frame) throws Exception { this.runIdentification(); } + @SuppressWarnings("unchecked") private void runIdentification() { /* Index properties: @@ -615,9 +615,12 @@ private ArrayList> extractPrefixSuffixBeginEndCells(boolean result.get(nrows - 1).setValue(null); return result; } + public CustomProperties getFormatProperties() { return properties; } + + @SuppressWarnings("unchecked") private Pair, HashSet> 
buildValueKeyPattern() { int minSelectCols = Math.min(10, ncols); ArrayList[] prefixesRemovedReverse = new ArrayList[1]; @@ -664,6 +667,7 @@ private Pair, HashSet> buildValueKeyPattern() { return new Pair<>(keys[0], colSuffixes[0]); } + private String addToPrefixes(Set list, String strValue, int value, boolean reverse){ String str = reverse ? new StringBuilder(strValue).reverse().toString() : strValue; RawIndex rawIndex = new RawIndex(str); @@ -675,6 +679,8 @@ private String addToPrefixes(Set list, String strValue, int value, boole } return null; } + + @SuppressWarnings("unchecked") private Pair, HashSet> buildIndexKeyPattern(boolean keyForRowIndexes, int begin) { ArrayList[] prefixesRemovedReverse = new ArrayList[1]; ArrayList[] prefixesRemoved = new ArrayList[1]; @@ -786,6 +792,7 @@ private Pair, HashSet> buildIndexKeyPattern(boolean ke } // Get all prefix strings of a column + @SuppressWarnings("unchecked") public Pair[], ArrayList[]> extractAllPrefixStringsOfColsSingleLine(boolean reverse, boolean removesSelected) { ArrayList[] prefixStrings = new ArrayList[ncols]; ArrayList[] rowIndexes = new ArrayList[ncols]; @@ -796,10 +803,11 @@ public Pair[], ArrayList[]> extractAllPrefixStringsOf } return new Pair<>(prefixStrings, rowIndexes); } + public Pair, ArrayList> extractAllPrefixStringsOfAColSingleLine(int r, ArrayList colIndexes, boolean reverse, boolean removesSelected) { - ArrayList prefixStrings = new ArrayList(); - ArrayList rowIndexes = new ArrayList(); + ArrayList prefixStrings = new ArrayList<>(); + ArrayList rowIndexes = new ArrayList<>(); for(int c : colIndexes) { int rowIndex = mapRow[r][c]; if(rowIndex != -1) { @@ -820,8 +828,8 @@ public Pair, ArrayList> extractAllPrefixStringsOfACol public Pair, ArrayList> extractAllPrefixStringsOfAColSingleLine(int colIndex, boolean reverse, boolean removesSelected) { - ArrayList prefixStrings = new ArrayList(); - ArrayList rowIndexes = new ArrayList(); + ArrayList prefixStrings = new ArrayList<>(); + ArrayList 
rowIndexes = new ArrayList<>(); for(int r = 0; r < nrows; r++) { int rowIndex = mapRow[r][colIndex]; if(rowIndex != -1) { @@ -840,7 +848,7 @@ public Pair, ArrayList> extractAllPrefixStringsOfACol return new Pair<>(prefixStrings, rowIndexes); } - + @SuppressWarnings("unchecked") private ArrayList[] extractAllSuffixStringsOfColsSingleLine(boolean removeData) { ArrayList[] result = new ArrayList[ncols]; for(int c = 0; c < ncols; c++) { @@ -876,6 +884,7 @@ private ArrayList extractAllSuffixStringsOfColsSingleLine(int col, boole return result; } + @SuppressWarnings("unused") private ArrayList extractAllSuffixStringsOfColsSingleLine(ArrayList rows,int col, boolean removeData) { ArrayList result = new ArrayList<>(); for(int r: rows) { @@ -979,6 +988,7 @@ private ArrayList> getTokenIndexOnMultiLineRecords(String return result; } + @SuppressWarnings("unused") private ArrayList> getTokenIndexOnMultiLineRecords(String beginToken, String endToken) { ArrayList> result = new ArrayList<>(); @@ -1026,7 +1036,7 @@ private ArrayList> getTokenIndexOnMultiLineRecords(String } return result; } - + private Pair, Set> getNewRefineKeys(LongestCommonSubsequence lcs, String firstKey, ArrayList prefixesRemoved, ArrayList prefixes, Set refineKeys) { @@ -1059,7 +1069,8 @@ else if(!isInTheMiddleOfString) { set.addAll(list1); for(String lcsKeys : set) { - if(setRefineLCS.contains(lcsKeys) || newSetRefineLCS.contains(lcsKey)) + // TODO Removed an unlikely argument it should not be a problem. 
+ if(setRefineLCS.contains(lcsKeys)) continue; String[] newLCSKey = (lcsKeys+Lop.OPERAND_DELIMITOR+firstKey).split(Lop.OPERAND_DELIMITOR); ArrayList tmpLCSKeyList = new ArrayList<>(); @@ -1109,9 +1120,10 @@ private ArrayList cleanUPKey(ArrayList keys, ArrayList p for(; i>=0; i--) { boolean flag = true; for(int j =0; j< prefixes.size() && flag; j++) { - String bk = keys.get(i); - int k1 = getIndexOfKeyPatternOnString(prefixes.get(j), i, keys, 0); - int k2 = prefixes.get(j).length(); + // TODO find out if used: + // String bk = keys.get(i); + // int k1 = getIndexOfKeyPatternOnString(prefixes.get(j), i, keys, 0); + // int k2 = prefixes.get(j).length(); flag = getIndexOfKeyPatternOnString(prefixes.get(j), i, keys, 0) == prefixes.get(j).length(); } if(flag) @@ -1123,10 +1135,11 @@ private ArrayList cleanUPKey(ArrayList keys, ArrayList p for(int index = i; index< keys.size(); index++) result.add(keys.get(index)); - int a = 100; } return result; } + + private boolean checkExtraKeyForCol(ArrayList keys, String extraKey , ArrayList prefixes){ boolean flag = true; for(int i=0; i keys, String extraKey , Ar } return flag; } + private Integer getIndexOfKeyPatternOnString(String str, ArrayList key, int beginPos) { return getIndexOfKeyPatternOnString(str,0, key, beginPos); } + private Integer getIndexOfKeyPatternOnString(String str, int keyFromIndex,ArrayList key, int beginPos) { int currPos = beginPos; boolean flag = true; @@ -1164,6 +1179,8 @@ private Integer getIndexOfKeyPatternOnString(String str, int keyFromIndex,ArrayL else return -1; } + + @SuppressWarnings("unchecked") private Pair[], HashSet[]> buildColsKeyPatternSingleRow() { ArrayList[] prefixesRemovedReverse = extractAllPrefixStringsOfColsSingleLine(true, true).getKey(); ArrayList[] prefixesRemoved = new ArrayList[ncols]; @@ -1199,14 +1216,13 @@ private Pair[], HashSet[]> buildColsKeyPatternSingleRo //check for exceptions for(Future task : rt) task.get(); - - int a = 50; } catch(Exception e) { throw new 
RuntimeException("Failed parallel ColsKeyPatternSingleRow.", e); } return new Pair<>(keys, colSuffixes); } + private class BuildColsKeyPatternSingleRowTask implements Callable { private final ArrayList[] prefixesRemovedReverse; private final ArrayList[] prefixesRemoved; @@ -1232,6 +1248,7 @@ public BuildColsKeyPatternSingleRowTask(ArrayList[] prefixesRemovedRever this.lcs = lcs; this.colIndexes = colIndexes; } + @Override public Object call() throws Exception { // Sort prefixesRemovedReverse list @@ -1367,16 +1384,7 @@ else if(refineKeysStep.size() == 1) { // CleanUP keys: reduce key list if it possible for(int c :colIndexes) { - if(c == 5){ - int fff = 500; - } ArrayList cleanUPKeys = cleanUPKey(keys[c], prefixes[c]); -// boolean flagOptimal = false; -// for(int i=0; i< keys[c].size() && !flagOptimal; i++) -// flagOptimal = keys[c].get(i).contains(" "); -// if(flagOptimal) { -// keys[c] = optimalKeyPattern(keys[c], prefixes[c]); -// } // set static col flag Boolean flagFixCol = true; @@ -1603,6 +1611,7 @@ private ArrayList stringTokenize(String str, int tokenLength) { return result; } + @SuppressWarnings("all") // unused and unsafe private ArrayList optimalKeyPattern(ArrayList keys, ArrayList prefixes) { ArrayList> keysList = new ArrayList<>(); for(int i = 0; i < keys.size() - 1; i++) { diff --git a/src/main/java/org/apache/sysds/runtime/iogen/GenerateReader.java b/src/main/java/org/apache/sysds/runtime/iogen/GenerateReader.java index 89212d9d86c..ff430b6d8d4 100644 --- a/src/main/java/org/apache/sysds/runtime/iogen/GenerateReader.java +++ b/src/main/java/org/apache/sysds/runtime/iogen/GenerateReader.java @@ -85,7 +85,7 @@ public MatrixReader getReader() throws Exception { String className = getRandomClassName(); MatrixCodeGen src = new MatrixCodeGen(properties, className); // constructor with arguments as CustomProperties - Class[] cArg = new Class[1]; + Class[] cArg = new Class[1]; cArg[0] = CustomProperties.class; String srcJava = 
src.generateCodeJava(formatIdentifyer); matrixReader = (MatrixReader) CodegenUtils.compileClass(className, srcJava).getDeclaredConstructor(cArg).newInstance(properties); @@ -111,7 +111,7 @@ public FrameReader getReader() throws Exception { String className = getRandomClassName(); FrameCodeGen src = new FrameCodeGen(properties, className); // constructor with arguments as CustomProperties - Class[] cArg = new Class[1]; + Class[] cArg = new Class[1]; cArg[0] = CustomProperties.class; String srcJava = src.generateCodeJava(formatIdentifyer); frameReader = (FrameReader) CodegenUtils.compileClass(className, srcJava).getDeclaredConstructor(cArg).newInstance(properties); diff --git a/src/main/java/org/apache/sysds/runtime/iogen/RawIndex.java b/src/main/java/org/apache/sysds/runtime/iogen/RawIndex.java index f65a06780dc..943246a4b9e 100644 --- a/src/main/java/org/apache/sysds/runtime/iogen/RawIndex.java +++ b/src/main/java/org/apache/sysds/runtime/iogen/RawIndex.java @@ -213,6 +213,7 @@ private Pair getValuePositionAndLength(ArrayList(); @@ -238,6 +239,7 @@ private void extractNumericActualValues() { addActualValueToList(sb.toString(), pi, actualNumericValues); } + @SuppressWarnings("unused") private void extractNumericDotActualValues() { if(this.dotActualNumericValues == null) this.dotActualNumericValues = new HashMap<>(); diff --git a/src/main/java/org/apache/sysds/runtime/iogen/ReaderMappingIndex.java b/src/main/java/org/apache/sysds/runtime/iogen/ReaderMappingIndex.java index e8c3f0daac4..4797c7fe8b8 100644 --- a/src/main/java/org/apache/sysds/runtime/iogen/ReaderMappingIndex.java +++ b/src/main/java/org/apache/sysds/runtime/iogen/ReaderMappingIndex.java @@ -117,6 +117,7 @@ private void ReadRaw(String raw) throws Exception { } + @SuppressWarnings("unused") private void runMapping(boolean isIndexMapping) { this.mapRow = new int[nrows][ncols]; diff --git a/src/main/java/org/apache/sysds/runtime/iogen/TextTrie.java 
b/src/main/java/org/apache/sysds/runtime/iogen/TextTrie.java index a228993e229..99eea60b1d7 100644 --- a/src/main/java/org/apache/sysds/runtime/iogen/TextTrie.java +++ b/src/main/java/org/apache/sysds/runtime/iogen/TextTrie.java @@ -127,15 +127,17 @@ public StringBuilder getKey() { return key; } - + @SuppressWarnings("unused") public void setKey(StringBuilder key) { this.key = key; } + @SuppressWarnings("unused") public ArrayList getRowIndexes() { return rowIndexes; } + @SuppressWarnings("unused") public void setRowIndexes(ArrayList rowIndexes) { this.rowIndexes = rowIndexes; } @@ -152,6 +154,7 @@ public int getIndexSetSize() { return indexSetSize; } + @SuppressWarnings("unused") public void print(){ Gson gson = new Gson(); System.out.println(key.toString()+" "+gson.toJson(this.indexSet)); diff --git a/src/main/java/org/apache/sysds/runtime/iogen/template/FrameGenerateReader.java b/src/main/java/org/apache/sysds/runtime/iogen/template/FrameGenerateReader.java index bffdb0afd3f..76439cfa8c8 100644 --- a/src/main/java/org/apache/sysds/runtime/iogen/template/FrameGenerateReader.java +++ b/src/main/java/org/apache/sysds/runtime/iogen/template/FrameGenerateReader.java @@ -212,7 +212,7 @@ public FrameBlock readFrameFromInputStream(InputStream is, Types.ValueType[] sch // core read (sequential/parallel) InputStreamInputFormat informat = new InputStreamInputFormat(is); - InputSplit split = informat.getSplits(null, 1)[0]; + informat.getSplits(null, 1); return ret; } diff --git a/src/main/java/org/apache/sysds/runtime/iogen/template/MatrixGenerateReader.java b/src/main/java/org/apache/sysds/runtime/iogen/template/MatrixGenerateReader.java index 31b8c4bf1e1..94653982cbb 100644 --- a/src/main/java/org/apache/sysds/runtime/iogen/template/MatrixGenerateReader.java +++ b/src/main/java/org/apache/sysds/runtime/iogen/template/MatrixGenerateReader.java @@ -42,7 +42,6 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; -import 
java.util.concurrent.Future; public abstract class MatrixGenerateReader extends MatrixReader { @@ -257,7 +256,6 @@ else if(_props.getRowIndexStructure().getProperties() == RowIndexStructure.Index return ret; } - @SuppressWarnings("unchecked") protected void readMatrixFromHDFS(TextInputFormat informat, InputSplit[] splits, JobConf job, MatrixBlock dest) throws IOException { MutableInt row = new MutableInt(0); long lnnz = 0; diff --git a/src/test/java/org/apache/sysds/test/functions/iogen/GenerateReaderFrameTest.java b/src/test/java/org/apache/sysds/test/functions/iogen/GenerateReaderFrameTest.java index 38b182e08c1..8679a8059c9 100644 --- a/src/test/java/org/apache/sysds/test/functions/iogen/GenerateReaderFrameTest.java +++ b/src/test/java/org/apache/sysds/test/functions/iogen/GenerateReaderFrameTest.java @@ -70,7 +70,10 @@ protected void runGenerateReaderTest(String sampleRawFileName, String sampleFram GenerateReader.GenerateReaderFrame gr = new GenerateReader.GenerateReaderFrame(sampleRaw, sampleFrame, parallel); FrameReader fr = gr.getReader(); - FrameBlock frameBlock = fr.readFrameFromHDFS(dataFileName, sampleSchema, rows, sampleSchema.length); + fr.readFrameFromHDFS(dataFileName, sampleSchema, rows, sampleSchema.length); + + // TODO Verify the frameblock contains the correct values. + } catch(Exception exception) { exception.printStackTrace();