Skip to content

Commit

Permalink
[SYSTEMDS-3135] Reader Generation for Custom Text Formats
Browse files Browse the repository at this point in the history
This commit suppresses all the warnings from the newly merged IOGen.
There are many instances in the IOGen framework where methods are
never called or ArrayLists are allocated via new ArrayList[1].
This is valid Java code, but not ideal.

A future TODO is to fix these.
  • Loading branch information
Baunsgaard committed Jul 19, 2023
1 parent 89b938c commit c648fd2
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 30 deletions.
55 changes: 32 additions & 23 deletions src/main/java/org/apache/sysds/runtime/iogen/FormatIdentifyer.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

package org.apache.sysds.runtime.iogen;

import org.apache.spark.sql.sources.In;
import org.apache.sysds.hops.OptimizerUtils;
import org.apache.sysds.lops.Lop;
import org.apache.sysds.runtime.frame.data.FrameBlock;
Expand Down Expand Up @@ -65,6 +64,7 @@ public FormatIdentifyer(String raw, FrameBlock frame) throws Exception {
this.runIdentification();
}

@SuppressWarnings("unchecked")
private void runIdentification() {

/* Index properties:
Expand Down Expand Up @@ -615,9 +615,12 @@ private ArrayList<Pair<String, String>> extractPrefixSuffixBeginEndCells(boolean
result.get(nrows - 1).setValue(null);
return result;
}

/**
 * Returns the {@code properties} field of this format identifier.
 * The field is handed out directly; no copy is made.
 *
 * @return the {@code CustomProperties} instance currently stored in {@code properties}
 */
public CustomProperties getFormatProperties() {
return properties;
}

@SuppressWarnings("unchecked")
private Pair<ArrayList<String>, HashSet<String>> buildValueKeyPattern() {
int minSelectCols = Math.min(10, ncols);
ArrayList<String>[] prefixesRemovedReverse = new ArrayList[1];
Expand Down Expand Up @@ -664,6 +667,7 @@ private Pair<ArrayList<String>, HashSet<String>> buildValueKeyPattern() {

return new Pair<>(keys[0], colSuffixes[0]);
}

private String addToPrefixes(Set<String> list, String strValue, int value, boolean reverse){
String str = reverse ? new StringBuilder(strValue).reverse().toString() : strValue;
RawIndex rawIndex = new RawIndex(str);
Expand All @@ -675,6 +679,8 @@ private String addToPrefixes(Set<String> list, String strValue, int value, boole
}
return null;
}

@SuppressWarnings("unchecked")
private Pair<ArrayList<String>, HashSet<String>> buildIndexKeyPattern(boolean keyForRowIndexes, int begin) {
ArrayList<String>[] prefixesRemovedReverse = new ArrayList[1];
ArrayList<String>[] prefixesRemoved = new ArrayList[1];
Expand Down Expand Up @@ -786,6 +792,7 @@ private Pair<ArrayList<String>, HashSet<String>> buildIndexKeyPattern(boolean ke
}

// Get all prefix strings of a column
@SuppressWarnings("unchecked")
public Pair<ArrayList<String>[], ArrayList<Integer>[]> extractAllPrefixStringsOfColsSingleLine(boolean reverse, boolean removesSelected) {
ArrayList<String>[] prefixStrings = new ArrayList[ncols];
ArrayList<Integer>[] rowIndexes = new ArrayList[ncols];
Expand All @@ -796,10 +803,11 @@ public Pair<ArrayList<String>[], ArrayList<Integer>[]> extractAllPrefixStringsOf
}
return new Pair<>(prefixStrings, rowIndexes);
}

public Pair<ArrayList<String>, ArrayList<Integer>> extractAllPrefixStringsOfAColSingleLine(int r,
ArrayList<Integer> colIndexes, boolean reverse, boolean removesSelected) {
ArrayList<String> prefixStrings = new ArrayList();
ArrayList<Integer> rowIndexes = new ArrayList();
ArrayList<String> prefixStrings = new ArrayList<>();
ArrayList<Integer> rowIndexes = new ArrayList<>();
for(int c : colIndexes) {
int rowIndex = mapRow[r][c];
if(rowIndex != -1) {
Expand All @@ -820,8 +828,8 @@ public Pair<ArrayList<String>, ArrayList<Integer>> extractAllPrefixStringsOfACol

public Pair<ArrayList<String>, ArrayList<Integer>> extractAllPrefixStringsOfAColSingleLine(int colIndex,
boolean reverse, boolean removesSelected) {
ArrayList<String> prefixStrings = new ArrayList();
ArrayList<Integer> rowIndexes = new ArrayList();
ArrayList<String> prefixStrings = new ArrayList<>();
ArrayList<Integer> rowIndexes = new ArrayList<>();
for(int r = 0; r < nrows; r++) {
int rowIndex = mapRow[r][colIndex];
if(rowIndex != -1) {
Expand All @@ -840,7 +848,7 @@ public Pair<ArrayList<String>, ArrayList<Integer>> extractAllPrefixStringsOfACol
return new Pair<>(prefixStrings, rowIndexes);
}


@SuppressWarnings("unchecked")
private ArrayList<String>[] extractAllSuffixStringsOfColsSingleLine(boolean removeData) {
ArrayList<String>[] result = new ArrayList[ncols];
for(int c = 0; c < ncols; c++) {
Expand Down Expand Up @@ -876,6 +884,7 @@ private ArrayList<String> extractAllSuffixStringsOfColsSingleLine(int col, boole
return result;
}

@SuppressWarnings("unused")
private ArrayList<String> extractAllSuffixStringsOfColsSingleLine(ArrayList<Integer> rows,int col, boolean removeData) {
ArrayList<String> result = new ArrayList<>();
for(int r: rows) {
Expand Down Expand Up @@ -979,6 +988,7 @@ private ArrayList<Pair<Integer, Integer>> getTokenIndexOnMultiLineRecords(String
return result;
}

@SuppressWarnings("unused")
private ArrayList<Pair<Integer, Integer>> getTokenIndexOnMultiLineRecords(String beginToken, String endToken) {
ArrayList<Pair<Integer, Integer>> result = new ArrayList<>();

Expand Down Expand Up @@ -1026,7 +1036,7 @@ private ArrayList<Pair<Integer, Integer>> getTokenIndexOnMultiLineRecords(String
}
return result;
}

private Pair<Set<String>, Set<String>> getNewRefineKeys(LongestCommonSubsequence lcs, String firstKey,
ArrayList<String> prefixesRemoved, ArrayList<String> prefixes, Set<String> refineKeys) {

Expand Down Expand Up @@ -1059,7 +1069,8 @@ else if(!isInTheMiddleOfString) {
set.addAll(list1);

for(String lcsKeys : set) {
if(setRefineLCS.contains(lcsKeys) || newSetRefineLCS.contains(lcsKey))
// TODO Removed an unlikely argument it should not be a problem.
if(setRefineLCS.contains(lcsKeys))
continue;
String[] newLCSKey = (lcsKeys+Lop.OPERAND_DELIMITOR+firstKey).split(Lop.OPERAND_DELIMITOR);
ArrayList<String> tmpLCSKeyList = new ArrayList<>();
Expand Down Expand Up @@ -1109,9 +1120,10 @@ private ArrayList<String> cleanUPKey(ArrayList<String> keys, ArrayList<String> p
for(; i>=0; i--) {
boolean flag = true;
for(int j =0; j< prefixes.size() && flag; j++) {
String bk = keys.get(i);
int k1 = getIndexOfKeyPatternOnString(prefixes.get(j), i, keys, 0);
int k2 = prefixes.get(j).length();
// TODO find out if used:
// String bk = keys.get(i);
// int k1 = getIndexOfKeyPatternOnString(prefixes.get(j), i, keys, 0);
// int k2 = prefixes.get(j).length();
flag = getIndexOfKeyPatternOnString(prefixes.get(j), i, keys, 0) == prefixes.get(j).length();
}
if(flag)
Expand All @@ -1123,10 +1135,11 @@ private ArrayList<String> cleanUPKey(ArrayList<String> keys, ArrayList<String> p
for(int index = i; index< keys.size(); index++)
result.add(keys.get(index));

int a = 100;
}
return result;
}


private boolean checkExtraKeyForCol(ArrayList<String> keys, String extraKey , ArrayList<String> prefixes){
boolean flag = true;
for(int i=0; i<keys.size()-1 && flag; i++)
Expand All @@ -1144,9 +1157,11 @@ private boolean checkExtraKeyForCol(ArrayList<String> keys, String extraKey , Ar
}
return flag;
}

/**
 * Convenience overload that delegates to the four-argument
 * {@code getIndexOfKeyPatternOnString(str, keyFromIndex, key, beginPos)}
 * with {@code keyFromIndex} fixed to 0.
 *
 * @param str      string forwarded unchanged to the four-argument overload
 * @param key      key list forwarded unchanged to the four-argument overload
 * @param beginPos begin position forwarded unchanged to the four-argument overload
 * @return whatever the four-argument overload returns for these arguments
 */
private Integer getIndexOfKeyPatternOnString(String str, ArrayList<String> key, int beginPos) {
return getIndexOfKeyPatternOnString(str,0, key, beginPos);
}

private Integer getIndexOfKeyPatternOnString(String str, int keyFromIndex,ArrayList<String> key, int beginPos) {
int currPos = beginPos;
boolean flag = true;
Expand All @@ -1164,6 +1179,8 @@ private Integer getIndexOfKeyPatternOnString(String str, int keyFromIndex,ArrayL
else
return -1;
}

@SuppressWarnings("unchecked")
private Pair<ArrayList<String>[], HashSet<String>[]> buildColsKeyPatternSingleRow() {
ArrayList<String>[] prefixesRemovedReverse = extractAllPrefixStringsOfColsSingleLine(true, true).getKey();
ArrayList<String>[] prefixesRemoved = new ArrayList[ncols];
Expand Down Expand Up @@ -1199,14 +1216,13 @@ private Pair<ArrayList<String>[], HashSet<String>[]> buildColsKeyPatternSingleRo
//check for exceptions
for(Future<Object> task : rt)
task.get();

int a = 50;
}
catch(Exception e) {
throw new RuntimeException("Failed parallel ColsKeyPatternSingleRow.", e);
}
return new Pair<>(keys, colSuffixes);
}

private class BuildColsKeyPatternSingleRowTask implements Callable<Object> {
private final ArrayList<String>[] prefixesRemovedReverse;
private final ArrayList<String>[] prefixesRemoved;
Expand All @@ -1232,6 +1248,7 @@ public BuildColsKeyPatternSingleRowTask(ArrayList<String>[] prefixesRemovedRever
this.lcs = lcs;
this.colIndexes = colIndexes;
}

@Override
public Object call() throws Exception {
// Sort prefixesRemovedReverse list
Expand Down Expand Up @@ -1367,16 +1384,7 @@ else if(refineKeysStep.size() == 1) {

// CleanUP keys: reduce key list if it possible
for(int c :colIndexes) {
if(c == 5){
int fff = 500;
}
ArrayList<String> cleanUPKeys = cleanUPKey(keys[c], prefixes[c]);
// boolean flagOptimal = false;
// for(int i=0; i< keys[c].size() && !flagOptimal; i++)
// flagOptimal = keys[c].get(i).contains(" ");
// if(flagOptimal) {
// keys[c] = optimalKeyPattern(keys[c], prefixes[c]);
// }

// set static col flag
Boolean flagFixCol = true;
Expand Down Expand Up @@ -1603,6 +1611,7 @@ private ArrayList<String> stringTokenize(String str, int tokenLength) {
return result;
}

@SuppressWarnings("all") // unused and unsafe
private ArrayList<String> optimalKeyPattern(ArrayList<String> keys, ArrayList<String> prefixes) {
ArrayList<ArrayList<String>> keysList = new ArrayList<>();
for(int i = 0; i < keys.size() - 1; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public MatrixReader getReader() throws Exception {
String className = getRandomClassName();
MatrixCodeGen src = new MatrixCodeGen(properties, className);
// constructor with arguments as CustomProperties
Class[] cArg = new Class[1];
Class<?>[] cArg = new Class[1];
cArg[0] = CustomProperties.class;
String srcJava = src.generateCodeJava(formatIdentifyer);
matrixReader = (MatrixReader) CodegenUtils.compileClass(className, srcJava).getDeclaredConstructor(cArg).newInstance(properties);
Expand All @@ -111,7 +111,7 @@ public FrameReader getReader() throws Exception {
String className = getRandomClassName();
FrameCodeGen src = new FrameCodeGen(properties, className);
// constructor with arguments as CustomProperties
Class[] cArg = new Class[1];
Class<?>[] cArg = new Class[1];
cArg[0] = CustomProperties.class;
String srcJava = src.generateCodeJava(formatIdentifyer);
frameReader = (FrameReader) CodegenUtils.compileClass(className, srcJava).getDeclaredConstructor(cArg).newInstance(properties);
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/apache/sysds/runtime/iogen/RawIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ private Pair<Integer, Integer> getValuePositionAndLength(ArrayList<Pair<Integer,
return null;
}

@SuppressWarnings("unused")
private void extractNumericActualValues() {
if(this.actualNumericValues == null)
this.actualNumericValues = new HashMap<>();
Expand All @@ -238,6 +239,7 @@ private void extractNumericActualValues() {
addActualValueToList(sb.toString(), pi, actualNumericValues);
}

@SuppressWarnings("unused")
private void extractNumericDotActualValues() {
if(this.dotActualNumericValues == null)
this.dotActualNumericValues = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ private void ReadRaw(String raw) throws Exception {

}

@SuppressWarnings("unused")
private void runMapping(boolean isIndexMapping) {

this.mapRow = new int[nrows][ncols];
Expand Down
5 changes: 4 additions & 1 deletion src/main/java/org/apache/sysds/runtime/iogen/TextTrie.java
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,17 @@ public StringBuilder getKey() {
return key;
}


/**
 * Replaces the {@code key} field with the given builder.
 * The reference is stored as-is; no copy is made.
 *
 * @param key the {@code StringBuilder} to store in {@code key}
 */
@SuppressWarnings("unused")
public void setKey(StringBuilder key) {
this.key = key;
}

/**
 * Returns the {@code rowIndexes} field.
 * The list itself is returned, not a copy.
 *
 * @return the list currently stored in {@code rowIndexes}
 */
@SuppressWarnings("unused")
public ArrayList<Integer> getRowIndexes() {
return rowIndexes;
}

/**
 * Replaces the {@code rowIndexes} field with the given list.
 * The reference is stored as-is; no copy is made.
 *
 * @param rowIndexes the list to store in {@code rowIndexes}
 */
@SuppressWarnings("unused")
public void setRowIndexes(ArrayList<Integer> rowIndexes) {
this.rowIndexes = rowIndexes;
}
Expand All @@ -152,6 +154,7 @@ public int getIndexSetSize() {
return indexSetSize;
}

@SuppressWarnings("unused")
public void print(){
Gson gson = new Gson();
System.out.println(key.toString()+" "+gson.toJson(this.indexSet));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ public FrameBlock readFrameFromInputStream(InputStream is, Types.ValueType[] sch

// core read (sequential/parallel)
InputStreamInputFormat informat = new InputStreamInputFormat(is);
InputSplit split = informat.getSplits(null, 1)[0];
informat.getSplits(null, 1);
return ret;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.concurrent.Future;

public abstract class MatrixGenerateReader extends MatrixReader {

Expand Down Expand Up @@ -257,7 +256,6 @@ else if(_props.getRowIndexStructure().getProperties() == RowIndexStructure.Index
return ret;
}

@SuppressWarnings("unchecked")
protected void readMatrixFromHDFS(TextInputFormat informat, InputSplit[] splits, JobConf job, MatrixBlock dest) throws IOException {
MutableInt row = new MutableInt(0);
long lnnz = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ protected void runGenerateReaderTest(String sampleRawFileName, String sampleFram
GenerateReader.GenerateReaderFrame gr = new GenerateReader.GenerateReaderFrame(sampleRaw, sampleFrame,
parallel);
FrameReader fr = gr.getReader();
FrameBlock frameBlock = fr.readFrameFromHDFS(dataFileName, sampleSchema, rows, sampleSchema.length);
fr.readFrameFromHDFS(dataFileName, sampleSchema, rows, sampleSchema.length);

// TODO Verify the frameblock contains the correct values.

}
catch(Exception exception) {
exception.printStackTrace();
Expand Down

0 comments on commit c648fd2

Please sign in to comment.