Skip to content

Commit

Permalink
parseLineJIT comp
Browse files Browse the repository at this point in the history
  • Loading branch information
Baunsgaard committed Oct 21, 2024
1 parent 83f81a1 commit 9a312e1
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 25 deletions.
46 changes: 22 additions & 24 deletions src/main/java/org/apache/sysds/runtime/io/FrameReaderTextCSV.java
Original file line number Diff line number Diff line change
Expand Up @@ -146,44 +146,42 @@ protected final int readCSVFrameFromInputSplit(InputSplit split, InputFormat<Lon
Array<?>[] destA = dest.getColumns();
while(reader.next(key, value)) // foreach line
{
String cellStr = IOUtilFunctions.trim(value.toString());
parseLine(cellStr, delim, destA, row, (int) clen, dfillValue, sfillValue, isFill, naValues);
parseLine(value.toString(), delim, destA, row, (int) clen, dfillValue, sfillValue, isFill, naValues);
row++;
}

}
catch(Exception e){
throw new DMLRuntimeException("Failed parsing string: \"" + value +"\"", e);
}
finally {
// if(pool != null)
// pool.shutdown();
IOUtilFunctions.closeSilently(reader);
}

return row;
}

private void parseLine(String cellStr, String delim, Array<?>[] destA , int row,
int clen, double dfillValue, String sfillValue, boolean isFill,
Set<String> naValues) {
try{
int from = 0, to = 0;
final int len = cellStr.length();
final int delimLen = delim.length();
int c = 0;
while(from < len) { // for all tokens
to = IOUtilFunctions.getTo(cellStr, from, delim, len, delimLen);
assignCellGeneric(row, destA, cellStr.substring(from, to), naValues, isFill, dfillValue, sfillValue,
false, c);
c++;
from = to + delimLen;
}
/**
 * Parses one text line into the destination column arrays at the given row.
 *
 * This method is a thin guard: it measures the line and the delimiter once, delegates the token loop to
 * {@link #parseLineSpecialized}, and rethrows any failure with the offending line attached.
 *
 * @param cellStr    The line to parse
 * @param delim      The delimiter separating the tokens of the line
 * @param destA      The destination column arrays, forwarded to the token loop
 * @param row        The row index to write to
 * @param clen       Not read by this method; retained so existing call sites stay unchanged
 * @param dfillValue Forwarded unchanged to the token loop
 * @param sfillValue Forwarded unchanged to the token loop
 * @param isFill     Forwarded unchanged to the token loop
 * @param naValues   Forwarded unchanged to the token loop
 * @throws RuntimeException wrapping any exception raised while parsing, with the line in the message
 */
private void parseLine(String cellStr, String delim, Array<?>[] destA, int row, int clen, double dfillValue,
	String sfillValue, boolean isFill, Set<String> naValues) {
	try {
		// Both lengths are evaluated inside the try (line first, delimiter second) so that a failure
		// on either one is reported together with the line, exactly like a failure inside the loop.
		parseLineSpecialized(cellStr, delim, destA, row, dfillValue, sfillValue, isFill, naValues,
			cellStr.length(), delim.length());
	}
	catch(Exception ex) {
		throw new RuntimeException("failed to parse: " + cellStr, ex);
	}
}

}
catch(Exception e){
throw new RuntimeException("failed to parse: " + cellStr, e);
}
/**
 * Token loop of the line parser: walks the line from left to right and hands each token, together with a
 * running column index starting at 0, to {@code assignCellGeneric}.
 *
 * The loop runs while the current start offset is smaller than {@code len}; after each token the start
 * offset advances to the token end plus {@code delimLen}. The column index is not checked against
 * {@code destA.length} in this method.
 *
 * @param cellStr    The line to split
 * @param delim      The delimiter, passed to {@code IOUtilFunctions.getTo} to locate the token end
 * @param destA      The destination column arrays
 * @param row        The row index to write to
 * @param dfillValue Forwarded unchanged to {@code assignCellGeneric}
 * @param sfillValue Forwarded unchanged to {@code assignCellGeneric}
 * @param isFill     Forwarded unchanged to {@code assignCellGeneric}
 * @param naValues   Forwarded unchanged to {@code assignCellGeneric}
 * @param len        Length of {@code cellStr} as supplied by the caller
 * @param delimLen   Length of {@code delim} as supplied by the caller
 */
private void parseLineSpecialized(String cellStr, String delim, Array<?>[] destA, int row, double dfillValue, String sfillValue,
	boolean isFill, Set<String> naValues, final int len, final int delimLen) {
	for(int start = 0, col = 0; start < len; col++) { // one iteration per token
		// NOTE(review): getTo presumably returns the exclusive end offset of the token that begins at
		// 'start' - confirm against IOUtilFunctions.
		final int end = IOUtilFunctions.getTo(cellStr, start, delim, len, delimLen);
		final String token = cellStr.substring(start, end);
		assignCellGeneric(row, destA, token, naValues, isFill, dfillValue, sfillValue, false, col);
		// skip over the delimiter to the beginning of the next token
		start = end + delimLen;
	}
}

private boolean assignColumns(int row, int nCol, Array<?>[] destA, String[] parts, Set<String> naValues,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,10 @@ protected void readCSVFrameFromHDFS( Path path, JobConf job, FileSystem fs,
new ReadRowsTask(splits[0], informat, job, dest, 0, true).call();
return;
}

//compute num rows per split
ArrayList<Future<Long>> cret = new ArrayList<>();
for( int i=0; i<splits.length - 1; i++ )
for( int i=0; i<splits.length - 1; i++ ) // all but last split
cret.add(pool.submit(new CountRowsTask(splits[i], informat, job, _props.hasHeader() && i==0)));

//compute row offset per split via cumsum on row counts
Expand Down

0 comments on commit 9a312e1

Please sign in to comment.