Skip to content

Commit

Permalink
fast parse
Browse files Browse the repository at this point in the history
  • Loading branch information
Baunsgaard committed Oct 21, 2024
1 parent 3d1a7a0 commit 133a217
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 15 deletions.
44 changes: 30 additions & 14 deletions src/main/java/org/apache/sysds/runtime/io/FrameReaderTextCSV.java
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,19 @@ private void parseLine(String cellStr, String delim, Array<?>[] destA , int row,
int clen, double dfillValue, String sfillValue, boolean isFill,
Set<String> naValues) {
try{
int from = 0, to = 0;
final int len = cellStr.length();
final int delimLen = delim.length();
int c = 0;
while(from < len) { // for all tokens
to = IOUtilFunctions.getTo(cellStr, from, delim, len, delimLen);
String cell = cellStr.substring(from, to);
assignCellGeneric(row, destA, cell, naValues, isFill, dfillValue, sfillValue, false, c);
c++;
from = to + delimLen;
}


String[] parts = IOUtilFunctions.splitCSV(cellStr, delim, clen);
assignColumns(row, (int)clen, destA, parts, naValues, isFill, dfillValue, sfillValue);
IOUtilFunctions.checkAndRaiseErrorCSVNumColumns("", cellStr, parts, clen);
Expand All @@ -189,15 +202,23 @@ private boolean assignColumnsGeneric(int row, int nCol, Array<?>[] destA, Strin
boolean isFill, double dfillValue, String sfillValue) {
boolean emptyValuesFound = false;
for(int col = 0; col < nCol; col++) {
emptyValuesFound = assignCellGeneric(row, destA, parts, naValues, isFill, dfillValue, sfillValue, emptyValuesFound, col);
emptyValuesFound = assignCellGeneric(row, destA, parts[col], naValues, isFill, dfillValue, sfillValue, emptyValuesFound, col);
}
return emptyValuesFound;
}

/**
 * Assign the first nCol tokens of one parsed row to the column arrays, without fill-value or NA-string handling.
 * Each token is delegated to assignCellNoNan.
 *
 * @param row   The row index to write to in each column array
 * @param nCol  The number of columns to assign
 * @param destA The destination column arrays, indexed by column
 * @param parts The tokens of the row, indexed by column
 * @return true if any of the processed cells was reported as empty by assignCellNoNan
 */
private boolean assignColumnsNoFillNoNan(int row, int nCol, Array<?>[] destA, String[] parts){
	boolean sawEmpty = false;
	int col = 0;
	while(col < nCol) {
		// thread the running flag through so it stays set once any cell was empty
		sawEmpty = assignCellNoNan(row, destA, parts[col], sawEmpty, col);
		col++;
	}
	return sawEmpty;
}

private boolean assignCellGeneric(int row, Array<?>[] destA, String[] parts, Set<String> naValues, boolean isFill,

private static boolean assignCellGeneric(int row, Array<?>[] destA, String val, Set<String> naValues, boolean isFill,
double dfillValue, String sfillValue, boolean emptyValuesFound, int col) {
String part = IOUtilFunctions.trim(parts[col]);
String part = IOUtilFunctions.trim(val);
if(part == null || part.isEmpty() || (naValues != null && naValues.contains(part))) {
if(isFill && dfillValue != 0)
destA[col].set(row, sfillValue);
Expand All @@ -208,17 +229,12 @@ private boolean assignCellGeneric(int row, Array<?>[] destA, String[] parts, Set
return emptyValuesFound;
}

private boolean assignColumnsNoFillNoNan(int row, int nCol, Array<?>[] destA, String[] parts){

boolean emptyValuesFound = false;
for(int col = 0; col < nCol; col++) {
String part = IOUtilFunctions.trim(parts[col]);
if(part.isEmpty())
emptyValuesFound = true;
else
destA[col].set(row, part);
}

/**
 * Assign a single cell value to its column array, without fill-value or NA-string handling.
 * The value is first passed through IOUtilFunctions.trim; an empty result is not written.
 *
 * @param row               The row index to write to
 * @param destA             The destination column arrays, indexed by column
 * @param val               The raw cell token
 * @param emptyValuesFound  The running flag from previously processed cells
 * @param col               The column index of this cell
 * @return true if this cell is empty after trimming, otherwise the incoming emptyValuesFound flag unchanged
 */
private static boolean assignCellNoNan(int row, Array<?>[] destA, String val, boolean emptyValuesFound, int col) {
	final String trimmed = IOUtilFunctions.trim(val);
	if(trimmed.isEmpty())
		return true; // empty cell: nothing is written to the column
	destA[col].set(row, trimmed);
	return emptyValuesFound;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ private static boolean isEmptyMatch(final String str, final int from, final Stri
* @param dLen The length of the delimiter string
* @return The next index.
*/
private static int getTo(final String str, final int from, final String delim,
public static int getTo(final String str, final int from, final String delim,
final int len, final int dLen) {
final char cq = CSV_QUOTE_CHAR;
final int fromP1 = from + 1;
Expand Down

0 comments on commit 133a217

Please sign in to comment.