Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/apache/systemds into graph_…
Browse files Browse the repository at this point in the history
…conv
  • Loading branch information
MaximilianSchreff committed Nov 2, 2023
2 parents 59e3c94 + d9e4f21 commit 738d93f
Show file tree
Hide file tree
Showing 199 changed files with 7,862 additions and 3,025 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/monitoringUITests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Build the application, with Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
# Set always-auth in npmrc
always-auth: false # optional, default is false
Expand Down
18 changes: 15 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,14 @@
<directory>src/assembly/bin</directory>
<targetPath>META-INF/</targetPath>
</resource>
<resource>
<!-- add the Log4j properties file into the jar to have default logging settings -->
<directory>conf</directory>
<includes>
<include>log4j.properties</include>
</includes>
<targetPath>./</targetPath>
</resource>
</resources>
<testResources>
<testResource>
Expand Down Expand Up @@ -213,8 +221,13 @@
<forceCreation>true</forceCreation>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
<mainClass>org.apache.sysds.api.DMLScript</mainClass>
</manifest>
<manifestEntries>
<Class-Path>SystemDS.jar ${project.artifactId}-${project.version}.jar</Class-Path>
</manifestEntries>
</archive>
</configuration>
</execution>
Expand All @@ -227,7 +240,6 @@
<classifier>perf</classifier>
<includes>
<include>org/apache/sysds/performance/**</include>
<include>log4j.properties</include>
</includes>
<archive>
<manifest>
Expand All @@ -236,7 +248,7 @@
<mainClass>org.apache.sysds.performance.Main</mainClass>
</manifest>
<manifestEntries>
<Class-Path>SystemDS.jar ${project.build.directory}/${project.artifactId}-${project.version}-tests.jar</Class-Path>
<Class-Path>SystemDS.jar ${project.artifactId}-${project.version}-tests.jar</Class-Path>
</manifestEntries>
</archive>
</configuration>
Expand Down Expand Up @@ -1427,7 +1439,7 @@
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
<version>4.1.68.Final</version>
<scope>provided</scope>
<!-- <scope>provided</scope> -->
<exclusions>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
Expand Down
2 changes: 1 addition & 1 deletion scripts/builtin/auc.dml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#
#-------------------------------------------------------------

# This builting function computes the area under the ROC curve (AUC)
# This builtin function computes the area under the ROC curve (AUC)
# for binary classifiers.
#
# INPUT:
Expand Down
2 changes: 2 additions & 0 deletions scripts/builtin/randomForest.dml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ m_randomForest = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] cty
if( sample_frac < 1.0 ) {
si1 = as.integer(as.scalar(randSeeds[3*(i-1)+1,1]));
I1 = rand(rows=nrow(X), cols=1, seed=si1) <= sample_frac;
if( sum(I1) <= 1 ) # min 2 tuples
I1[1:2,] = matrix(1,2,1);
Xi = removeEmpty(target=X, margin="rows", select=I1);
yi = removeEmpty(target=y, margin="rows", select=I1);
}
Expand Down
1 change: 1 addition & 0 deletions src/assembly/bin.xml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
<include>*:commons-logging*</include>
<include>*:commons-math3*</include>
<include>*:commons-text*</include>
<include>*:fastdoubleparser*</include>
<include>*:guava*</include>
<include>*:hadoop-auth*</include>
<include>*:hadoop-client*</include>
Expand Down
12 changes: 4 additions & 8 deletions src/main/java/org/apache/sysds/api/DMLScript.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,8 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.sysds.common.Types.ExecMode;
import org.apache.sysds.conf.CompilerConfig;
import org.apache.sysds.conf.ConfigurationManager;
Expand Down Expand Up @@ -204,29 +202,27 @@ public static boolean isActiveAM(){
public static void main(String[] args)
{
try{
Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
DMLScript.executeScript(conf, otherArgs);
DMLScript.executeScript(args);
} catch(Exception e){
errorPrint(e);
for(String s: args){
if(s.trim().contains("-debug")){
e.printStackTrace();
return;
}
}
errorPrint(e);
}
}

/**
* Single entry point for all public invocation alternatives (e.g.,
* main, executeScript, JaqlUdf etc)
*
* @param conf Hadoop configuration
* @param args arguments
* @return true if success, false otherwise
* @throws IOException If an internal IOException happens.
*/
public static boolean executeScript( Configuration conf, String[] args )
public static boolean executeScript( String[] args )
throws IOException, ParseException, DMLScriptException
{
//parse arguments and set execution properties
Expand Down
18 changes: 17 additions & 1 deletion src/main/java/org/apache/sysds/common/Types.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,21 @@ public boolean isUnknown() {
public enum ValueType {
UINT4, UINT8, // Used for parsing in UINT values from numpy.
FP32, FP64, INT32, INT64, BOOLEAN, STRING, UNKNOWN,
HASH64, // Indicate that the value is a hash of 64 bit.
CHARACTER;

/**
 * Checks whether this value type is one of the numeric types.
 *
 * @return true for UINT4, UINT8, INT32, INT64, FP32 and FP64; false for every other value type
 */
public boolean isNumeric() {
	switch(this) {
		case UINT4:
		case UINT8:
		case INT32:
		case INT64:
		case FP32:
		case FP64:
			return true;
		default:
			return false;
	}
}

/**
 * Checks whether this value type is the UNKNOWN placeholder.
 *
 * @return true only if this value type is UNKNOWN
 */
public boolean isUnknown() {
	return UNKNOWN == this;
}

/**
 * Checks whether this value type is numeric, or one of the two additional
 * types accepted here alongside the numeric ones (BOOLEAN and CHARACTER).
 *
 * @return true if {@link #isNumeric()} holds or this is BOOLEAN or CHARACTER
 */
public boolean isPseudoNumeric() {
	final boolean booleanOrChar = this == BOOLEAN || this == CHARACTER;
	return booleanOrChar || isNumeric();
}

public String toExternalString() {
switch(this) {
case FP32:
Expand All @@ -100,10 +104,13 @@ public String toExternalString() {
default: return toString();
}
}

public static ValueType fromExternalString(String value) {
//for now we support both internal and external strings
//until we have completely changed the external types
String lValue = (value != null) ? value.toUpperCase() : null;
if(value == null)
throw new DMLRuntimeException("Unknown null value type");
final String lValue = value.toUpperCase();
switch(lValue) {
case "FP32": return FP32;
case "FP64":
Expand All @@ -117,6 +124,7 @@ public static ValueType fromExternalString(String value) {
case "STRING": return STRING;
case "CHARACTER": return CHARACTER;
case "UNKNOWN": return UNKNOWN;
case "HASH64": return HASH64;
default:
throw new DMLRuntimeException("Unknown value type: "+value);
}
Expand All @@ -143,6 +151,13 @@ else if(b == UNKNOWN)
switch(a){
case CHARACTER:
return STRING;
case HASH64:
switch(b){
case STRING:
return b;
default:
return a;
}
case STRING:
return a;
case FP64:
Expand Down Expand Up @@ -408,6 +423,7 @@ public enum OpOp2 {
MINUS(true), MODULUS(true), MOMENT(false), MULT(true), NOTEQUAL(true), OR(true),
PLUS(true), POW(true), PRINT(false), QUANTILE(false), SOLVE(false),
RBIND(false), VALUE_SWAP(false), XOR(true),
CAST_AS_FRAME(false), // cast as frame with column names
//fused ML-specific operators for performance
MINUS_NZ(false), //sparse-safe minus: X-(mean*ppred(X,0,!=))
LOG_NZ(false), //sparse-safe log; ppred(X,0,"!=")*log(X,0.5)
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/org/apache/sysds/conf/DMLConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ public class DMLConfig
public static final String DEFAULT_BLOCK_SIZE = "sysds.defaultblocksize";
public static final String CP_PARALLEL_OPS = "sysds.cp.parallel.ops";
public static final String CP_PARALLEL_IO = "sysds.cp.parallel.io";
public static final String IO_COMPRESSION_CODEC = "sysds.io.compression.encoding";
public static final String PARALLEL_ENCODE = "sysds.parallel.encode"; // boolean: enable multi-threaded transformencode and apply
public static final String PARALLEL_ENCODE_STAGED = "sysds.parallel.encode.staged";
public static final String PARALLEL_ENCODE_APPLY_BLOCKS = "sysds.parallel.encode.applyBlocks";
Expand Down Expand Up @@ -154,6 +155,7 @@ public class DMLConfig
_defaultVals.put(DEFAULT_BLOCK_SIZE, String.valueOf(OptimizerUtils.DEFAULT_BLOCKSIZE) );
_defaultVals.put(CP_PARALLEL_OPS, "true" );
_defaultVals.put(CP_PARALLEL_IO, "true" );
_defaultVals.put(IO_COMPRESSION_CODEC, "none");
_defaultVals.put(PARALLEL_TOKENIZE, "false");
_defaultVals.put(PARALLEL_TOKENIZE_NUM_BLOCKS, "64");
_defaultVals.put(PARALLEL_ENCODE, "true" );
Expand Down Expand Up @@ -463,7 +465,7 @@ public String getConfigInfo() {
FLOATING_POINT_PRECISION, GPU_EVICTION_POLICY, LOCAL_SPARK_NUM_THREADS, EVICTION_SHADOW_BUFFERSIZE,
GPU_MEMORY_ALLOCATOR, GPU_MEMORY_UTILIZATION_FACTOR, USE_SSL_FEDERATED_COMMUNICATION,
DEFAULT_FEDERATED_INITIALIZATION_TIMEOUT, FEDERATED_TIMEOUT, FEDERATED_MONITOR_FREQUENCY, FEDERATED_COMPRESSION,
ASYNC_PREFETCH, ASYNC_SPARK_BROADCAST, ASYNC_SPARK_CHECKPOINT
ASYNC_PREFETCH, ASYNC_SPARK_BROADCAST, ASYNC_SPARK_CHECKPOINT, IO_COMPRESSION_CODEC
};

StringBuilder sb = new StringBuilder();
Expand Down
34 changes: 28 additions & 6 deletions src/main/java/org/apache/sysds/hops/FunctionOp.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@
import java.util.List;

import org.apache.sysds.api.DMLScript;
import org.apache.sysds.lops.Compression;
import org.apache.sysds.lops.FunctionCallCP;
import org.apache.sysds.lops.Lop;
import org.apache.sysds.common.Types.ExecType;
import org.apache.sysds.parser.DMLProgram;
import org.apache.sysds.common.Types.DataType;
import org.apache.sysds.common.Types.ValueType;
import org.apache.sysds.runtime.compress.SingletonLookupHashMap;
import org.apache.sysds.runtime.controlprogram.Program;
import org.apache.sysds.runtime.controlprogram.parfor.opt.CostEstimatorHops;
import org.apache.sysds.runtime.meta.DataCharacteristics;
Expand Down Expand Up @@ -295,18 +297,42 @@ public Lop constructLops()
tmp.add( in.constructLops() );

//construct function call
Lop fcall = new FunctionCallCP(tmp, _fnamespace, _fname, _inputNames, _outputNames, _outputHops, _opt, et);
FunctionCallCP fcall = new FunctionCallCP(tmp, _fnamespace, _fname, _inputNames, _outputNames, _outputHops, _opt, et);
setLineNumbers(fcall);
setLops(fcall);

//note: no reblock lop because outputs directly bound
constructAndSetCompressionLopFunctionalIfRequired(et);

return getLops();
}

/**
 * Wraps the current lop of this hop in a {@link Compression} lop, but only when
 * the hop requires compression and the current function-call lop refers to a
 * function named "transformencode" (compared case-insensitively). In all other
 * cases the current lops are left untouched.
 *
 * @param et execution type handed to the created compression lop
 */
protected void constructAndSetCompressionLopFunctionalIfRequired(ExecType et) {
	// both conditions must hold; the cast is only attempted once compression is required
	if(!requiresCompression())
		return;
	final FunctionCallCP fcall = (FunctionCallCP) getLops();
	if(!fcall.getFunctionName().equalsIgnoreCase("transformencode"))
		return;

	// register the workload tree in the singleton map if one is available,
	// otherwise 0 is passed as the id
	int singletonID = 0;
	if(_compressedWorkloadTree != null)
		singletonID = SingletonLookupHashMap.getMap().put(_compressedWorkloadTree);

	final Lop compressed = new Compression(getLops(), DataType.MATRIX, ValueType.FP64, et, singletonID);

	// the compression lop replaces the function call as the lop of this hop
	setOutputDimensions(compressed);
	setLineNumbers(compressed);
	setLops(compressed);
}


@Override
public String getOpString() {
return OPCODE;
return OPCODE + " " + _fnamespace + " " + _fname;
}

@Override
Expand Down Expand Up @@ -385,8 +411,4 @@ public boolean compare(Hop that) {
return false;
}

@Override
public String toString(){
return getOpString();
}
}
Loading

0 comments on commit 738d93f

Please sign in to comment.