Skip to content

Commit

Permalink
[SYSTEMDS-3787] New rewrite for transformencode w/o metadata output
Browse files Browse the repository at this point in the history
This patch adds a simple statement block rewrite which checks if the
metadata frame output of transformencode is used at all, and if not
sets a flag on the transformencode operation to avoid allocating and
serializing this meta data. The rewrite applies in about half of all
existing 'org.apache.sysds.test.functions.transform' tests.
  • Loading branch information
mboehm7 committed Oct 24, 2024
1 parent 12d8cd7 commit ae2ad07
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ public ProgramRewriter(boolean staticRewrites, boolean dynamicRewrites)
_sbRuleSet.add( new RewriteMarkLoopVariablesUpdateInPlace() );
if( LineageCacheConfig.getCompAssRW() )
_sbRuleSet.add( new MarkForLineageReuse() );
_sbRuleSet.add( new RewriteRemoveTransformEncodeMeta() );
}

// DYNAMIC REWRITES (which do require size information)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.sysds.hops.rewrite;

import java.util.Arrays;
import java.util.List;

import org.apache.sysds.hops.FunctionOp;
import org.apache.sysds.hops.Hop;
import org.apache.sysds.hops.LiteralOp;
import org.apache.sysds.parser.StatementBlock;

/**
 * Rule: If transformencode produces a meta data frame which is never
 * used, flag transformencode to never allocate and serialize this frame.
 *
 * The flag is expressed as an additional boolean literal input (value
 * {@code false}) appended to the inputs of the transformencode operator.
 */
public class RewriteRemoveTransformEncodeMeta extends StatementBlockRewriteRule
{
	private final static String TF_OPCODE = "TRANSFORMENCODE";

	/**
	 * Inspects the first DAG root of the given statement block and, if it is
	 * a transformencode function operator whose meta data output is not a
	 * live-out variable of the block, appends the "no meta data" literal.
	 *
	 * @param sb statement block to inspect (its HOP DAG may be modified in place)
	 * @param state program rewrite status (not used by this rule)
	 * @return a single-element list holding the given statement block
	 */
	@Override
	public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state)
	{
		//nothing to do for blocks without a HOP DAG
		if( sb.getHops() == null || sb.getHops().isEmpty() )
			return Arrays.asList(sb);

		//transformencode is a multi-return FunctionOp and is expected as
		//the (first) root of the DAG; any other root leaves the block as is
		Hop firstRoot = sb.getHops().get(0);
		if( !(firstRoot instanceof FunctionOp) )
			return Arrays.asList(sb);
		FunctionOp tfOp = (FunctionOp) firstRoot;
		if( !TF_OPCODE.equals(tfOp.getFunctionName()) )
			return Arrays.asList(sb);

		//the second output is the meta data frame; if it is live after
		//this block, it is still needed and the rewrite must not apply
		String metaVarName = tfOp.getOutputVariableNames()[1];
		if( sb.liveOut().containsVariable(metaVarName) )
			return Arrays.asList(sb);

		//exactly two inputs means the flag was not added yet, which
		//keeps repeated applications of this rule idempotent
		if( tfOp.getInput().size() == 2 ) {
			tfOp.getInput().add(new LiteralOp(false));
			LOG.debug("Applied removeTransformEncodeMeta (line "+ tfOp.getBeginLine() +").");
		}

		return Arrays.asList(sb);
	}

	/**
	 * This rule does not operate on sequences of statement blocks.
	 *
	 * @param sbs list of statement blocks
	 * @param sate program rewrite status (not used by this rule)
	 * @return the given list, unmodified
	 */
	@Override
	public List<StatementBlock> rewriteStatementBlocks(List<StatementBlock> sbs, ProgramRewriteStatus sate) {
		return sbs;
	}

	/**
	 * @return always {@code false}
	 */
	@Override
	public boolean createsSplitDag() {
		return false;
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,13 @@

public class MultiReturnParameterizedBuiltinCPInstruction extends ComputationCPInstruction {
protected final ArrayList<CPOperand> _outputs;

protected final boolean _metaReturn;

private MultiReturnParameterizedBuiltinCPInstruction(Operator op, CPOperand input1, CPOperand input2,
ArrayList<CPOperand> outputs, String opcode, String istr) {
boolean metaReturn, ArrayList<CPOperand> outputs, String opcode, String istr) {
super(CPType.MultiReturnBuiltin, op, input1, input2, outputs.get(0), opcode, istr);
_outputs = outputs;
_metaReturn = metaReturn;
}

public CPOperand getOutput(int i) {
Expand All @@ -67,9 +69,14 @@ public static MultiReturnParameterizedBuiltinCPInstruction parseInstruction(Stri
// one input and two outputs
CPOperand in1 = new CPOperand(parts[1]);
CPOperand in2 = new CPOperand(parts[2]);
outputs.add(new CPOperand(parts[3], ValueType.FP64, DataType.MATRIX));
outputs.add(new CPOperand(parts[4], ValueType.STRING, DataType.FRAME));
return new MultiReturnParameterizedBuiltinCPInstruction(null, in1, in2, outputs, opcode, str);
int pos = 3;
boolean metaReturn = true;
if( parts.length == 7 ) //no need for meta data
metaReturn = new CPOperand(parts[pos++]).getLiteral().getBooleanValue();
outputs.add(new CPOperand(parts[pos], ValueType.FP64, DataType.MATRIX));
outputs.add(new CPOperand(parts[pos+1], ValueType.STRING, DataType.FRAME));
return new MultiReturnParameterizedBuiltinCPInstruction(
null, in1, in2, metaReturn, outputs, opcode, str);
}
else {
throw new DMLRuntimeException("Invalid opcode in MultiReturnBuiltin instruction: " + opcode);
Expand All @@ -87,9 +94,10 @@ public void processInstruction(ExecutionContext ec) {
// execute block transform encode
MultiColumnEncoder encoder = EncoderFactory.createEncoder(spec, colnames, fin.getNumColumns(), null);
// TODO: Assign #threads in compiler and pass via the instruction string
int k = OptimizerUtils.getTransformNumThreads();
MatrixBlock data = encoder.encode(fin, OptimizerUtils.getTransformNumThreads()); // build and apply
FrameBlock meta = encoder.getMetaData(new FrameBlock(fin.getNumColumns(), ValueType.STRING),
OptimizerUtils.getTransformNumThreads());
FrameBlock meta = !_metaReturn ? new FrameBlock() :
encoder.getMetaData(new FrameBlock(fin.getNumColumns(), ValueType.STRING), k);
meta.setColumnNames(colnames);

// release input and outputs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,12 @@

public class MultiReturnParameterizedBuiltinSPInstruction extends ComputationSPInstruction {
protected ArrayList<CPOperand> _outputs;
protected final boolean _metaReturn;

private MultiReturnParameterizedBuiltinSPInstruction(Operator op, CPOperand input1, CPOperand input2,
ArrayList<CPOperand> outputs, String opcode, String istr) {
boolean metaReturn, ArrayList<CPOperand> outputs, String opcode, String istr) {
super(SPType.MultiReturnBuiltin, op, input1, input2, outputs.get(0), opcode, istr);
_metaReturn = metaReturn;
_outputs = outputs;
}

Expand All @@ -93,14 +95,17 @@ public static MultiReturnParameterizedBuiltinSPInstruction parseInstruction(Stri
// one input and two outputs
CPOperand in1 = new CPOperand(parts[1]);
CPOperand in2 = new CPOperand(parts[2]);
outputs.add(new CPOperand(parts[3], ValueType.FP64, DataType.MATRIX));
outputs.add(new CPOperand(parts[4], ValueType.STRING, DataType.FRAME));
return new MultiReturnParameterizedBuiltinSPInstruction(null, in1, in2, outputs, opcode, str);
int pos = 3;
boolean metaReturn = true;
if( parts.length == 6 ) //no need for meta data
metaReturn = new CPOperand(parts[pos++]).getLiteral().getBooleanValue();
outputs.add(new CPOperand(parts[pos], ValueType.FP64, DataType.MATRIX));
outputs.add(new CPOperand(parts[pos+1], ValueType.STRING, DataType.FRAME));
return new MultiReturnParameterizedBuiltinSPInstruction(null, in1, in2, metaReturn, outputs, opcode, str);
}
else {
throw new DMLRuntimeException("Invalid opcode in MultiReturnBuiltin instruction: " + opcode);
}

}

@Override
Expand All @@ -112,8 +117,8 @@ public void processInstruction(ExecutionContext ec) {
// get input RDD and meta data
FrameObject fo = sec.getFrameObject(input1.getName());
FrameObject fometa = sec.getFrameObject(_outputs.get(1).getName());
JavaPairRDD<Long, FrameBlock> in = (JavaPairRDD<Long, FrameBlock>) sec.getRDDHandleForFrameObject(fo,
FileFormat.BINARY);
JavaPairRDD<Long, FrameBlock> in = (JavaPairRDD<Long, FrameBlock>)
sec.getRDDHandleForFrameObject(fo, FileFormat.BINARY);
String spec = ec.getScalarInput(input2).getStringValue();
DataCharacteristics mcIn = sec.getDataCharacteristics(input1.getName());
DataCharacteristics mcOut = sec.getDataCharacteristics(output.getName());
Expand Down Expand Up @@ -163,7 +168,10 @@ public void processInstruction(ExecutionContext ec) {
// set output and maintain lineage/output characteristics
sec.setRDDHandleForVariable(_outputs.get(0).getName(), out);
sec.addLineageRDD(_outputs.get(0).getName(), input1.getName());
sec.setFrameOutput(_outputs.get(1).getName(), meta);
if( _metaReturn )
sec.setFrameOutput(_outputs.get(1).getName(), meta);
else
sec.setFrameOutput(_outputs.get(1).getName(), new FrameBlock());
}
catch(IOException ex) {
throw new RuntimeException(ex);
Expand Down

0 comments on commit ae2ad07

Please sign in to comment.