[SYSTEMML-1656] Fix BLAS integration (corrupted matrix block apis)

mboehm7 · mboehm7 · commit 474050653b8b · 2017-06-01T15:00:23.000-07:00
The dispatching between operations over uncompressed or compressed
matrix blocks is realized via late binding. The recently added BLAS
integration introduced additional matrix block APIs without overriding
them for compressed matrix blocks. This corrupted, for example,
matrix-vector operations over compressed matrices as they are mistakenly
routed to uncompressed operations. This patch fixes this issue by
removing these unnecessary API extensions and simplifying the CP
aggregate binary instruction to avoid the impression that all compressed
matrices are handled through the vector-matrix branch.
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/AggregateBinaryCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/AggregateBinaryCPInstruction.java
@@ -31,7 +31,6 @@
 import org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator;
 import org.apache.sysml.runtime.matrix.operators.AggregateOperator;
 import org.apache.sysml.runtime.matrix.operators.Operator;
-import org.apache.sysml.utils.NativeHelper;
 
 public class AggregateBinaryCPInstruction extends BinaryCPInstruction
 {
@@ -72,20 +71,16 @@ public void processInstruction(ExecutionContext ec)
 	{	
 		//get inputs
 		MatrixBlock matBlock1 = ec.getMatrixInput(input1.getName());
-    MatrixBlock matBlock2 = ec.getMatrixInput(input2.getName());
+		MatrixBlock matBlock2 = ec.getMatrixInput(input2.getName());
+		
+		//compute matrix multiplication
+		AggregateBinaryOperator ab_op = (AggregateBinaryOperator) _optr;
+		MatrixBlock main = (matBlock2 instanceof CompressedMatrixBlock) ? matBlock2 : matBlock1;
+		MatrixBlock ret = (MatrixBlock) main.aggregateBinaryOperations(matBlock1, matBlock2, new MatrixBlock(), ab_op);
 		
-    //compute matrix multiplication
-    AggregateBinaryOperator ab_op = (AggregateBinaryOperator) _optr;
-		MatrixBlock soresBlock = null;
-		if( matBlock2 instanceof CompressedMatrixBlock )
-			soresBlock = (MatrixBlock) (matBlock2.aggregateBinaryOperations(matBlock1, matBlock2, new MatrixBlock(), ab_op));
-		else  {
-			soresBlock = (MatrixBlock) (matBlock1.aggregateBinaryOperations(matBlock1, matBlock2, new MatrixBlock(), ab_op, NativeHelper.isNativeLibraryLoaded()));
-		}
-			
 		//release inputs/outputs
 		ec.releaseMatrixInput(input1.getName());
 		ec.releaseMatrixInput(input2.getName());
-		ec.setMatrixOutput(output.getName(), soresBlock);
+		ec.setMatrixOutput(output.getName(), ret);
 	}
 }
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -4894,20 +4894,10 @@ private double sumWeightForQuantile()
 	public MatrixValue aggregateBinaryOperations(MatrixIndexes m1Index, MatrixValue m1Value, MatrixIndexes m2Index, MatrixValue m2Value, 
 			MatrixValue result, AggregateBinaryOperator op ) throws DMLRuntimeException
 	{
-		return aggregateBinaryOperations(m1Value, m2Value, result, op, NativeHelper.isNativeLibraryLoaded());
+		return aggregateBinaryOperations(m1Value, m2Value, result, op);
 	}
 
-	public MatrixValue aggregateBinaryOperations(MatrixIndexes m1Index, MatrixValue m1Value, MatrixIndexes m2Index, MatrixValue m2Value, 
-			MatrixValue result, AggregateBinaryOperator op, boolean enableNativeBLAS ) throws DMLRuntimeException
-	{
-		return aggregateBinaryOperations(m1Value, m2Value, result, op, enableNativeBLAS);
-	}
-	
-	public MatrixValue aggregateBinaryOperations(MatrixValue m1Value, MatrixValue m2Value, MatrixValue result, AggregateBinaryOperator op) throws DMLRuntimeException {
-		return aggregateBinaryOperations(m1Value, m2Value, result, op, NativeHelper.isNativeLibraryLoaded());
-	}
-
-	public MatrixValue aggregateBinaryOperations(MatrixValue m1Value, MatrixValue m2Value, MatrixValue result, AggregateBinaryOperator op, boolean nativeMatMult) 
+	public MatrixValue aggregateBinaryOperations(MatrixValue m1Value, MatrixValue m2Value, MatrixValue result, AggregateBinaryOperator op) 
 		throws DMLRuntimeException
 	{
 		//check input types, dimensions, configuration
@@ -4933,7 +4923,7 @@ public MatrixValue aggregateBinaryOperations(MatrixValue m1Value, MatrixValue m2
 			ret.reset(rl, cl, sp.sparse, sp.estimatedNonZeros);
 		
 		//compute matrix multiplication (only supported binary aggregate operation)
-		if( nativeMatMult )
+		if( NativeHelper.isNativeLibraryLoaded() )
 			LibMatrixNative.matrixMult(m1, m2, ret, op.getNumThreads());
 		else if( op.getNumThreads() > 1 )
 			LibMatrixMult.matrixMult(m1, m2, ret, op.getNumThreads());
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedL2SVM.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedL2SVM.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.compress;
+
+import java.io.File;
+import java.util.HashMap;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Test;
+
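+/**
+ * Runs scripts/algorithms/l2-svm.dml with the compression test config and compares the model against R.
+ */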
+public class CompressedL2SVM extends AutomatedTestBase 
+{
+	private final static String TEST_NAME1 = "L2SVM";
+	private final static String TEST_DIR = "functions/compress/";
+	private final static String TEST_CONF = "SystemML-config-compress.xml";
+	private final static File   TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF);
+	
+	private final static double eps = 1e-4;
+	
+	private final static int rows = 1468;
+	private final static int cols = 980;
+		
+	private final static double sparsity1 = 0.7; //dense
+	private final static double sparsity2 = 0.1; //sparse
+	
+	private final static int intercept = 0;
+	private final static double epsilon = 0.000000001;
+	private final static double maxiter = 10;
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_DIR, TEST_NAME1, new String[] { "w" })); 
+	}
+
+	@Test
+	public void testL2SVMDenseCP() {
+		runL2SVMTest(TEST_NAME1, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testL2SVMSparseCP() {
+		runL2SVMTest(TEST_NAME1, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testL2SVMDenseSP() {
+		runL2SVMTest(TEST_NAME1, false, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testL2SVMSparseSP() {
+		runL2SVMTest(TEST_NAME1, true, ExecType.SPARK);
+	}
+	
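+	/**
+	 * Runs the L2SVM script on generated inputs and compares the model "w" against the R result.
+	 * @param testname name of the registered test configuration
+	 * @param sparse if true, X is generated with sparsity 0.1, otherwise with sparsity 0.7
+	 * @param instType execution type used to select the runtime platform
+	 */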
+	private void runL2SVMTest( String testname,boolean sparse, ExecType instType)
+	{
+		//rtplatform for MR
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break;
+		}
+		
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK || rtplatform == RUNTIME_PLATFORM.SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+		
+		try
+		{
+			String TEST_NAME = testname;
+			TestConfiguration config = getTestConfiguration(TEST_NAME);
+			loadTestConfiguration(config);
+			
+			fullDMLScriptName = "scripts/algorithms/l2-svm.dml";
+			programArgs = new String[]{ "-explain", "-stats", "-nvargs", "X="+input("X"), "Y="+input("Y"),
+				"icpt="+String.valueOf(intercept), "tol="+String.valueOf(epsilon), "reg=0.001",
+				"maxiter="+String.valueOf(maxiter), "model="+output("w"), "Log= "};
+
+			rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
+				String.valueOf(maxiter), expectedDir());
+
+			//generate actual datasets
+			double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse?sparsity2:sparsity1, 714);
+			writeInputMatrixWithMTD("X", X, true);
+			double[][] y = TestUtils.round(getRandomMatrix(rows, 1, 0, 1, 1.0, 136));
+			writeInputMatrixWithMTD("Y", y, true);
+					
+			runTest(true, false, null, -1); 
+			runRScript(true); 
+			
+			//compare matrices 
+			HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("w");
+			HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("w");
+			TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+		}
+	}
+
+	/**
+	 * Override default configuration with custom test configuration to ensure
+	 * scratch space and local temporary directory locations are also updated.
+	 */
+	@Override
+	protected File getConfigTemplateFile() {
+		// Instrumentation in this test's output log to show custom configuration file used for template.
+		System.out.println("This test case overrides default configuration with " + TEST_CONF_FILE.getPath());
+		return TEST_CONF_FILE;
+	}
+}
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java
@@ -62,32 +62,26 @@ public void setUp() {
 	}
 
 	@Test
-	public void testGDFOLinregCGDenseCP() {
-		runGDFOTest(TEST_NAME1, false, ExecType.CP);
+	public void testLinregCGDenseCP() {
+		runLinregCGTest(TEST_NAME1, false, ExecType.CP);
 	}
 	
 	@Test
-	public void testGDFOLinregCGSparseCP() {
-		runGDFOTest(TEST_NAME1, true, ExecType.CP);
+	public void testLinregCGSparseCP() {
+		runLinregCGTest(TEST_NAME1, true, ExecType.CP);
 	}
 	
 	@Test
-	public void testGDFOLinregCGDenseSP() {
-		runGDFOTest(TEST_NAME1, false, ExecType.SPARK);
+	public void testLinregCGDenseSP() {
+		runLinregCGTest(TEST_NAME1, false, ExecType.SPARK);
 	}
 	
 	@Test
-	public void testGDFOLinregCGSparseSP() {
-		runGDFOTest(TEST_NAME1, true, ExecType.SPARK);
+	public void testLinregCGSparseSP() {
+		runLinregCGTest(TEST_NAME1, true, ExecType.SPARK);
 	}
 	
-	/**
-	 * 
-	 * @param sparseM1
-	 * @param sparseM2
-	 * @param instType
-	 */
-	private void runGDFOTest( String testname,boolean sparse, ExecType instType)
+	private void runLinregCGTest( String testname,boolean sparse, ExecType instType)
 	{
 		//rtplatform for MR
 		RUNTIME_PLATFORM platformOld = rtplatform;
diff --git a/src/test/scripts/functions/compress/L2SVM.R b/src/test/scripts/functions/compress/L2SVM.R
@@ -0,0 +1,108 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+library("Matrix")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")));
+Y = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")));
+intercept = as.integer(args[2]);
+epsilon = as.double(args[3]);
+lambda = 0.001;
+maxiterations = as.integer(args[4]);
+
+check_min = min(Y)
+check_max = max(Y)
+num_min = sum(Y == check_min)
+num_max = sum(Y == check_max)
+if(num_min + num_max != nrow(Y)){ 
+	print("please check Y, it should contain only 2 labels") 
+}else{
+	if(check_min != -1 | check_max != +1) 
+		Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
+}
+
+dimensions = ncol(X)
+
+if (intercept == 1) {  # append a constant column of ones to X for the intercept term
+	ones  = matrix(1, nrow=nrow(X), ncol=1)  # matrix() takes nrow/ncol; one row per sample in X
+	X = cbind(X, ones);
+}
+
+num_rows_in_w = dimensions
+if(intercept == 1){
+	num_rows_in_w = num_rows_in_w + 1
+}
+w = matrix(0, num_rows_in_w, 1)
+
+g_old = t(X) %*% Y
+s = g_old
+
+Xw = matrix(0,nrow(X),1)
+iter = 0
+positive_label = check_max
+negative_label = check_min
+
+continue = TRUE
+while(continue && iter < maxiterations){
+	t = 0
+	Xd = X %*% s
+	wd = lambda * sum(w * s)
+	dd = lambda * sum(s * s)
+	continue1 = TRUE
+	while(continue1){
+		tmp_Xw = Xw + t*Xd
+		out = 1 - Y * (tmp_Xw)
+		sv = which(out > 0)
+		g = wd + t*dd - sum(out[sv] * Y[sv] * Xd[sv])
+		h = dd + sum(Xd[sv] * Xd[sv])
+		t = t - g/h
+		continue1 = (g*g/h >= 1e-10)
+	}
+	
+	w = w + t*s
+	Xw = Xw + t*Xd
+		
+	out = 1 - Y * (X %*% w)
+	sv = which(out > 0)
+	obj = 0.5 * sum(out[sv] * out[sv]) + lambda/2 * sum(w * w)
+	g_new = t(X[sv,]) %*% (out[sv] * Y[sv]) - lambda * w
+	
+	print(paste("OBJ : ", obj))
+
+	continue = (t*sum(s * g_old) >= epsilon*obj)
+	
+	be = sum(g_new * g_new)/sum(g_old * g_old)
+	s = be * s + g_new
+	g_old = g_new
+	
+	iter = iter + 1
+}
+
+extra_model_params = matrix(0, 4, 1)
+extra_model_params[1,1] = positive_label
+extra_model_params[2,1] = negative_label
+extra_model_params[3,1] = intercept
+extra_model_params[4,1] = dimensions
+
+w = t(cbind(t(w), t(extra_model_params)))
+
+writeMM(as(w,"CsparseMatrix"), paste(args[5], "w", sep=""));