scripts/algorithms/ALS-CG.dml

#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

#  
# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX X INTO TWO MATRICES U AND V
# USING THE ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM WITH CONJUGATE GRADIENT.
# MATRICES U AND V ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION).
#
# INPUT   PARAMETERS:
# ---------------------------------------------------------------------------------------------
# NAME    TYPE     DEFAULT  MEANING
# ---------------------------------------------------------------------------------------------
# X       String   ---      Location to read the input matrix X to be factorized
# U       String   ---      Location to write the factor matrix U
# V       String   ---      Location to write the factor matrix V
# rank    Int      10       Rank of the factorization
# reg     String   "L2"	    Regularization: 
#                           "L2" = L2 regularization;
#                           "wL2" = weighted L2 regularization
# lambda  Double   0.000001 Regularization parameter, no regularization if 0.0
# maxi    Int      50       Maximum number of iterations
# check   Boolean  TRUE     Check for convergence after every iteration, i.e., updating U and V once
# thr     Double   0.0001   Assuming check is set to TRUE, the algorithm stops and convergence is declared 
#                           if the decrease in loss in any two consecutive iterations falls below this threshold; 
#                           if check is FALSE thr is ignored
# fmt     String   "text"   The output format of the factor matrices L and R, such as "text" or "csv"
# ---------------------------------------------------------------------------------------------
# OUTPUT: 
# 1- An m x r matrix U, where r is the factorization rank 
# 2- An r x n matrix V
#
# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
# hadoop jar SystemDS.jar -f ALS-CG.dml -nvargs X=INPUT_DIR/X U=OUTPUT_DIR/U V=OUTPUT_DIR/V rank=10 reg="L2" lambda=0.0001 fmt=csv

fileX      = $X;
fileU      = $U;
fileV      = $V;

# Default values of some parameters
r          = ifdef($rank, 10);
reg        = ifdef($reg, "L2");
lambda     = ifdef($lambda, 0.000001);
max_iter   = ifdef($maxi, 50);
check      = ifdef($check, TRUE);
thr        = ifdef($thr, 0.0001);
fmtO       = ifdef($fmt, "text");
 
 
###### MAIN PART ######
X = read (fileX);
m = nrow (X);
n = ncol (X);

# initializing factor matrices
U = rand (rows = m, cols = r, min = -0.5, max = 0.5); # mxr
V = rand (rows = n, cols = r, min = -0.5, max = 0.5); # nxr

W = (X != 0);
  
# check for regularization
row_nonzeros = matrix(0,rows=1,cols=1);
col_nonzeros = matrix(0,rows=1,cols=1);
if( reg == "L2" ) {
  print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
  row_nonzeros = matrix(1, nrow(W), 1);
  col_nonzeros = matrix(1, ncol(W), 1);
} 
else if( reg == "wL2" ) {
  print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
  row_nonzeros = rowSums(W);
  col_nonzeros = t(colSums(W));
} 
else {
  stop ("wrong regularization! " + reg);
}

# Loss Function with L2:
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
#          + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
# Loss Function with weighted L2:
# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
#          + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros))

is_U = TRUE;  # TRUE = Optimize U, FALSE = Optimize V
maxinneriter = r ; # min (ncol (U), 15);

loss_init = 0.0; # only used if check is TRUE
if( check ) {
  loss_init = 0.5 * sum( (X != 0) * (U %*% t(V) - X) ^ 2);
  loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
  print ("-----   Initial train loss: " + loss_init + " -----");
}

it = 0;
converged = FALSE;
while( as.integer(it/2) < max_iter & ! converged ) 
{
  it = it + 1;
  if( is_U ) {
    G = ((X != 0) * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
  } 
  else {
    G = t(t(U) %*% ((X != 0) * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
  }

  R = -G;
  S = R;
  norm_G2 = sum (G ^ 2);
  norm_R2 = norm_G2;
  
  inneriter = 1;
  tt = 0.000000001;
  while( norm_R2 > tt * norm_G2 & inneriter <= maxinneriter )
  {
    if( is_U ) {
      HS = (W * (S %*% t(V))) %*% V + lambda * S * row_nonzeros;
      alpha = norm_R2 / sum (S * HS);
      U = U + alpha * S;  # OK since U is not used in HS
    } 
    else {
      HS = t(t(U) %*% (W * (U %*% t(S)))) + lambda * S * col_nonzeros;
      alpha = norm_R2 / sum (S * HS);
      V = V + alpha * S;  # OK since V is not used in HS
    }

    R = R - alpha * HS;
    old_norm_R2 = norm_R2;
    norm_R2 = sum (R ^ 2);
    S = R + (norm_R2 / old_norm_R2) * S;
    inneriter = inneriter + 1;
  }  

  is_U = ! is_U;
	
  # check for convergence
  if( check & (it%%2 == 0) ) {
    loss_cur = 0.5 * sum( (X != 0) * (U %*% t(V) - X) ^ 2);
    loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
	
    loss_dec = (loss_init - loss_cur) / loss_init;
    print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
    if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
      print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
      converged = TRUE;
    }
    loss_init = loss_cur;
  }
}

if( check ) {
  print ("----- Final train loss: " + loss_init + " -----");
}

if( !converged ) {
  print ("Max iteration achieved but not converged!");
}

V = t(V);
write (U, fileU, format=fmtO);
write (V, fileV, format=fmtO);