# -*- coding: utf-8 -*-
"""
Created on Tue Jun 11 16:52:21 2019
@author: amarmore
# TODO: An error is raised with numpy versions higher than 1.24. It is probably due to the following modification in numpy:
# "Ragged array creation will now always raise a ValueError unless dtype=object is passed. This includes very deeply nested sequences." (https://numpy.org/devdocs/release/1.24.0-notes.html)
# To debug.
"""
import numpy as np
import scipy
import time
import tensorly as tl
from tensorly.decomposition import tucker as tl_tucker
import math
import nn_fac.update_rules.nnls as nnls
import nn_fac.update_rules.mu as mu
import nn_fac.utils.beta_divergence as beta_div
import nn_fac.utils.errors as err
# %% HALS
def ntd(tensor, ranks, init = "random", core_0 = None, factors_0 = [], n_iter_max=100, tol=1e-6,
sparsity_coefficients = [], fixed_modes = [], normalize = [], mode_core_norm = None,
verbose=False, return_costs=False, deterministic=False):
"""
======================================
Nonnegative Tucker Decomposition (NTD)
======================================
Factorization of a tensor T into nonnegative matrices,
linked by a nonnegative core tensor, of dimensions equal to the ranks
(in general smaller than the dimensions of the tensor).
See more details about the NTD in [1].
For example, in the third-order case, resolution of:
T \approx (W \otimes H \otimes Q) G
In this example, W, H and Q are the factors, one per mode, and G is the core tensor.
W is of size T.shape[0] * ranks[0],
H is of size T.shape[1] * ranks[1],
Q is of size T.shape[2] * ranks[2],
G is of size ranks[0] * ranks[1] * ranks[2].
More precisely, the chosen optimization algorithm is the HALS [2] for the factors,
which updates each factor columnwise, fixing all the other columns,
and a projected gradient for the core,
which reduces the memory that performing HALS on the core would require.
The projected gradient rule was derived by the authors, and does not appear in any publication for now.
Tensors are manipulated with the tensorly toolbox [3].
In tensorly and in our convention, tensors are unfolded and treated as described in [4].
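Concretely, the projected gradient update on the core implemented in `one_ntd_step` can be written as
    G <- max(G - eta * grad_G, 0)
where grad_G is the gradient of the (possibly sparsity-penalized) squared Frobenius error with respect to the core,
and eta is the product of the inverses of the largest singular values of the factors' Gram matrices
(an upper bound on the inverse Lipschitz constant of this gradient).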
Parameters
----------
tensor: tensorly tensor
The nonnegative tensor T, to factorize
ranks: list of integers
The ranks for each factor of the decomposition
init: "random" | "tucker" | "custom" |
- If set to random:
Initializes with random factors of the correct size.
The randomization is the uniform distribution in [0,1),
which is the default from numpy random.
- If set to tucker:
Resolves a Tucker decomposition of the tensor T (by HOSVD) and
initializes the factors and the core with this resolution, clipped to be nonnegative.
The Tucker decomposition is performed with tensorly [3].
- If set to "chromas":
Resolves a Tucker decomposition of the tensor T (by HOSVD) and
initializes the factors and the core with this resolution, clipped to be nonnegative.
The Tucker decomposition is performed with tensorly [3].
Contrary to the "tucker" init, the first factor is then set to the 12x12 identity matrix,
because this decomposition model is specific to music expressed in chromas.
- If set to custom:
core_0 and factors_0 (see below) will be used for the initialization
Default: random
core_0: None or tensor of nonnegative floats
A custom initialization of the core, used only in "custom" init mode.
Default: None
factors_0: None or list of array of nonnegative floats
A custom initialization of the factors, used only in "custom" init mode.
Default: None
n_iter_max: integer
The maximal number of iterations before stopping the algorithm
Default: 100
tol: float
Threshold on the improvement in cost function value.
Between two successive iterations, if the difference between
both cost function values is below this threshold, the algorithm stops.
Default: 1e-6
sparsity_coefficients: list of float (as many as the number of modes + 1 for the core)
The sparsity coefficients on each factor and on the core respectively.
If set to None or [], the algorithm is computed without sparsity.
Default: []
fixed_modes: list of integers (between 0 and the number of modes + 1 for the core)
Has to be set in order not to update a factor, the factors being taken in the order of the modes and the core being referred to last.
Default: []
normalize: list of boolean (as many as the number of modes + 1 for the core)
Indicates whether the factors need to be normalized or not.
The normalization is a l_2 normalization on each of the rank components
(for the factors, each column will be normalized, i.e. each atom of the dimension of the current rank).
Default: []
mode_core_norm: integer or None
The mode along which to normalize the core, or None if normalization shouldn't be enforced.
Only used if the last element of the previous "normalize" argument is set to True.
Indexes of the modes start at 0.
Default: None
verbose: boolean
Indicates whether the algorithm prints the successive
normalized cost function values or not
Default: False
return_costs: boolean
Indicates whether the algorithm should return all normalized cost function
values and computation time of each iteration or not
Default: False
deterministic: boolean
Runs the algorithm in a deterministic way, by fixing the seed in all possible randomizations,
and by fixing the optimization techniques in the NNLS which otherwise depend on runtime.
This is made to enhance reproducible research, and should be set to True for the computation of results.
Returns
-------
core: tensorly tensor
The core tensor linking the factors of the decomposition
factors: list of numpy arrays #TODO: For the tensorly pull request, replace numpy by the backend
A list containing all the factors computed with the NTD
cost_fct_vals: list, only if return_costs == True
A list of the normalized cost function values, for every iteration of the algorithm.
toc: list, only if return_costs == True
A list of the accumulated computation time at each iteration
Example
-------
tensor = np.random.rand(80,100,120)
ranks = [10,20,15]
core, factors = NTD.ntd(tensor, ranks = ranks, init = "tucker", verbose = True,
sparsity_coefficients = [None, None, None, None], normalize = [True, True, False, True])
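To also retrieve the normalized cost function values and the computation time of each iteration,
set return_costs to True, for instance:
core, factors, cost_fct_vals, toc = NTD.ntd(tensor, ranks = ranks, init = "tucker", return_costs = True)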
References
----------
[1] Tamara G Kolda and Brett W Bader. "Tensor decompositions and applications",
SIAM Review 51.3 (2009), pp. 455-500.
[2] N. Gillis and F. Glineur, Accelerated Multiplicative Updates and
Hierarchical ALS Algorithms for Nonnegative Matrix Factorization,
Neural Computation 24 (4): 1085-1105, 2012.
[3] J. Kossaifi et al. "TensorLy: Tensor Learning in Python",
arXiv preprint (2018).
[4] Jeremy E Cohen. "About notations in multiway array processing",
arXiv preprint arXiv:1511.01306, (2015).
"""
factors = []
nb_modes = len(tensor.shape)
if type(ranks) is int:
ranks = [ranks for i in range(nb_modes)]
elif len(ranks) != nb_modes:
raise err.InvalidRanksException("The number of ranks is different from the number of modes of the tensor, which is incorrect.") from None
for i in range(nb_modes):
if ranks[i] > tensor.shape[i]:
#raise err.InvalidRanksException("The " + str(i) + "-th mode rank is larger than the shape of the tensor, which is incorrect.") from None
ranks[i] = tensor.shape[i]
#warnings.warn('ignoring MIDI message %s' % msg)
print("The " + str(i) + "-th mode rank was larger than the shape of the tensor, which is incorrect. Set to the shape of the tensor")
if init.lower() == "random":
for mode in range(nb_modes):
if deterministic:
seed = np.random.RandomState(mode * 10)
random_array = seed.rand(tensor.shape[mode], ranks[mode])
else:
random_array = np.random.rand(tensor.shape[mode], ranks[mode])
factors.append(tl.tensor(random_array))
if deterministic:
seed = np.random.RandomState(nb_modes * 10)
core = tl.tensor(seed.rand(np.prod(ranks)).reshape(tuple(ranks)))
else:
core = tl.tensor(np.random.rand(np.prod(ranks)).reshape(tuple(ranks)))
elif init.lower() == "tucker":
if deterministic:
init_core, init_factors = tl_tucker(tensor, ranks, random_state = 8142)
else:
init_core, init_factors = tl_tucker(tensor, ranks)
factors = [tl.abs(f) for f in init_factors]
core = tl.abs(init_core)
elif init.lower() == "chromas":
#TODO: Tucker where W is already set to I12
if deterministic:
init_core, init_factors = tl_tucker(tensor, ranks, random_state = 8142)
else:
init_core, init_factors = tl_tucker(tensor, ranks)
init_factors[0] = np.identity(12) # Hard-coded, to be modified
factors = [tl.abs(f) for f in init_factors]
if 0 not in fixed_modes:
fixed_modes.append(0)
core = tl.abs(init_core)
elif init.lower() == "custom":
factors = factors_0
core = core_0
if len(factors) != nb_modes:
raise err.CustomNotEngouhFactors("Custom initialization, but not enough factors")
else:
for array in factors:
if array is None:
raise err.CustomNotValidFactors("Custom initialization, but one factor is set to 'None'")
if core is None:
raise err.CustomNotValidCore("Custom initialization, but the core is set to 'None'")
else:
raise err.InvalidInitializationType('Initialization type not understood: ' + init)
return compute_ntd(tensor, ranks, core, factors, n_iter_max=n_iter_max, tol=tol,
sparsity_coefficients = sparsity_coefficients, fixed_modes = fixed_modes,
normalize = normalize, mode_core_norm = mode_core_norm,
verbose=verbose, return_costs=return_costs, deterministic = deterministic)
def compute_ntd(tensor_in, ranks, core_in, factors_in, n_iter_max=100, tol=1e-6,
sparsity_coefficients = [], fixed_modes = [], normalize = [], mode_core_norm=None,
verbose=False, return_costs=False, deterministic=False):
"""
Computation of a Nonnegative Tucker Decomposition [1]
via hierarchical alternating least squares (HALS) [2],
with factors_in as initialization.
Tensors are manipulated with the tensorly toolbox [3].
In tensorly and in our convention, tensors are unfolded and treated as described in [4].
Parameters
----------
tensor_in: tensorly tensor
The tensor T, which is factorized
ranks: list of integers
The ranks for each mode of the decomposition
core_in: tensorly tensor
The initial core
factors_in: list of array of nonnegative floats
The initial factors
n_iter_max: integer
The maximal number of iterations before stopping the algorithm
Default: 100
tol: float
Threshold on the improvement in cost function value.
Between two iterations, if the difference between
both cost function values is below this threshold, the algorithm stops.
Default: 1e-6
sparsity_coefficients: list of float (as many as the number of modes + 1 for the core)
The sparsity coefficients on each factor and on the core respectively.
If set to None or [], the algorithm is computed without sparsity.
Default: []
fixed_modes: list of integers (between 0 and the number of modes + 1 for the core)
Has to be set in order not to update a factor, the factors being taken in the order of the modes and the core being referred to last.
Default: []
normalize: list of boolean (as many as the number of modes + 1 for the core)
Indicates whether the factors need to be normalized or not.
The normalization is a l_2 normalization on each of the rank components
(for the factors, each column will be normalized, i.e. each atom of the dimension of the current rank).
Default: []
mode_core_norm: integer or None
The mode along which to normalize the core, or None if normalization shouldn't be enforced.
Only used if the last element of the previous "normalize" argument is set to True.
Indexes of the modes start at 0.
Default: None
verbose: boolean
Indicates whether the algorithm prints the successive
normalized cost function values or not
Default: False
return_costs: boolean
Indicates whether the algorithm should return all normalized cost function
values and computation time of each iteration or not
Default: False
deterministic: boolean
Runs the algorithm in a deterministic way, by fixing the seed in all possible randomizations.
This is made to enhance reproducible research.
Returns
-------
core: tensorly tensor
The core tensor linking the factors of the decomposition
factors: list of numpy arrays #TODO: For the tensorly pull request, replace numpy by the backend
A list containing all the factors computed with the NTD
cost_fct_vals: list, only if return_costs == True
A list of the normalized cost function values, for every iteration of the algorithm.
toc: list, only if return_costs == True
A list of the accumulated computation time at each iteration
References
----------
[1] Tamara G Kolda and Brett W Bader. "Tensor decompositions and applications",
SIAM Review 51.3 (2009), pp. 455-500.
[2] N. Gillis and F. Glineur, Accelerated Multiplicative Updates and
Hierarchical ALS Algorithms for Nonnegative Matrix Factorization,
Neural Computation 24 (4): 1085-1105, 2012.
[3] J. Kossaifi et al. "TensorLy: Tensor Learning in Python",
arXiv preprint (2018).
[4] Jeremy E Cohen. "About notations in multiway array processing",
arXiv preprint arXiv:1511.01306, (2015).
"""
# initialisation - store the input variables
core = core_in.copy()
factors = factors_in.copy()
tensor = tensor_in
norm_tensor = tl.norm(tensor, 2)
# set init if problem
#TODO: set them as warnings
nb_modes = len(tensor.shape)
if sparsity_coefficients == None or len(sparsity_coefficients) != nb_modes + 1:
print("Irrelevant number of sparsity coefficient (different from the number of modes + 1 for the core), they have been set to None.")
sparsity_coefficients = [None for i in range(nb_modes + 1)]
if fixed_modes == None:
fixed_modes = []
if normalize == None or len(normalize) != nb_modes + 1:
print("Irrelevant number of normalization booleans (different from the number of modes + 1 for the core), they have been set to False.")
normalize = [False for i in range(nb_modes + 1)]
if normalize[-1] and (mode_core_norm == None or mode_core_norm < 0 or mode_core_norm >= nb_modes):
print("The core was asked to be normalized, but an invalid mode was specified. Normalization has been set to False.")
normalize[-1] = False
if not normalize[-1] and (mode_core_norm != None and mode_core_norm >= 0 and mode_core_norm < nb_modes):
print("The core was asked NOT to be normalized, but mode_core_norm was set to a valid norm. Is this a mistake?")
# initialisation - declare local variables
cost_fct_vals = []
tic = time.time()
toc = []
# initialisation - unfold the tensor according to the modes
#unfolded_tensors = []
#for mode in range(tl.ndim(tensor)):
# unfolded_tensors.append(tl.base.unfold(tensor, mode))
# Iterate over one step of NTD
for iteration in range(n_iter_max):
# One pass of least squares on each updated mode
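# In deterministic mode, alpha is set to +inf so that the inner NNLS acceleration
# does not depend on runtime, which makes successive runs reproducible (see one_ntd_step).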
if deterministic:
core, factors, cost = one_ntd_step(tensor, ranks, core, factors, norm_tensor,
sparsity_coefficients, fixed_modes, normalize, mode_core_norm, alpha = math.inf)
else:
core, factors, cost = one_ntd_step(tensor, ranks, core, factors, norm_tensor,
sparsity_coefficients, fixed_modes, normalize, mode_core_norm)
# Store the computation time
toc.append(time.time() - tic)
cost_fct_vals.append(cost)
if verbose:
if iteration == 0:
print('Normalized cost function value={}'.format(cost))
else:
if cost_fct_vals[-2] - cost_fct_vals[-1] > 0:
print('Normalized cost function value={}, variation={}.'.format(
cost_fct_vals[-1], cost_fct_vals[-2] - cost_fct_vals[-1]))
else:
# print in red when the cost function increases (shouldn't happen)
print('\033[91m' + 'Normalized cost function value={}, variation={}.'.format(
cost_fct_vals[-1], cost_fct_vals[-2] - cost_fct_vals[-1]) + '\033[0m')
if iteration > 0 and abs(cost_fct_vals[-2] - cost_fct_vals[-1]) < tol:
# Stop condition: absolute difference between the last two cost function values < tol
if verbose:
print('Converged in {} iterations.'.format(iteration))
break
if return_costs:
return core, factors, cost_fct_vals, toc
else:
return core, factors
def one_ntd_step(tensor, ranks, in_core, in_factors, norm_tensor,
sparsity_coefficients, fixed_modes, normalize, mode_core_norm,
alpha=0.5, delta=0.01):
"""
One pass of Hierarchical Alternating Least Squares update along all modes,
and gradient update on the core,
which decreases reconstruction error in Nonnegative Tucker Decomposition.
Update the factors by solving a least squares problem per mode, as described in [1].
Note that the unfolding order is the one described in [2], which is different from [1].
This function is strictly superior to a generic least squares solver run on the
matricized problem min_X ||Y - AX||_F^2, since A is structured as a
Kronecker product of the other factors and the core.
Tensors are manipulated with the tensorly toolbox [3].
Parameters
----------
tensor: tensorly tensor
The nonnegative tensor T to factorize (e.g. a spectrogram tensor).
ranks: list of integers
Ranks for each factor of the decomposition.
in_core : tensorly tensor
Current estimates of the core
in_factors: list of array
Current estimates for the factors of this NTD.
The value of factor[update_mode] will be updated using a least squares update.
The values in in_factors are not modified.
norm_tensor : float
The Frobenius norm of the input tensor
sparsity_coefficients: list of float (as many as the number of modes + 1 for the core)
The sparsity coefficients on each factor and on the core respectively.
fixed_modes: list of integers (between 0 and the number of modes + 1 for the core)
Has to be set in order not to update a factor, the factors being taken in the order of the modes and the core being referred to last.
normalize: list of boolean (as many as the number of modes + 1 for the core)
Indicates whether the factors need to be normalized or not.
The normalization is a l_2 normalization on each of the rank components
(for the factors, each column will be normalized, i.e. each atom of the dimension of the current rank).
mode_core_norm: integer or None
The mode along which to normalize the core, or None if normalization shouldn't be enforced.
Only used if the last element of the previous "normalize" argument is set to True.
Indexes of the modes start at 0.
Default: None
alpha : positive float
Ratio between outer computations and inner loops. Typically set to 0.5 or 1.
Set to +inf in the deterministic mode, as the acceleration otherwise depends on runtime.
Default: 0.5
delta : float in [0,1]
Early stop criterion, while err_k > delta*err_0. Set small for
almost exact nnls solution, or larger (e.g. 1e-2) for inner loops
of a NTD computation.
Default: 0.01
Returns
-------
core: tensorly tensor
The core tensor linking the factors of the decomposition
factors: list of factors
An array containing all the factors computed with the NTD
cost_fct_val:
The value of the cost function at this step,
normalized by the squared norm of the original tensor.
References
----------
[1] Tamara G Kolda and Brett W Bader. "Tensor decompositions and applications",
SIAM Review 51.3 (2009), pp. 455-500.
[2] Jeremy E Cohen. "About notations in multiway array processing",
arXiv preprint arXiv:1511.01306, (2015).
[3] J. Kossaifi et al. "TensorLy: Tensor Learning in Python",
arXiv preprint (2018).
"""
# Avoiding errors
for fixed_value in fixed_modes:
sparsity_coefficients[fixed_value] = None
# Copy
core = in_core.copy()
factors = in_factors.copy()
# Generating the mode update sequence
modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes]
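# For each mode, the HALS subproblem is min_{U >= 0} ||X_(mode) - U B||_F^2 with B = G_(mode) (kron of the other factors)^T.
# Only the Gram matrix UtU = B B^T and the cross-product UtM = B X_(mode)^T are formed below, never B itself,
# which keeps the memory footprint low.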
for mode in modes_list:
#unfolded_core = tl.base.unfold(core, mode)
tic = time.time()
# UtU
# First, element-wise products
# some computations could be reused but the gain is small.
elemprod = factors.copy()
for i, factor in enumerate(factors):
if i != mode:
elemprod[i] = tl.dot(tl.conj(tl.transpose(factor)), factor)
# Second, the multiway product with core G
temp = tl.tenalg.multi_mode_dot(core, elemprod, skip=mode)
# this line can be computed with tensor contractions
con_modes = [i for i in range(tl.ndim(tensor)) if i != mode]
# Depending on the version of tensorly, use a different version of the API
UtU = tl.tenalg.contract(temp, con_modes, core, con_modes) # Tensorly v0.6.0
# UtU = tl.tenalg.tensordot(temp, core, con_modes) # Replacing contraction because they had been removed in Tensorly v0.9.0
#UtU = unfold(temp, mode)@tl.transpose(unfold(core, mode))
# UtM
# First, the contraction of data with other factors
temp = tl.tenalg.multi_mode_dot(tensor, factors, skip=mode, transpose = True)
# again, computable by tensor contractions
#MtU = unfold(temp, mode)@tl.transpose(unfold(core, mode))
# Depending on the version of tensorly, use a different version of the API
MtU = tl.tenalg.contract(temp, con_modes, core, con_modes) # Tensorly v0.6.0
# MtU = tl.tenalg.tensordot(temp, core, con_modes) # Replacing contraction because they had been removed in Tensorly v0.9.0
UtM = tl.transpose(MtU)
# Computing the Kronecker product
#kron = tl.tenalg.kronecker(factors, skip_matrix = mode, reverse = False)
#kron_core = tl.dot(kron, tl.transpose(unfolded_core))
#rhs = tl.dot(unfolded_tensors[mode], kron_core)
# Maybe suboptimal
#cross = tl.dot(tl.transpose(kron_core), kron_core)
timer = time.time() - tic
# Call the hals resolution with nnls, optimizing the current mode
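# hals_nnls_acc returns a tuple whose first element is the updated matrix; only that element is kept here.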
factors[mode] = tl.transpose(nnls.hals_nnls_acc(UtM, UtU, tl.transpose(factors[mode]),
maxiter=100, atime=timer, alpha=alpha, delta=delta,
sparsity_coefficient = sparsity_coefficients[mode], normalize = normalize[mode])[0])
#refolded_tensor = tl.base.fold(unfolded_tensors[0], 0, tensor_shape)
# Core update
#all_MtX = tl.tenalg.multi_mode_dot(tensor, factors, transpose = True)
# better implementation: reuse the computation of temp !
# Also reuse elemprod from the last update
all_MtX = tl.tenalg.mode_dot(temp, tl.transpose(factors[modes_list[-1]]), modes_list[-1])
all_MtM = elemprod.copy()
all_MtM[modes_list[-1]] = factors[modes_list[-1]].T@factors[modes_list[-1]]
#all_MtM = np.array([fac.T@fac for fac in factors])
# Projected gradient
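# The gradient step is the product of the inverses of the largest singular values of the Gram matrices in all_MtM,
# i.e. (a bound on) the inverse Lipschitz constant of the gradient of the quadratic term with respect to the core.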
gradient_step = 1
#print(f"factors[modes_list[-1]]: {factors[modes_list[-1]]}")
#print(f"all_MtM: {all_MtM}")
for MtM in all_MtM:
#print(f"MtM: {MtM}")
gradient_step *= 1/(scipy.sparse.linalg.svds(MtM, k=1)[1][0])
gradient_step = round(gradient_step, 6) # Heuristic, to avoid accumulating numerical imprecision
cnt = 1
upd_0 = 0
upd = 1
if sparsity_coefficients[-1] is None:
sparse = 0
else:
sparse = sparsity_coefficients[-1]
# TODO: dynamic stopping criterion
# Maybe: try fast gradient instead of gradient
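# Each update core <- core - min(gradient_step * gradient, core) is equivalent to the projected gradient step
# core <- max(core - gradient_step * gradient, 0), so the core stays nonnegative.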
while cnt <= 300 and upd>= delta * upd_0:
gradient = - all_MtX + tl.tenalg.multi_mode_dot(core, all_MtM, transpose = False) + sparse * tl.ones(core.shape)
# Proposition of reformulation for error computations
delta_core = np.minimum(gradient_step*gradient, core)
core = core - delta_core
upd = tl.norm(delta_core)
if cnt == 1:
upd_0 = upd
cnt += 1
if normalize[-1]:
unfolded_core = tl.unfold(core, mode_core_norm)
for idx_mat in range(unfolded_core.shape[0]):
if tl.norm(unfolded_core[idx_mat]) != 0:
unfolded_core[idx_mat] = unfolded_core[idx_mat] / tl.norm(unfolded_core[idx_mat], 2)
core = tl.fold(unfolded_core, mode_core_norm, core.shape)
# Adding the l1 norm value to the reconstruction error
sparsity_error = 0
for index, sparse in enumerate(sparsity_coefficients):
if sparse:
if index < len(factors):
sparsity_error += 2 * (sparse * np.linalg.norm(factors[index], ord=1))
elif index == len(factors):
sparsity_error += 2 * (sparse * tl.norm(core, 1))
else:
raise NotImplementedError("TODEBUG: Too many sparsity coefficients, should have been raised before.")
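# The squared reconstruction error ||T - G x {U}||_F^2 is expanded as
# ||T||^2 - 2 <all_MtX, G> + <G x {all_MtM}, G>, which avoids forming the reconstructed tensor.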
rec_error = norm_tensor ** 2 - 2*tl.tenalg.inner(all_MtX, core) + tl.tenalg.inner(tl.tenalg.multi_mode_dot(core, all_MtM, transpose = False), core)
cost_fct_val = (rec_error + sparsity_error) / (norm_tensor ** 2)
#exhaustive_rec_error = (tl.norm(tensor - tl.tenalg.multi_mode_dot(core, factors, transpose = False), 2) + sparsity_error) / norm_tensor
#print("diff: " + str(rec_error - exhaustive_rec_error))
#print("max" + str(np.amax(factors[2])))
return core, factors, cost_fct_val # exhaustive_rec_error
# %% With MU rules
######################### Temporary, to test MU without breaking everything (should be merged into ntd in the end, but this function is still under development)
def ntd_mu(tensor, ranks, init = "random", core_0 = None, factors_0 = [], n_iter_max=1000, tol=1e-6,
beta = 2, sparsity_coefficients = [], fixed_modes = [], normalize = [], mode_core_norm = None,
verbose=False, return_costs=False, deterministic=False):
"""
=============================================================================
Nonnegative Tucker Decomposition (NTD) minimizing the $\beta$-divergence loss
=============================================================================
Factorization of a tensor T into nonnegative matrices,
linked by a nonnegative core tensor, of dimensions equal to the ranks
(in general smaller than the dimensions of the tensor).
See more details about the NTD in [1].
For example, in the third-order case, resolution of:
T \approx (W \otimes H \otimes Q) G
In this example, W, H and Q are the factors, one per mode, and G is the core tensor.
W is of size T.shape[0] * ranks[0],
H is of size T.shape[1] * ranks[1],
Q is of size T.shape[2] * ranks[2],
G is of size ranks[0] * ranks[1] * ranks[2].
In this context, \approx means that the tensor product (W \otimes H \otimes Q) G
is meant to be as close as possible to T, with respect to some distance or divergence function.
In `ntd_mu`, this divergence is the $\beta$-divergence loss [2].
This divergence function generalizes some other divergences such as the Euclidean norm,
the Kullback-Leibler divergence and the Itakura-Saito divergence.
This approximation is obtained with an optimization algorithm, and more specifically,
by using the Multiplicative Update as defined for NMF in [3] and redeveloped in [2].
This algorithm will be described in a future publication
(TODO: update this comment at submission/publication time).
Tensors are manipulated with the tensorly toolbox [4].
In tensorly and in our convention, tensors are unfolded and treated as described in [5].
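For reference, following [2], the element-wise beta-divergence is, for beta not in {0, 1}:
    d_beta(x|y) = ( x^beta + (beta - 1) * y^beta - beta * x * y^(beta - 1) ) / ( beta * (beta - 1) ),
with the Kullback-Leibler divergence (beta = 1) and the Itakura-Saito divergence (beta = 0) obtained as limit cases,
and half the squared Euclidean distance recovered for beta = 2.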
Parameters
----------
tensor: tensorly tensor
The nonnegative tensor T, to factorize
ranks: list of integers
The ranks for each factor of the decomposition
init: "random" | "tucker" | "custom" |
- If set to random:
Initializes with random factors of the correct size.
The randomization is the uniform distribution in [0,1),
which is the default from numpy random.
- If set to tucker:
Resolves a Tucker decomposition of the tensor T (by HOSVD) and
initializes the factors and the core with this resolution, clipped to be nonnegative.
The Tucker decomposition is performed with tensorly [4].
- If set to custom:
core_0 and factors_0 (see below) will be used for the initialization
Default: random
core_0: None or tensor of nonnegative floats
A custom initialization of the core, used only in "custom" init mode.
Default: None
factors_0: None or list of array of nonnegative floats
A custom initialization of the factors, used only in "custom" init mode.
Default: None
n_iter_max: integer
The maximal number of iterations before stopping the algorithm
Default: 1000
tol: float
Threshold on the improvement in cost function value.
Between two successive iterations, if the difference between
both cost function values is below this threshold, the algorithm stops.
Default: 1e-6
beta: float
The beta parameter of the beta-divergence.
2 - Euclidean norm
1 - Kullback-Leibler divergence
0 - Itakura-Saito divergence
Default: 2
sparsity_coefficients: list of float (as many as the number of modes + 1 for the core)
The sparsity coefficients on each factor and on the core respectively.
If set to None or [], the algorithm is computed without sparsity.
Default: []
fixed_modes: list of integers (between 0 and the number of modes + 1 for the core)
Has to be set in order not to update a factor, the factors being taken in the order of the modes and the core being referred to last.
Default: []
normalize: list of boolean (as many as the number of modes + 1 for the core)
Indicates whether the factors need to be normalized or not.
The normalization is a l_2 normalization on each of the rank components
(for the factors, each column will be normalized, i.e. each atom of the dimension of the current rank).
Default: []
mode_core_norm: integer or None
The mode along which to normalize the core, or None if normalization shouldn't be enforced.
Only used if the last element of the previous "normalize" argument is set to True.
Indexes of the modes start at 0.
Default: None
verbose: boolean
Indicates whether the algorithm prints the successive
normalized cost function values or not
Default: False
return_costs: boolean
Indicates whether the algorithm should return all normalized cost function
values and computation time of each iteration or not
Default: False
deterministic: boolean
Runs the algorithm in a deterministic way, by fixing the seed in all possible randomizations,
and by fixing the optimization techniques in the NNLS which otherwise depend on runtime.
This is made to enhance reproducible research, and should be set to True for the computation of results.
Returns
-------
core: tensorly tensor
The core tensor linking the factors of the decomposition
factors: list of numpy arrays #TODO: For the tensorly pull request, replace numpy by the backend
A list containing all the factors computed with the NTD
cost_fct_vals: list, only if return_costs == True
A list of the normalized cost function values, for every iteration of the algorithm.
toc: list, only if return_costs == True
A list of the accumulated computation time at each iteration
Example
-------
tensor = np.random.rand(80,100,120)
ranks = [10,20,15]
core, factors = NTD.ntd_mu(tensor, ranks = ranks, init = "tucker", verbose = True, beta = 1,
sparsity_coefficients = [None, None, None, None], normalize = [True, True, False, True])
References
----------
[1] Tamara G Kolda and Brett W Bader. "Tensor decompositions and applications",
SIAM Review 51.3 (2009), pp. 455-500.
[2] Févotte, C., & Idier, J. (2011).
Algorithms for nonnegative matrix factorization with the β-divergence.
Neural Computation, 23(9), 2421-2456.
[3] Lee, D. D., & Seung, H. S. (1999).
Learning the parts of objects by non-negative matrix factorization.
Nature, 401(6755), 788-791.
[4] J. Kossaifi et al. "TensorLy: Tensor Learning in Python",
arXiv preprint (2018).
[5] Jeremy E Cohen. "About notations in multiway array processing",
arXiv preprint arXiv:1511.01306, (2015).
"""
factors = []
nb_modes = len(tensor.shape)
if type(ranks) is int:
ranks = [ranks for i in range(nb_modes)]
elif len(ranks) != nb_modes:
raise err.InvalidRanksException("The number of ranks is different from the number of modes of the tensor, which is incorrect.") from None
for i in range(nb_modes):
if ranks[i] > tensor.shape[i]:
#raise err.InvalidRanksException("The " + str(i) + "-th mode rank is larger than the shape of the tensor, which is incorrect.") from None
ranks[i] = tensor.shape[i]
#warnings.warn('ignoring MIDI message %s' % msg)
print("The " + str(i) + "-th mode rank was larger than the shape of the tensor, which is incorrect. Set to the shape of the tensor")
if init.lower() == "random":
for mode in range(nb_modes):
if deterministic:
seed = np.random.RandomState(mode * 10)
random_array = seed.rand(tensor.shape[mode], ranks[mode])
else:
random_array = np.random.rand(tensor.shape[mode], ranks[mode])
factors.append(tl.tensor(random_array))
if deterministic:
seed = np.random.RandomState(nb_modes * 10)
core = tl.tensor(seed.rand(np.prod(ranks)).reshape(tuple(ranks)))
else:
core = tl.tensor(np.random.rand(np.prod(ranks)).reshape(tuple(ranks)))
factors = [np.maximum(f, 1e-12) for f in factors]
core = np.maximum(core, 1e-12)
elif init.lower() == "tucker":
if deterministic:
init_core, init_factors = tl_tucker(tensor, ranks, random_state = 8142)
else:
init_core, init_factors = tl_tucker(tensor, ranks)
factors = [np.maximum(tl.abs(f), 1e-12) for f in init_factors]
core = np.maximum(tl.abs(init_core), 1e-12)
elif init.lower() == "custom":
factors = factors_0
core = core_0
if len(factors) != nb_modes:
raise err.CustomNotEngouhFactors("Custom initialization, but not enough factors")
else:
for array in factors:
if array is None:
raise err.CustomNotValidFactors("Custom initialization, but one factor is set to 'None'")
if core is None:
raise err.CustomNotValidCore("Custom initialization, but the core is set to 'None'")
else:
raise err.InvalidInitializationType('Initialization type not understood: ' + init)
return compute_ntd_mu(tensor, ranks, core, factors, n_iter_max=n_iter_max, tol=tol,
sparsity_coefficients = sparsity_coefficients, fixed_modes = fixed_modes,
normalize = normalize, mode_core_norm = mode_core_norm,
verbose=verbose, return_costs=return_costs, beta = beta, deterministic = deterministic)
def compute_ntd_mu(tensor_in, ranks, core_in, factors_in, n_iter_max=100, tol=1e-6,
sparsity_coefficients = [], fixed_modes = [], normalize = [], beta = 2, mode_core_norm=None,
verbose=False, return_costs=False, deterministic=False):
"""
Computes the NTD via Multiplicative Updates on the beta-divergence, with parameters initialized in the `ntd_mu` function.
"""
# initialisation - store the input variables
core = core_in.copy()
factors = factors_in.copy()
tensor = tensor_in
norm_tensor = tl.norm(tensor, 2)
# set init if problem
#TODO: set them as warnings
nb_modes = len(tensor.shape)
if sparsity_coefficients == None or len(sparsity_coefficients) != nb_modes + 1:
print("Irrelevant number of sparsity coefficient (different from the number of modes + 1 for the core), they have been set to None.")
sparsity_coefficients = [None for i in range(nb_modes + 1)]
if fixed_modes == None:
fixed_modes = []
if normalize == None or len(normalize) != nb_modes + 1:
print("Irrelevant number of normalization booleans (different from the number of modes + 1 for the core), they have been set to False.")
normalize = [False for i in range(nb_modes + 1)]
if normalize[-1] and (mode_core_norm == None or mode_core_norm < 0 or mode_core_norm >= nb_modes):
print("The core was asked to be normalized, but an invalid mode was specified. Normalization has been set to False.")
normalize[-1] = False
if not normalize[-1] and (mode_core_norm != None and mode_core_norm >= 0 and mode_core_norm < nb_modes):
print("The core was asked NOT to be normalized, but mode_core_norm was set to a valid norm. Is this a mistake?")
# initialisation - declare local variables
cost_fct_vals = []
tic = time.time()
toc = []
# initialisation - unfold the tensor according to the modes
#unfolded_tensors = []
#for mode in range(tl.ndim(tensor)):
# unfolded_tensors.append(tl.base.unfold(tensor, mode))
# Iterate over one step of NTD
for iteration in range(n_iter_max):
# One pass of multiplicative updates on each updated mode and on the core
core, factors, cost = one_ntd_step_mu(tensor, ranks, core, factors, beta, norm_tensor,
fixed_modes, normalize, mode_core_norm)
# Store the computation time
toc.append(time.time() - tic)
cost_fct_vals.append(cost)
if verbose:
if iteration == 0:
print('Initial Obj={}'.format(cost))
else:
if cost_fct_vals[-2] - cost_fct_vals[-1] > 0:
print('Iter={}|Obj={}| Var={} (target is {}).'.format(iteration,
cost_fct_vals[-1], (abs(cost_fct_vals[-2] - cost_fct_vals[-1])/abs(cost_fct_vals[-2])),tol))
else:
# print in red when the cost function increases (shouldn't happen)
print('\033[91m' + 'Iter={}|Obj={}| Var={} (target is {}).'.format(iteration,
cost_fct_vals[-1], (abs(cost_fct_vals[-2] - cost_fct_vals[-1])/abs(cost_fct_vals[-2])),tol) + '\033[0m')
if iteration > 0 and (abs(cost_fct_vals[-2] - cost_fct_vals[-1])/abs(cost_fct_vals[-2])) < tol:
# Stop condition: relative error between last two iterations < tol
if verbose:
print('Converged to the required tolerance in {} iterations.'.format(iteration))
break
if return_costs:
return core, factors, cost_fct_vals, toc
else:
return core, factors
def one_ntd_step_mu(tensor, ranks, in_core, in_factors, beta, norm_tensor,
fixed_modes, normalize, mode_core_norm):
"""
One step of Multiplicative Update applied to every mode of the tensor
and on the core.
"""
# Copy
core = in_core.copy()
factors = in_factors.copy()
# Generating the mode update sequence
modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes]
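# For each non-fixed mode, the factor is updated with a multiplicative update on the beta-divergence;
# the matrix passed as second argument is the mode-wise unfolding of the core contracted with all the other factors,
# i.e. the "basis" B such that X_(mode) is approximated by factors[mode] @ B.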
for mode in modes_list:
factors[mode] = mu.mu_betadivmin(factors[mode], tl.unfold(tl.tenalg.multi_mode_dot(core, factors, skip = mode), mode), tl.unfold(tensor,mode), beta)
core = mu.mu_tensorial(core, factors, tensor, beta)
if normalize[-1]:
unfolded_core = tl.unfold(core, mode_core_norm)
for idx_mat in range(unfolded_core.shape[0]):
if tl.norm(unfolded_core[idx_mat]) != 0:
unfolded_core[idx_mat] = unfolded_core[idx_mat] / tl.norm(unfolded_core[idx_mat], 2)
core = tl.fold(unfolded_core, mode_core_norm, core.shape)
# # Adding the l1 norm value to the reconstruction error
# sparsity_error = 0
# for index, sparse in enumerate(sparsity_coefficients):
# if sparse:
# if index < len(factors):
# sparsity_error += 2 * (sparse * np.linalg.norm(factors[index], ord=1))
# elif index == len(factors):
# sparsity_error += 2 * (sparse * tl.norm(core, 1))
# else:
# raise NotImplementedError("TODEBUG: Too many sparsity coefficients, should have been raised before.")
reconstructed_tensor = tl.tenalg.multi_mode_dot(core, factors)
cost_fct_val = beta_div.beta_divergence(tensor, reconstructed_tensor, beta)
return core, factors, cost_fct_val