Source code for pytudes._2021.miscellany.machine_learning.linear_algebra

import numpy as np
import scipy.spatial


def cosine_similarity(A: np.ndarray, B: np.ndarray) -> np.ndarray:
    """Compute the pairwise cosine similarity between the rows of A and B.

    For samples x features matrices (or 1-D feature vectors) the result is

        [
            [cosine_similarity(A[0], B[0]), ..., cosine_similarity(A[0], B[len(B)-1])],
            ...,
            [cosine_similarity(A[len(A)-1], B[0]), ..., cosine_similarity(A[len(A)-1], B[len(B)-1])],
        ]

    i.e. the matrix of dot products

        [
            [A_1 . B_1, ..., A_1 . B_m],
            ...
            [A_n . B_1, ..., A_n . B_m],
        ]

    with every entry divided by the product of the Euclidean lengths of the
    two vectors involved.

    Args:
        A: Feature vector, or samples x features matrix.
        B: Feature vector, or samples x features matrix, with the same number
            of features as ``A``.

    Returns:
        The similarities as an array: ``len(A) x len(B)`` for matrices, a
        single-element array for two 1-D vectors. Entries that would be NaN
        (e.g. when one of the vectors has zero length) are returned as 0.

    Examples:
        >>> x = np.array([3, 45, 7, 2])
        >>> y = np.array([2, 54, 13, 15])
        >>> cosine_similarity(x, y)[0]
        0.9722842517123499
        >>> assert cosine_similarity(x, y) == cosine_similarity(y, x)
        >>> cosine_similarity(x, x)[0]
        1.0
        >>> cosine_similarity(x, -x)[0]
        -1.0
        >>> cosine_similarity(x, np.zeros(x.shape))[0]
        0.0
        >>> np.testing.assert_almost_equal(
        ...     cosine_similarity(x, y), 1 - scipy.spatial.distance.cosine(x, y)
        ... )

        >>> a, b = np.array([x, y]), np.array([y, x])
        >>> cosine_similarity(a, a)
        array([[1.        , 0.97228425],
               [0.97228425, 1.        ]])
        >>> cosine_similarity(a, b)
        array([[0.97228425, 1.        ],
               [1.        , 0.97228425]])
        >>> np.testing.assert_almost_equal(
        ...     cosine_similarity(a, b),
        ...     1 - scipy.spatial.distance.cdist(a, b, "cosine"),
        ... )
    """
    # Dot product w/ normalized magnitudes
    # Note:
    #   For *vectors* A,B:
    #     - euclidean_length(B).T == euclidean_length(B)
    #   For *matrices* A,B:
    #     - broadcasting the (len(A) x 1) lengths of A against the
    #       (1 x len(B)) transposed lengths of B produces a len(A) x len(B)
    #       lengths matrix s.t.
    #       `lengths[i,j] == euclidean_length(A[i]) * euclidean_length(B[j])`
    #       <=> the multiplied magnitudes matching the element (A @ B.T)[i,j]
    dot_products = A @ B.T
    lengths = euclidean_length(A) * euclidean_length(B).T

    # A zero-length vector makes this a 0 / 0 division. That case is expected
    # and handled below, so silence NumPy's floating-point warnings for the
    # division only instead of leaking a RuntimeWarning to every caller.
    with np.errstate(divide="ignore", invalid="ignore"):
        similarity = dot_products / lengths

    # Map NaN to 0 (np.nan_to_num also replaces +/-inf with large finite values)
    return np.nan_to_num(similarity)
def euclidean_length(arr: np.ndarray) -> np.ndarray:
    """Return the Euclidean (L2) length of the vectors stored along the last axis.

    The reduced axis is kept with size 1, so a 1-D vector yields a
    single-element array and a samples x features matrix yields a
    samples x 1 column of lengths.

    Args:
        arr: Vector, or array whose last axis holds the vector components.

    Returns:
        Array of lengths with the same number of dimensions as ``arr`` and a
        last axis of size 1.

    Examples:
        >>> x = np.array([3, 45, 7, 2])
        >>> y = np.array([2, 54, 13, 15])
        >>> euclidean_length(x)[0]
        45.68369512200168
        >>> euclidean_length(y)[0]
        57.56735185849702
        >>> euclidean_length(np.array([x, y]))
        array([[45.68369512],
               [57.56735186]])
    """
    # Reduce over the trailing axis only; keepdims preserves it as size 1 so
    # the result can be broadcast against (or transposed next to) the input.
    last_axis = arr.ndim - 1
    return np.linalg.norm(arr, axis=last_axis, keepdims=True)