给定若干查询向量(query vectors),在向量库(key vectors)中为每个查询向量找到与其欧氏距离最近的 top-k 个向量。

1. W1 - 矩阵

import numpy as np
from scipy.spatial.distance import cdist

# Brute-force top-k lookup on whole matrices: compute every query-to-key
# Euclidean distance in one call, then sort each row of the result.
# The '' values are placeholders and must be replaced with real arrays
# before this snippet can run.
query_matrix = '' # query vectors, one per row (original note: NxC)
keys_matrix = '' # key/library vectors, one per row (original note: NxC; presumably the row count may differ from the queries -- TODO confirm)
keys_labels = '' # one label per key vector (original note: Nx1)
topk = 100 # number of nearest keys kept for each query
# dist[i, j] is the Euclidean distance between query row i and key row j.
dist = cdist(query_matrix, keys_matrix,metric='euclidean')
# argsort orders each row by ascending distance; the first `topk` columns are
# the indices of the closest keys, which are then mapped to their labels.
nearest = keys_labels[np.argsort(dist,axis=1)[:,:topk]] 

2. W2 - 向量

from typing import Dict, List
import numpy as np 
import scipy

def _chi_squared_distance(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """
    Computes the chi-squared distance 0.5 * sum((a - b)^2 / (a + b)).
    Components where a + b == 0 contribute nothing to the sum, so two empty
    histogram bins do not produce a 0/0 NaN. The measure is intended for
    non-negative (histogram-like) vectors.
    Args:
        vec1: First vector (float array)
        vec2: Second vector (float array of the same shape)
    Returns: Chi-squared distance between the 2 input vectors
    """
    squared_diff = (vec1 - vec2) ** 2
    total = vec1 + vec2
    terms = np.divide(
        squared_diff,
        total,
        out=np.zeros_like(squared_diff, dtype=float),
        where=total != 0,
    )
    return 0.5 * float(np.sum(terms))


def vector_distance(
    vec1: np.ndarray,
    vec2: np.ndarray,
    method: str = "l2",
    l2_normalize: bool = True,
) -> float:
    """
    Computes the distance between 2 vectors
    Args:
        vec1: First vector between which the distance will be computed
        vec2: Second vector
        method: Type of distance to be computed (case-insensitive). One of
        "l1", "l2", "normalizedl2", "cosine", "correlation", "chisquared",
        "normalizedchisquared" or "hamming"
        l2_normalize: Flag indicating whether the vectors should be normalized
        to be of unit length before the distance between them is computed.
        A vector of zero norm is divided by zero here and yields NaN values.
    Returns: Distance between the 2 input vectors
    Raises:
        ValueError: If `method` is not one of the supported names
    """
    # Imported explicitly: a bare `import scipy` does not guarantee that the
    # `scipy.spatial` submodule has been loaded on every SciPy version.
    from scipy.spatial import distance as sp_distance

    # Accept lists/tuples and avoid integer wrap-around when subtracting.
    vec1 = np.asarray(vec1, dtype=float)
    vec2 = np.asarray(vec2, dtype=float)

    # Pre-processing
    if l2_normalize:
        vec1 = vec1 / np.linalg.norm(vec1, 2)
        vec2 = vec2 / np.linalg.norm(vec2, 2)

    # Distance computation
    method = method.lower()
    if method == "l1":
        dist = np.sum(np.abs(vec1 - vec2))
    elif method == "l2":
        dist = np.linalg.norm(vec1 - vec2, 2)
    elif method == "normalizedl2":
        # Always compares unit-length vectors, regardless of `l2_normalize`.
        a = vec1 / np.linalg.norm(vec1, 2)
        b = vec2 / np.linalg.norm(vec2, 2)
        dist = np.linalg.norm(a - b, 2)
    elif method == "cosine":
        dist = sp_distance.cosine(vec1, vec2)
    elif method == "correlation":
        dist = sp_distance.correlation(vec1, vec2)
    elif method == "chisquared":
        # SciPy has no `scipy.chiSquared`; use the local implementation.
        dist = _chi_squared_distance(vec1, vec2)
    elif method == "normalizedchisquared":
        # Rescale each vector to sum to 1 before comparing.
        a = vec1 / np.sum(vec1)
        b = vec2 / np.sum(vec2)
        dist = _chi_squared_distance(a, b)
    elif method == "hamming":
        # Compares only the sign pattern (component > 0) of the two vectors.
        dist = sp_distance.hamming(vec1 > 0, vec2 > 0)
    else:
        raise ValueError("Distance method unknown: " + method)
    return float(dist)


def compute_distances(
    query_feature: np.array, feature_dict: dict, method: str = "l2"
) -> List:
    """
    Measures how far the query feature is from every feature in feature_dict
    (the query's own entry, if it is in the dictionary, is compared as well).
    Args:
        query_feature: Feature vector of the query image
        feature_dict: Mapping of image path -> feature vector (array of floats)
        method: Name of the distance passed through to vector_distance
    Returns: List of (image path, distance) tuples, in the dictionary's
        iteration order.
    """
    return [
        (path, vector_distance(query_feature, candidate, method))
        for path, candidate in feature_dict.items()
    ]

3. 向量计算 vs 最近邻

import numpy as np 
from sklearn.neighbors import NearestNeighbors

# Pick the query image and look up its feature vector.
# NOTE(review): `train_features` is not defined in this snippet; presumably a
# dict mapping image path -> feature vector built elsewhere -- confirm.
query_im_path = '/path/to/images'
query_feature = train_features[query_im_path]
assert len(query_feature) == 512  # expected feature dimension

# Placeholders for the reference set; fill these in before running.
valid_features = dict()  # key: image_path, value: feature
# Must list the paths in the same order as valid_features.values(), because
# the neighbor indices returned below are used to index this list.
valid_image_paths = list()

# Build the nearest-neighbor index.
# Normalize every reference feature to unit length.
valid_features_list = np.array(list(valid_features.values()))
valid_features_list /= np.linalg.norm(valid_features_list, axis=1)[:, None]
# Create and fit the NearestNeighbors object (at most 100 neighbors).
nn = NearestNeighbors(
    algorithm='auto',
    metric='euclidean',
    n_neighbors=min(100, len(valid_features_list)),
)
nn.fit(valid_features_list)
# Apparently the estimator repr echoed by fit() in the original notes:
# NearestNeighbors(algorithm='auto', leaf_size=30, metric='euclidean',
#                  metric_params=None, n_jobs=None, n_neighbors=100, p=2,
#                  radius=1.0)

# Brute-force search: compare the query against every reference feature one
# by one and keep the path with the smallest distance.
bf_distances_and_paths = compute_distances(query_feature, valid_features)
bf_distances = [d for (_, d) in bf_distances_and_paths]
bf_closest_match_path = bf_distances_and_paths[np.argmin(bf_distances)][0]

# Nearest-neighbor search for the same query.
# Normalize into a new array: an in-place `/=` would also overwrite the
# feature stored in `train_features`.
query_feature = query_feature / np.linalg.norm(query_feature, 2)
# kneighbors expects a 2-D array with one query per row.
query_feature = np.reshape(query_feature, (-1, len(query_feature)))
approx_distances, approx_im_indices = nn.kneighbors(query_feature)
approx_im_paths = [valid_image_paths[i] for i in approx_im_indices[0]]

# Compare: the brute-force best match must appear exactly once among the
# nearest-neighbor results.
rank = np.where(np.array(approx_im_paths) == bf_closest_match_path)[0]
assert len(rank) == 1
# Index the single element explicitly; int() on a 1-D array is deprecated
# in recent NumPy releases.
assert approx_im_paths[int(rank[0])] == bf_closest_match_path
Last modification:May 7th, 2021 at 10:42 am