from __future__ import print_function
from __future__ import division

import numpy as np
import faiss
import sys
import time
import warnings

if not sys.warnoptions:
    # Only install these filters when sys.warnoptions is empty, so that
    # explicitly configured warning options are left untouched.
    # Silence the noisy PIL EXIF warnings ...
    warnings.filterwarnings(action="ignore", message="(Possibly )?corrupt EXIF data.*")
    # ... and the numpy "size changed" messages for dtype and ufunc.
    warnings.filterwarnings(action="ignore", message="numpy.dtype size changed.*")
    warnings.filterwarnings(action="ignore", message="numpy.ufunc size changed.*")

def reserve_faiss_gpu_memory(gpu_id=0):
    """
    Select the GPU used by Faiss and reserve memory on it.

    Creates a ``StandardGpuResources`` object and a 2048-dimensional
    float32 ``GpuIndexFlatL2`` on device ``gpu_id``; holding on to the
    returned objects keeps the GPU memory allocated.

    Reserves around 2.4 Gb memory on Titan Xp (figure given by the
    original author).

    Usage:
        `r = reserve_faiss_gpu_memory()`
        To release the memory run `del r`
        Something like 200 Mb will still be held afterwards.

    Args:
        gpu_id (int): id of the GPU device to reserve memory on.
    Returns:
        tuple: ``(index, res)`` - the GPU index and the GPU resources object.
    """
    res = faiss.StandardGpuResources()
    cfg = faiss.GpuIndexFlatConfig()
    cfg.useFloat16 = False
    cfg.device = gpu_id
    index = faiss.GpuIndexFlatL2(res, 2048, cfg)
    return index, res

class MemoryReserver():
    """
    Faiss GPU memory manager.

    If not used and another process takes up memory of
    currently used GPU, then the program will crash.

    Attributes:
        memory_holder: the objects returned by ``reserve_faiss_gpu_memory``
            while memory is reserved, otherwise ``None``.
    """
    def __init__(self):
        self.memory_holder = None

    def lock(self, backend):
        """
        Reserve GPU memory for Faiss when ``backend`` is ``'faiss-gpu'``.

        For any other backend nothing is reserved and ``memory_holder``
        is reset to ``None``.
        """
        if backend == 'faiss-gpu':
            print('[INFO]Reserve some memory for FAISS')
            self.memory_holder = reserve_faiss_gpu_memory(gpu_id=0)
        else:
            # Without this branch the reservation made above would be
            # dropped immediately after it was created.
            self.memory_holder = None

    def release(self):
        """Drop the held reservation, if any, so the memory can be freed."""
        if self.memory_holder is not None:
            print('[INFO]Release memory for FAISS')
            self.memory_holder = None

#PCA + Whitening + L2Normalization
def preprocess_features(x, x2=None, d=256):
    """
    Calculate PCA + Whitening + L2 normalization for each vector

    Args:
        x (ndarray): N x D, where N is number of vectors, D - dimensionality
        x2 (ndarray): optional, if not None apply PCA+Whitening learned on x to x2.
        d (int): number of output dimensions (how many principal components to use).
    Returns:
        transformed [N x d] matrix xt when x2 is None, otherwise the tuple
        (xt, x2t) with both inputs transformed.
    """
    n, orig_d = x.shape
    # eigen_power=-0.5 makes the PCA transform also whiten the output.
    pcaw = faiss.PCAMatrix(d_in=orig_d, d_out=d, eigen_power=-0.5, random_rotation=False)
    # The transform has to be fitted on x before it can be applied.
    pcaw.train(x)
    assert pcaw.is_trained
    print('Performing PCA + whitening')
    x = pcaw.apply_py(x)
    print('x.shape after PCA + whitening:', x.shape)
    l2normalization = faiss.NormalizationTransform(d, 2.0)
    print('Performing L2 normalization')
    x = l2normalization.apply_py(x)
    if x2 is not None:
        print('Perform PCA + whitening for x2')
        x2 = pcaw.apply_py(x2)
        x2 = l2normalization.apply_py(x2)
        return x, x2
    else:
        return x

def train_kmeans(x, num_clusters=1000, gpu_ids=None, niter=100, nredo=1, verbose=0):
    """
    Runs k-means clustering on one or several GPUs

    Args:
        x (ndarray): training vectors, one per row. [N x d].
        num_clusters (int): number of centroids to learn.
        gpu_ids (None, int or list of int): GPU device ids to use;
            the CPU is used if None.
        niter (int): number of k-means iterations.
        nredo (int): value assigned to the ``nredo`` setting of the
            Faiss clustering object.
        verbose (int or bool): enable verbose output of the Faiss clustering.
    Returns:
        ndarray: the learned centroids. [num_clusters x d].
    """
    assert np.all(~np.isnan(x)), 'x contains NaN'
    assert np.all(np.isfinite(x)), 'x contains Inf'
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = bool(verbose)
    kmeans.niter = niter
    kmeans.nredo = nredo

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    if gpu_ids is not None:
        res = [faiss.StandardGpuResources() for _ in gpu_ids]

        # One float32 flat-index config per requested device.
        flat_config = []
        for gpu_id in gpu_ids:
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = gpu_id
            flat_config.append(cfg)

        if len(gpu_ids) == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                       for i in range(len(gpu_ids))]
            # NOTE(review): IndexProxy appears to be named IndexReplicas in
            # newer Faiss releases - confirm against the installed version.
            proxy_cls = getattr(faiss, 'IndexProxy', None) or faiss.IndexReplicas
            index = proxy_cls()
            for sub_index in indexes:
                index.addIndex(sub_index)
    else:
        index = faiss.IndexFlatL2(d)

    # perform the training
    kmeans.train(x, index)
    centroids = faiss.vector_float_to_array(kmeans.centroids)

    return centroids.reshape(num_clusters, d)

def compute_cluster_assignment(centroids, x):
    """
    Assign every row of x to its nearest centroid (squared L2 distance).

    Args:
        centroids (ndarray): cluster centers. [num_clusters x d].
        x (ndarray): vectors to assign. [N x d].
    Returns:
        ndarray: flat array with one centroid index per row of x.
    """
    assert centroids is not None, "should train before assigning"
    d = centroids.shape[1]
    index = faiss.IndexFlatL2(d)
    # The centroids are the database; each x row queries its 1 nearest one.
    index.add(centroids)
    distances, labels = index.search(x, 1)
    return labels.ravel()

def do_clustering(features, num_clusters, gpu_ids=None,
                  num_pca_components=None, niter=100, nredo=1, verbose=0):
    """
    Cluster feature vectors with k-means, optionally after PCA + whitening.

    Args:
        features (ndarray): one example per first-axis entry; remaining
            axes are flattened and the data is cast to float32.
        num_clusters (int): number of clusters.
        gpu_ids (None, int or list of int): forwarded to ``train_kmeans``.
        num_pca_components (int): if not None, reduce the features to this
            many dimensions with ``preprocess_features`` first.
        niter (int): forwarded to ``train_kmeans``.
        nredo (int): forwarded to ``train_kmeans``.
        verbose (int or bool): forwarded to ``train_kmeans``.
    Returns:
        ndarray: cluster index for every example.
    """
    print('[INFO]FAISS: using GPUs {}'.format(gpu_ids))
    features = np.asarray(features.reshape(features.shape[0], -1), dtype=np.float32)

    if num_pca_components is not None:
        # preprocess_features only takes the output dimensionality; the
        # k-means settings belong to train_kmeans below.
        features = preprocess_features(features, d=num_pca_components)

    print('[INFO]FAISS: clustering...')
    t0 = time.time()
    centroids = train_kmeans(features, num_clusters, gpu_ids=gpu_ids,
                             niter=niter, nredo=nredo, verbose=verbose)
    labels = compute_cluster_assignment(centroids, features)
    t1 = time.time()
    print('[INFO]FAISS: Clustering total elapsed time: %.3f m' % ((t1 - t0) / 60.0))
    return labels

def find_nearest_neighbors(x, queries=None, k=5, gpu_id=None):
    """
    Find k nearest neighbors for each of the n examples.
    Distances are computed using Squared Euclidean distance metric.

    Args:
        x (ndarray): N examples to search within. [N x d].
        gpu_id (int): use CPU if None else use GPU with the specified id.
        queries (ndarray): find nearest neighbor for each query example. [M x d] matrix
            If None then find k nearest neighbors for each row of x
            (excluding self examples).
        k (int): number of nearest neighbors to find.

    Returns:
        tuple: ``(nns, distances)`` where
            nns (ndarray): Indices of the nearest neighbors. [M x k]
            distances (ndarray): Distances to the nearest neighbors. [M x k]

    Raises:
        ValueError: if gpu_id is neither None nor an int.
    """
    if gpu_id is not None and not isinstance(gpu_id, int):
        raise ValueError('gpu_id must be None or int')
    x = np.asarray(x.reshape(x.shape[0], -1), dtype=np.float32)
    remove_self = False # will have queries in the search results?
    if queries is None:
        remove_self = True
        queries = x
        # Ask for one extra neighbor because each row will match itself.
        k += 1
    else:
        # Give explicit queries the same flattening / float32 cast as x.
        queries = np.asarray(queries.reshape(queries.shape[0], -1), dtype=np.float32)

    d = x.shape[1]

    tic = time.time()
    if gpu_id is None:
        print('[INFO]FAISS: cpu::find {} nearest neighbors'\
                     .format(k - int(remove_self)))
        index = faiss.IndexFlatL2(d)
    else:
        print('[INFO]FAISS: gpu[{}]::find {} nearest neighbors'\
                     .format(gpu_id, k - int(remove_self)))
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = gpu_id

        flat_config = [cfg]
        resources = [faiss.StandardGpuResources()]
        index = faiss.GpuIndexFlatL2(resources[0], d, flat_config[0])
    index.add(x)
    distances, nns = index.search(queries, k)
    if remove_self:
        for i in range(len(nns)):
            # Positions of the results that are not the query row itself.
            indices = np.nonzero(nns[i, :] != i)[0]
            if len(indices) > k - 1:
                # The row did not find itself; drop the last result instead.
                indices = indices[:-1]
            nns[i, :-1] = nns[i, indices]
            distances[i, :-1] = distances[i, indices]
        # The last column is now redundant for every row.
        nns = nns[:, :-1]
        distances = distances[:, :-1]
    print('[INFO]FAISS: Neighbors search total elapsed time: {:.2f} sec'.format(time.time() - tic))
    return nns, distances

def example(size=30000, k=10, num_pca_components=256):
    """
    Demo: cluster random 512-dimensional vectors with k-means on GPU 0.

    Args:
        size (int): number of random vectors to generate.
        k (int): number of clusters passed to ``train_kmeans``.
        num_pca_components (int): output dimensionality of the
            PCA + whitening step applied before clustering.
    """
    gpu_ids = [0]

    raw = np.random.rand(size, 512)
    flat = raw.reshape(raw.shape[0], -1).astype('float32')
    # The same matrix is passed as both arguments; only the first result is used.
    reduced, _ = preprocess_features(flat, flat, d=num_pca_components)

    started = time.time()
    centroids = train_kmeans(reduced, k, gpu_ids=gpu_ids)
    labels = compute_cluster_assignment(centroids, reduced)
    print('[INFO]centroids.shape:', centroids.shape)
    print('[INFO]labels.type:', labels.__class__, labels.dtype)
    print('[INFO]labels.shape:', labels.shape)
    finished = time.time()

    elapsed = finished - started
    print("[INFO]total runtime: %.2f s" % elapsed)

def test_knn_search(size=10000, gpu_id=None):
    """
    Time a flat L2 index build and a 21-nearest-neighbor search on random data.

    Args:
        size (int): number of random 512-dimensional vectors to index and query.
        gpu_id (int): use CPU if None else use GPU with the specified id.
    """
    x = np.random.rand(size, 512)
    x = x.reshape(x.shape[0], -1).astype('float32')
    d = x.shape[1]

    tic = time.time()
    if gpu_id is None:
        index = faiss.IndexFlatL2(d)
    else:
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = gpu_id

        flat_config = [cfg]
        resources = [faiss.StandardGpuResources()]
        index = faiss.GpuIndexFlatL2(resources[0], d, flat_config[0])
    index.add(x)
    print('[INFO]Index built in {} sec'.format(time.time() - tic))
    # Every vector is used as a query against the index built from x.
    distances, I = index.search(x, 21)
    # Note: this time is measured from tic, so it includes the index build.
    print('[INFO]Searched in {} sec'.format(time.time() - tic))
    print('[INFO]distance shape', distances.shape)
    print('[INFO]index shape', I.shape)
    print('[INFO]top5 distance', distances[:5])
    print('[INFO]top5 index', I[:5])

if __name__ == "__main__":
    # Script entry point: run the clustering demo first, then the
    # nearest-neighbor search timing on GPU id 5.
    example(
        size=100000,
        k=3,
        num_pca_components=32,
    )
    test_knn_search(
        size=100000,
        gpu_id=5,
    )


# Last modification: May 7th, 2021 at 01:38 pm