From c5975cda72981329af97df36b4986ae6f65550e2 Mon Sep 17 00:00:00 2001 From: Matthijs Douze Date: Wed, 16 Dec 2020 01:17:59 -0800 Subject: [PATCH] PQ4 fast scan benchmarks (#1555) Summary: Code + scripts for Faiss benchmarks around the Fast scan codes. Pull Request resolved: https://github.com/facebookresearch/faiss/pull/1555 Test Plan: buck test //faiss/tests/:test_refine Reviewed By: wickedfoo Differential Revision: D25546505 Pulled By: mdouze fbshipit-source-id: 902486b7f47e36221a2671d124df8c114f25db58 --- benchs/bench_all_ivf/bench_all_ivf.py | 221 ++++-- benchs/bench_all_ivf/bench_kmeans.py | 4 +- benchs/bench_all_ivf/cmp_with_scann.py | 308 +++++++++ benchs/bench_all_ivf/datasets.py | 198 ++---- benchs/bench_all_ivf/make_groundtruth.py | 27 + benchs/bench_all_ivf/parse_bench_all_ivf.py | 498 ++++++++++---- .../bench_all_ivf/run_on_cluster_generic.bash | 632 ++++++++++++++---- contrib/README.md | 4 + contrib/datasets.py | 11 +- contrib/exhaustive_search.py | 1 + contrib/factory_tools.py | 100 +++ contrib/vecs_io.py | 14 + faiss/AutoTune.cpp | 67 +- faiss/AutoTune.h | 9 +- faiss/CMakeLists.txt | 2 + faiss/IndexFlat.cpp | 138 ---- faiss/IndexFlat.h | 41 -- faiss/IndexIVF.cpp | 141 +++- faiss/IndexIVF.h | 12 +- faiss/IndexIVFFlat.cpp | 3 +- faiss/IndexIVFFlat.h | 3 +- faiss/IndexIVFPQFastScan.cpp | 89 ++- faiss/IndexIVFPQR.cpp | 14 +- faiss/IndexIVFPQR.h | 3 +- faiss/IndexPQ.cpp | 49 +- faiss/IndexPQFastScan.cpp | 3 + faiss/IndexPQFastScan.h | 1 + faiss/IndexPreTransform.cpp | 47 ++ faiss/IndexPreTransform.h | 2 + faiss/IndexRefine.cpp | 253 +++++++ faiss/IndexRefine.h | 73 ++ faiss/IndexScalarQuantizer.cpp | 4 +- faiss/IndexScalarQuantizer.h | 2 +- faiss/impl/AuxIndexStructures.h | 3 +- faiss/impl/ProductQuantizer.h | 2 + faiss/impl/ScalarQuantizer.cpp | 6 +- faiss/impl/index_read.cpp | 17 +- faiss/impl/index_write.cpp | 7 +- faiss/index_factory.cpp | 103 ++- faiss/python/__init__.py | 21 +- faiss/python/swigfaiss.swig | 27 +- faiss/utils/quantize_lut.cpp | 3 +- tests/test_autotune.py | 47 ++ tests/test_factory.py | 43 +- tests/test_index.py | 2 +- tests/test_index_accuracy.py | 8 +- tests/test_refine.py | 53 ++ 47 files changed, 2463 insertions(+), 853 deletions(-) mode change 100755 => 100644 benchs/bench_all_ivf/bench_all_ivf.py mode change 100755 => 100644 benchs/bench_all_ivf/bench_kmeans.py create mode 100644 benchs/bench_all_ivf/cmp_with_scann.py mode change 100755 => 100644 benchs/bench_all_ivf/datasets.py create mode 100644 benchs/bench_all_ivf/make_groundtruth.py mode change 100755 => 100644 benchs/bench_all_ivf/parse_bench_all_ivf.py create mode 100644 contrib/factory_tools.py create mode 100644 faiss/IndexRefine.cpp create mode 100644 faiss/IndexRefine.h create mode 100644 tests/test_autotune.py create mode 100644 tests/test_refine.py diff --git a/benchs/bench_all_ivf/bench_all_ivf.py b/benchs/bench_all_ivf/bench_all_ivf.py old mode 100755 new mode 100644 index 429e3f911d..cce8e0b342 --- a/benchs/bench_all_ivf/bench_all_ivf.py +++ b/benchs/bench_all_ivf/bench_all_ivf.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python2 - # Copyright (c) Facebook, Inc. and its affiliates. 
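# [editor's sketch, not part of the patch] The benchmarks in this patch exercise
# the PQ4 "fast scan" codes (factory suffix x4fs) together with the new
# IndexRefine re-ranking wrapper; a minimal illustrative example with an
# arbitrary dimension and random data (factory string and sizes are only
# examples, not the benchmark configuration):
#
#     import faiss
#     import numpy as np
#     d = 64
#     xb = np.random.rand(100000, d).astype('float32')
#     index = faiss.index_factory(d, "IVF1024,PQ32x4fs,RFlat")
#     index.train(xb)
#     index.add(xb)
#     D, I = index.search(xb[:100], 10)   # fast-scan search + exact re-ranking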
# # This source code is licensed under the MIT license found in the @@ -8,12 +6,15 @@ import os import sys import time +import pdb import numpy as np import faiss import argparse import datasets from datasets import sanitize + + ###################################################### # Command-line parsing ###################################################### @@ -34,8 +35,8 @@ def aa(*args, **kwargs): group = parser.add_argument_group('index consturction') aa('--indexkey', default='HNSW32', help='index_factory type') -aa('--efConstruction', default=200, type=int, - help='HNSW construction factor') +aa('--by_residual', default=-1, type=int, + help="set if index should use residuals (default=unchanged)") aa('--M0', default=-1, type=int, help='size of base level') aa('--maxtrain', default=256 * 256, type=int, help='maximum number of training points (0 to set automatically)') @@ -54,6 +55,8 @@ def aa(*args, **kwargs): group = parser.add_argument_group('searching') aa('--k', default=100, type=int, help='nb of nearest neighbors') +aa('--inter', default=False, action='store_true', + help='use intersection measure instead of 1-recall as metric') aa('--searchthreads', default=-1, type=int, help='nb of threads to use at search time') aa('--searchparams', nargs='+', default=['autotune'], @@ -64,7 +67,7 @@ def aa(*args, **kwargs): help='set max value for autotune variables format "var:val" (exclusive)') aa('--autotune_range', default=[], nargs='*', help='set complete autotune range, format "var:val1,val2,..."') -aa('--min_test_duration', default=0, type=float, +aa('--min_test_duration', default=3.0, type=float, help='run test at least for so long to avoid jitter') args = parser.parse_args() @@ -79,64 +82,126 @@ def aa(*args, **kwargs): # Load dataset ###################################################### -xt, xb, xq, gt = datasets.load_data( +ds = datasets.load_dataset( dataset=args.db, compute_gt=args.compute_gt) -print("dataset sizes: train %s base %s query %s GT %s" % ( - xt.shape, xb.shape, xq.shape, gt.shape)) +print(ds) -nq, d = xq.shape -nb, d = xb.shape +nq, d = ds.nq, ds.d +nb, d = ds.nq, ds.d ###################################################### # Make index ###################################################### +def unwind_index_ivf(index): + if isinstance(index, faiss.IndexPreTransform): + assert index.chain.size() == 1 + vt = index.chain.at(0) + index_ivf, vt2 = unwind_index_ivf(faiss.downcast_index(index.index)) + assert vt2 is None + return index_ivf, vt + if hasattr(faiss, "IndexRefine") and isinstance(index, faiss.IndexRefine): + return unwind_index_ivf(faiss.downcast_index(index.base_index)) + if isinstance(index, faiss.IndexIVF): + return index, None + else: + return None, None + + if args.indexfile and os.path.exists(args.indexfile): print("reading", args.indexfile) index = faiss.read_index(args.indexfile) - if isinstance(index, faiss.IndexPreTransform): - index_ivf = faiss.downcast_index(index.index) - else: - index_ivf = index - assert isinstance(index_ivf, faiss.IndexIVF) + index_ivf, vec_transform = unwind_index_ivf(index) + if vec_transform is None: vec_transform = lambda x: x - assert isinstance(index_ivf, faiss.IndexIVF) else: print("build index, key=", args.indexkey) - index = faiss.index_factory(d, args.indexkey) + index = faiss.index_factory( + d, args.indexkey, faiss.METRIC_L2 if ds.metric == "L2" else + faiss.METRIC_INNER_PRODUCT + ) - if isinstance(index, faiss.IndexPreTransform): - index_ivf = faiss.downcast_index(index.index) - vec_transform = 
index.chain.at(0).apply_py + index_ivf, vec_transform = unwind_index_ivf(index) + if vec_transform is None: + vec_transform = lambda x: x + else: + vec_transform = faiss.downcast_VectorTransform(vec_transform) + + if args.by_residual != -1: + by_residual = args.by_residual == 1 + print("setting by_residual = ", by_residual) + index_ivf.by_residual # check if field exists + index_ivf.by_residual = by_residual + + + if index_ivf: + print("Update add-time parameters") + # adjust default parameters used at add time for quantizers + # because otherwise the assignment is inaccurate + quantizer = faiss.downcast_index(index_ivf.quantizer) + if isinstance(quantizer, faiss.IndexRefine): + print(" update quantizer k_factor=", quantizer.k_factor, end=" -> ") + quantizer.k_factor = 32 if index_ivf.nlist < 1e6 else 64 + print(quantizer.k_factor) + base_index = faiss.downcast_index(quantizer.base_index) + if isinstance(base_index, faiss.IndexIVF): + print(" update quantizer nprobe=", base_index.nprobe, end=" -> ") + base_index.nprobe = ( + 16 if base_index.nlist < 1e5 else + 32 if base_index.nlist < 4e6 else + 64) + print(base_index.nprobe) + elif isinstance(quantizer, faiss.IndexHNSW): + print(" update quantizer efSearch=", quantizer.hnsw.efSearch, end=" -> ") + quantizer.hnsw.efSearch = 40 if index_ivf.nlist < 4e6 else 64 + print(quantizer.hnsw.efSearch) + + if index_ivf: + index_ivf.verbose = True + index_ivf.quantizer.verbose = True + index_ivf.cp.verbose = True else: - index_ivf = index - vec_transform = lambda x:x - assert isinstance(index_ivf, faiss.IndexIVF) - index_ivf.verbose = True - index_ivf.quantizer.verbose = True - index_ivf.cp.verbose = True + index.verbose = True maxtrain = args.maxtrain if maxtrain == 0: if 'IMI' in args.indexkey: maxtrain = int(256 * 2 ** (np.log2(index_ivf.nlist) / 2)) - else: + elif index_ivf: maxtrain = 50 * index_ivf.nlist + else: + # just guess... 
+ maxtrain = 256 * 100 + maxtrain = max(maxtrain, 256 * 100) print("setting maxtrain to %d" % maxtrain) - args.maxtrain = maxtrain - xt2 = sanitize(xt[:args.maxtrain]) - assert np.all(np.isfinite(xt2)) + try: + xt2 = ds.get_train(maxtrain=maxtrain) + except NotImplementedError: + print("No training set: training on database") + xt2 = ds.get_database()[:maxtrain] print("train, size", xt2.shape) + assert np.all(np.isfinite(xt2)) + + if (isinstance(vec_transform, faiss.OPQMatrix) and + isinstance(index_ivf, faiss.IndexIVFPQFastScan)): + print(" Forcing OPQ training PQ to PQ4") + ref_pq = index_ivf.pq + training_pq = faiss.ProductQuantizer( + ref_pq.d, ref_pq.M, ref_pq.nbits + ) + vec_transform.pq + vec_transform.pq = training_pq + if args.get_centroids_from == '': @@ -147,7 +212,8 @@ def aa(*args, **kwargs): if args.train_on_gpu: print("add a training index on GPU") - train_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d)) + train_index = faiss.index_cpu_to_all_gpus( + faiss.IndexFlatL2(index_ivf.d)) index_ivf.clustering_index = train_index else: @@ -158,13 +224,15 @@ def aa(*args, **kwargs): centroids = centroids.reshape(-1, d) print(" centroid table shape", centroids.shape) - if isinstance(index, faiss.IndexPreTransform): + if isinstance(vec_transform, faiss.VectorTransform): print(" training vector transform") - assert index.chain.size() == 1 - vt = index.chain.at(0) - vt.train(xt2) + vec_transform.train(xt2) print(" transform centroids") - centroids = vt.apply_py(centroids) + centroids = vec_transform.apply_py(centroids) + + if not index_ivf.quantizer.is_trained: + print(" training quantizer") + index_ivf.quantizer.train(centroids) print(" add centroids to quantizer") index_ivf.quantizer.add(centroids) @@ -177,12 +245,16 @@ def aa(*args, **kwargs): print("adding") t0 = time.time() if args.add_bs == -1: - index.add(sanitize(xb)) + index.add(sanitize(ds.get_database())) else: - for i0 in range(0, nb, args.add_bs): - i1 = min(nb, i0 + args.add_bs) - print(" adding %d:%d / %d" % (i0, i1, nb)) - index.add(sanitize(xb[i0:i1])) + i0 = 0 + for xblock in ds.database_iterator(bs=args.add_bs): + i1 = i0 + len(xblock) + print(" adding %d:%d / %d [%.3f s, RSS %d kiB] " % ( + i0, i1, ds.nb, time.time() - t0, + faiss.get_mem_usage_kb())) + index.add(xblock) + i0 = i1 print(" add in %.3f s" % (time.time() - t0)) if args.indexfile: @@ -211,39 +283,65 @@ def aa(*args, **kwargs): # Index is ready ############################################################# -xq = sanitize(xq) +xq = sanitize(ds.get_queries()) +gt = ds.get_groundtruth(k=args.k) +assert gt.shape[1] == args.k, pdb.set_trace() if args.searchthreads != -1: print("Setting nb of threads to", args.searchthreads) faiss.omp_set_num_threads(args.searchthreads) - ps = faiss.ParameterSpace() ps.initialize(index) - parametersets = args.searchparams -header = '%-40s R@1 R@10 R@100 time(ms/q) nb distances #runs' % "parameters" -def eval_setting(index, xq, gt, min_time): +if args.inter: + header = ( + '%-40s inter@%3d time(ms/q) nb distances #runs' % + ("parameters", args.k) + ) +else: + + header = ( + '%-40s R@1 R@10 R@100 time(ms/q) nb distances #runs' % + "parameters" + ) + +def compute_inter(a, b): + nq, rank = a.shape + ninter = sum( + np.intersect1d(a[i, :rank], b[i, :rank]).size + for i in range(nq) + ) + return ninter / a.size + + + +def eval_setting(index, xq, gt, k, inter, min_time): nq = xq.shape[0] ivf_stats = faiss.cvar.indexIVF_stats ivf_stats.reset() nrun = 0 t0 = time.time() while True: - D, I = index.search(xq, 100) + D, I = 
index.search(xq, k) nrun += 1 t1 = time.time() if t1 - t0 > min_time: break ms_per_query = ((t1 - t0) * 1000.0 / nq / nrun) - for rank in 1, 10, 100: - n_ok = (I[:, :rank] == gt[:, :1]).sum() - print("%.4f" % (n_ok / float(nq)), end=' ') - print(" %8.3f " % ms_per_query, end=' ') + if inter: + rank = k + inter_measure = compute_inter(gt[:, :rank], I[:, :rank]) + print("%.4f" % inter_measure, end=' ') + else: + for rank in 1, 10, 100: + n_ok = (I[:, :rank] == gt[:, :1]).sum() + print("%.4f" % (n_ok / float(nq)), end=' ') + print(" %9.5f " % ms_per_query, end=' ') print("%12d " % (ivf_stats.ndis / nrun), end=' ') print(nrun) @@ -269,15 +367,20 @@ def eval_setting(index, xq, gt, min_time): pr = ps.add_range(k) faiss.copy_array_to_vector(vals, pr.values) - # setup the Criterion object: optimize for 1-R@1 - crit = faiss.OneRecallAtRCriterion(nq, 1) + # setup the Criterion object + if args.inter: + print("Optimize for intersection @ ", args.k) + crit = faiss.IntersectionCriterion(nq, args.k) + else: + print("Optimize for 1-recall @ 1") + crit = faiss.OneRecallAtRCriterion(nq, 1) # by default, the criterion will request only 1 NN - crit.nnn = 100 + crit.nnn = args.k crit.set_groundtruth(None, gt.astype('int64')) # then we let Faiss find the optimal parameters by itself - print("exploring operating points") + print("exploring operating points, %d threads" % faiss.omp_get_max_threads()); ps.display() t0 = time.time() @@ -286,17 +389,19 @@ def eval_setting(index, xq, gt, min_time): op.display() + print("Re-running evaluation on selected OPs") print(header) opv = op.optimal_pts + maxw = max(max(len(opv.at(i).key) for i in range(opv.size())), 40) for i in range(opv.size()): opt = opv.at(i) ps.set_index_parameters(index, opt.key) - print("%-40s " % opt.key, end=' ') + print(opt.key.ljust(maxw), end=' ') sys.stdout.flush() - eval_setting(index, xq, gt, args.min_test_duration) + eval_setting(index, xq, gt, args.k, args.inter, args.min_test_duration) else: print(header) @@ -305,4 +410,4 @@ def eval_setting(index, xq, gt, min_time): sys.stdout.flush() ps.set_index_parameters(index, param) - eval_setting(index, xq, gt, args.min_test_duration) + eval_setting(index, xq, gt, args.k, args.inter, args.min_test_duration) diff --git a/benchs/bench_all_ivf/bench_kmeans.py b/benchs/bench_all_ivf/bench_kmeans.py old mode 100755 new mode 100644 index 4850844717..43184ec59b --- a/benchs/bench_all_ivf/bench_kmeans.py +++ b/benchs/bench_all_ivf/bench_kmeans.py @@ -1,11 +1,8 @@ -#!/usr/bin/env python2 - # Copyright (c) Facebook, Inc. and its affiliates. # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
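# [editor's note] Toy illustration (not part of the patch) of the intersection
# measure selected by --inter in bench_all_ivf.py above: the average overlap
# between retrieved ids and ground-truth ids, instead of 1-recall@1. The
# function mirrors compute_inter above; the arrays are made up.
#
#     import numpy as np
#
#     def compute_inter(a, b):
#         nq, rank = a.shape
#         ninter = sum(
#             np.intersect1d(a[i, :rank], b[i, :rank]).size
#             for i in range(nq)
#         )
#         return ninter / a.size
#
#     gt = np.array([[1, 2, 3], [4, 5, 6]])
#     I = np.array([[3, 2, 9], [6, 5, 4]])
#     print(compute_inter(gt, I))   # 5 common ids out of 6 -> 0.8333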
-from __future__ import print_function import os import numpy as np import faiss @@ -98,6 +95,7 @@ def aa(*args, **kwargs): clustering.max_points_per_centroid = 10**6 clustering.min_points_per_centroid = 1 +centroids = None for iter0 in range(0, args.niter, args.eval_freq): iter1 = min(args.niter, iter0 + args.eval_freq) diff --git a/benchs/bench_all_ivf/cmp_with_scann.py b/benchs/bench_all_ivf/cmp_with_scann.py new file mode 100644 index 0000000000..69909e8f31 --- /dev/null +++ b/benchs/bench_all_ivf/cmp_with_scann.py @@ -0,0 +1,308 @@ +import time +import sys +import os +import argparse + +import numpy as np + + +def eval_recalls(name, I, gt, times): + k = I.shape[1] + s = "%-40s recall" % name + nq = len(gt) + for rank in 1, 10, 100, 1000: + if rank > k: + break + recall = (I[:, :rank] == gt[:, :1]).sum() / nq + s += "@%d: %.4f " % (rank, recall) + s += "time: %.4f s (± %.4f)" % (np.mean(times), np.std(times)) + print(s) + +def eval_inters(name, I, gt, times): + k = I.shape[1] + s = "%-40s inter" % name + nq = len(gt) + for rank in 1, 10, 100, 1000: + if rank > k: + break + ninter = 0 + for i in range(nq): + ninter += np.intersect1d(I[i, :rank], gt[i, :rank]).size + inter = ninter / (nq * rank) + s += "@%d: %.4f " % (rank, inter) + s += "time: %.4f s (± %.4f)" % (np.mean(times), np.std(times)) + print(s) + + +def main(): + + parser = argparse.ArgumentParser() + + def aa(*args, **kwargs): + group.add_argument(*args, **kwargs) + + group = parser.add_argument_group('dataset options') + + aa('--db', default='deep1M', help='dataset') + aa('--measure', default="1-recall", + help="perf measure to use: 1-recall or inter") + aa('--download', default=False, action="store_true") + aa('--lib', default='faiss', help='library to use (faiss or scann)') + aa('--thenscann', default=False, action="store_true") + aa('--base_dir', default='/checkpoint/matthijs/faiss_improvements/cmp_ivf_scan_2') + + group = parser.add_argument_group('searching') + aa('--k', default=10, type=int, help='nb of nearest neighbors') + aa('--pre_reorder_k', default="0,10,100,1000", help='values for reorder_k') + aa('--nprobe', default="1,2,5,10,20,50,100,200", help='values for nprobe') + aa('--nrun', default=5, type=int, help='nb of runs to perform') + args = parser.parse_args() + + print("args:", args) + pre_reorder_k_tab = [int(x) for x in args.pre_reorder_k.split(',')] + nprobe_tab = [int(x) for x in args.nprobe.split(',')] + + os.system('echo -n "nb processors "; ' + 'cat /proc/cpuinfo | grep ^processor | wc -l; ' + 'cat /proc/cpuinfo | grep ^"model name" | tail -1') + + cache_dir = args.base_dir + "/" + args.db + "/" + k = args.k + nrun = args.nrun + + if args.lib == "faiss": + # prepare cache + import faiss + from datasets import load_dataset + + ds = load_dataset(args.db, download=args.download) + print(ds) + if not os.path.exists(cache_dir + "xb.npy"): + # store for SCANN + os.system(f"rm -rf {cache_dir}; mkdir -p {cache_dir}") + tosave = dict( + # xt = ds.get_train(10), + xb = ds.get_database(), + xq = ds.get_queries(), + gt = ds.get_groundtruth() + ) + for name, v in tosave.items(): + fname = cache_dir + "/" + name + ".npy" + print("save", fname) + np.save(fname, v) + + open(cache_dir + "metric", "w").write(ds.metric) + + name1_to_metric = { + "IP": faiss.METRIC_INNER_PRODUCT, + "L2": faiss.METRIC_L2 + } + + index_fname = cache_dir + "index.faiss" + if not os.path.exists(index_fname): + index = faiss_make_index( + ds.get_database(), name1_to_metric[ds.metric], index_fname) + else: + index = faiss.read_index(index_fname) 
+ + xb = ds.get_database() + xq = ds.get_queries() + gt = ds.get_groundtruth() + + faiss_eval_search( + index, xq, xb, nprobe_tab, pre_reorder_k_tab, k, gt, + nrun, args.measure + ) + + if args.lib == "scann": + from scann.scann_ops.py import scann_ops_pybind + + dataset = {} + for kn in "xb xq gt".split(): + fname = cache_dir + "/" + kn + ".npy" + print("load", fname) + dataset[kn] = np.load(fname) + name1_to_name2 = { + "IP": "dot_product", + "L2": "squared_l2" + } + distance_measure = name1_to_name2[open(cache_dir + "metric").read()] + + xb = dataset["xb"] + xq = dataset["xq"] + gt = dataset["gt"] + + scann_dir = cache_dir + "/scann1.1.1_serialized" + if os.path.exists(scann_dir + "/scann_config.pb"): + searcher = scann_ops_pybind.load_searcher(scann_dir) + else: + searcher = scann_make_index(xb, distance_measure, scann_dir, 0) + + scann_dir = cache_dir + "/scann1.1.1_serialized_reorder" + if os.path.exists(scann_dir + "/scann_config.pb"): + searcher_reo = scann_ops_pybind.load_searcher(scann_dir) + else: + searcher_reo = scann_make_index(xb, distance_measure, scann_dir, 100) + + scann_eval_search( + searcher, searcher_reo, + xq, xb, nprobe_tab, pre_reorder_k_tab, k, gt, + nrun, args.measure + ) + + if args.lib != "scann" and args.thenscann: + # just append --lib scann, that will override the previous cmdline + # options + cmdline = " ".join(sys.argv) + " --lib scann" + cmdline = ( + ". ~/anaconda3/etc/profile.d/conda.sh ; " + + "conda activate scann_1.1.1; " + "python -u " + cmdline) + + print("running", cmdline) + + os.system(cmdline) + + +############################################################### +# SCANN +############################################################### + +def scann_make_index(xb, distance_measure, scann_dir, reorder_k): + import scann + + print("build index") + + if distance_measure == "dot_product": + thr = 0.2 + else: + thr = 0 + k = 10 + sb = scann.scann_ops_pybind.builder(xb, k, distance_measure) + sb = sb.tree(num_leaves=2000, num_leaves_to_search=100, training_sample_size=250000) + sb = sb.score_ah(2, anisotropic_quantization_threshold=thr) + + if reorder_k > 0: + sb = sb.reorder(reorder_k) + + searcher = sb.build() + + print("done") + + print("write index to", scann_dir) + + os.system(f"rm -rf {scann_dir}; mkdir -p {scann_dir}") + # os.mkdir(scann_dir) + searcher.serialize(scann_dir) + return searcher + +def scann_eval_search( + searcher, searcher_reo, + xq, xb, nprobe_tab, pre_reorder_k_tab, k, gt, + nrun, measure): + + # warmup + for _run in range(5): + searcher.search_batched(xq) + + for nprobe in nprobe_tab: + + for pre_reorder_k in pre_reorder_k_tab: + + times = [] + for _run in range(nrun): + if pre_reorder_k == 0: + t0 = time.time() + I, D = searcher.search_batched( + xq, leaves_to_search=nprobe, final_num_neighbors=k + ) + t1 = time.time() + else: + t0 = time.time() + I, D = searcher_reo.search_batched( + xq, leaves_to_search=nprobe, final_num_neighbors=k, + pre_reorder_num_neighbors=pre_reorder_k + ) + t1 = time.time() + + times.append(t1 - t0) + header = "SCANN nprobe=%4d reo=%4d" % (nprobe, pre_reorder_k) + if measure == "1-recall": + eval_recalls(header, I, gt, times) + else: + eval_inters(header, I, gt, times) + + + + +############################################################### +# Faiss +############################################################### + + +def faiss_make_index(xb, metric_type, fname): + import faiss + + d = xb.shape[1] + M = d // 2 + index = faiss.index_factory(d, f"IVF2000,PQ{M}x4fs", metric_type) + # if not by_residual: 
+ # print("setting no residual") + # index.by_residual = False + + print("train") + # index.train(ds.get_train()) + index.train(xb[:250000]) + print("add") + index.add(xb) + print("write index", fname) + faiss.write_index(index, fname) + + return index + +def faiss_eval_search( + index, xq, xb, nprobe_tab, pre_reorder_k_tab, + k, gt, nrun, measure + ): + import faiss + + print("use precomputed table=", index.use_precomputed_table, + "by residual=", index.by_residual) + + print("adding a refine index") + index_refine = faiss.IndexRefineFlat(index, faiss.swig_ptr(xb)) + + print("set single thread") + faiss.omp_set_num_threads(1) + + print("warmup") + for _run in range(5): + index.search(xq, k) + + print("run timing") + for nprobe in nprobe_tab: + for pre_reorder_k in pre_reorder_k_tab: + index.nprobe = nprobe + times = [] + for _run in range(nrun): + if pre_reorder_k == 0: + t0 = time.time() + D, I = index.search(xq, k) + t1 = time.time() + else: + index_refine.k_factor = pre_reorder_k / k + t0 = time.time() + D, I = index_refine.search(xq, k) + t1 = time.time() + + times.append(t1 - t0) + + header = "Faiss nprobe=%4d reo=%4d" % (nprobe, pre_reorder_k) + if measure == "1-recall": + eval_recalls(header, I, gt, times) + else: + eval_inters(header, I, gt, times) + + +if __name__ == "__main__": + main() diff --git a/benchs/bench_all_ivf/datasets.py b/benchs/bench_all_ivf/datasets.py old mode 100755 new mode 100644 index 5551d86b5c..3f712d43a4 --- a/benchs/bench_all_ivf/datasets.py +++ b/benchs/bench_all_ivf/datasets.py @@ -1,5 +1,3 @@ -#! /usr/bin/env python2 - # Copyright (c) Facebook, Inc. and its affiliates. # # This source code is licensed under the MIT license found in the @@ -9,168 +7,83 @@ Common functions to load datasets and compute their ground-truth """ -from __future__ import print_function import time import numpy as np import faiss -import sys - -# set this to the directory that contains the datafiles. 
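# [editor's note] The hard-coded simdir removed here is superseded below by
# faiss.contrib.datasets; a hedged sketch of pointing the contrib module at a
# local copy of the data (the path is a placeholder):
#
#     from faiss.contrib import datasets as faiss_datasets
#     faiss_datasets.dataset_basedir = '/path/to/simsearch/'
#     ds = faiss_datasets.DatasetSIFT1M()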
-# deep1b data should be at simdir + 'deep1b' -# bigann data should be at simdir + 'bigann' -simdir = '/mnt/vol/gfsai-east/ai-group/datasets/simsearch/' - -################################################################# -# Small I/O functions -################################################################# +from faiss.contrib import datasets as faiss_datasets -def ivecs_read(fname): - a = np.fromfile(fname, dtype='int32') - d = a[0] - return a.reshape(-1, d + 1)[:, 1:].copy() +print("path:", faiss_datasets.__file__) +faiss_datasets.dataset_basedir = '/checkpoint/matthijs/simsearch/' -def fvecs_read(fname): - return ivecs_read(fname).view('float32') +def sanitize(x): + return np.ascontiguousarray(x, dtype='float32') -def ivecs_mmap(fname): - a = np.memmap(fname, dtype='int32', mode='r') - d = a[0] - return a.reshape(-1, d + 1)[:, 1:] +################################################################# +# Dataset +################################################################# +class DatasetCentroids(faiss_datasets.Dataset): -def fvecs_mmap(fname): - return ivecs_mmap(fname).view('float32') + def __init__(self, ds, indexfile): + self.d = ds.d + self.metric = ds.metric + self.nq = ds.nq + self.xq = ds.get_queries() + # get the xb set + src_index = faiss.read_index(indexfile) + src_quant = faiss.downcast_index(src_index.quantizer) + centroids = faiss.vector_to_array(src_quant.xb) + self.xb = centroids.reshape(-1, self.d) + self.nb = self.nt = len(self.xb) -def bvecs_mmap(fname): - x = np.memmap(fname, dtype='uint8', mode='r') - d = x[:4].view('int32')[0] - return x.reshape(-1, d + 4)[:, 4:] + def get_queries(self): + return self.xq + def get_database(self): + return self.xb -def ivecs_write(fname, m): - n, d = m.shape - m1 = np.empty((n, d + 1), dtype='int32') - m1[:, 0] = d - m1[:, 1:] = m - m1.tofile(fname) + def get_train(self, maxtrain=None): + return self.xb + def get_groundtruth(self, k=100): + return faiss.knn( + self.xq, self.xb, k, + faiss.METRIC_L2 if self.metric == 'L2' else faiss.METRIC_INNER_PRODUCT + )[1] -def fvecs_write(fname, m): - m = m.astype('float32') - ivecs_write(fname, m.view('int32')) -################################################################# -# Dataset -################################################################# -def sanitize(x): - return np.ascontiguousarray(x, dtype='float32') - -class ResultHeap: - """ Combine query results from a sliced dataset """ - - def __init__(self, nq, k): - " nq: number of query vectors, k: number of results per query " - self.I = np.zeros((nq, k), dtype='int64') - self.D = np.zeros((nq, k), dtype='float32') - self.nq, self.k = nq, k - heaps = faiss.float_maxheap_array_t() - heaps.k = k - heaps.nh = nq - heaps.val = faiss.swig_ptr(self.D) - heaps.ids = faiss.swig_ptr(self.I) - heaps.heapify() - self.heaps = heaps - - def add_batch_result(self, D, I, i0): - assert D.shape == (self.nq, self.k) - assert I.shape == (self.nq, self.k) - I += i0 - self.heaps.addn_with_ids( - self.k, faiss.swig_ptr(D), - faiss.swig_ptr(I), self.k) - - def finalize(self): - self.heaps.reorder() - - -def compute_GT_sliced(xb, xq, k): - print("compute GT") - t0 = time.time() - nb, d = xb.shape - nq, d = xq.shape - rh = ResultHeap(nq, k) - bs = 10 ** 5 - - xqs = sanitize(xq) - - db_gt = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d)) - - # compute ground-truth by blocks of bs, and add to heaps - for i0 in range(0, nb, bs): - i1 = min(nb, i0 + bs) - xsl = sanitize(xb[i0:i1]) - db_gt.add(xsl) - D, I = db_gt.search(xqs, k) - rh.add_batch_result(D, I, 
i0) - db_gt.reset() - print("\r %d/%d, %.3f s" % (i0, nb, time.time() - t0), end=' ') - sys.stdout.flush() - print() - rh.finalize() - gt_I = rh.I - - print("GT time: %.3f s" % (time.time() - t0)) - return gt_I - - -def do_compute_gt(xb, xq, k): - print("computing GT") - nb, d = xb.shape - index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d)) - if nb < 100 * 1000: - print(" add") - index.add(np.ascontiguousarray(xb, dtype='float32')) - print(" search") - D, I = index.search(np.ascontiguousarray(xq, dtype='float32'), k) - else: - I = compute_GT_sliced(xb, xq, k) - - return I.astype('int32') - - -def load_data(dataset='deep1M', compute_gt=False): +def load_dataset(dataset='deep1M', compute_gt=False, download=False): print("load data", dataset) if dataset == 'sift1M': - basedir = simdir + 'sift1M/' - - xt = fvecs_read(basedir + "sift_learn.fvecs") - xb = fvecs_read(basedir + "sift_base.fvecs") - xq = fvecs_read(basedir + "sift_query.fvecs") - gt = ivecs_read(basedir + "sift_groundtruth.ivecs") + return faiss_datasets.DatasetSIFT1M() elif dataset.startswith('bigann'): - basedir = simdir + 'bigann/' dbsize = 1000 if dataset == "bigann1B" else int(dataset[6:-1]) - xb = bvecs_mmap(basedir + 'bigann_base.bvecs') - xq = bvecs_mmap(basedir + 'bigann_query.bvecs') - xt = bvecs_mmap(basedir + 'bigann_learn.bvecs') - # trim xb to correct size - xb = xb[:dbsize * 1000 * 1000] - gt = ivecs_read(basedir + 'gnd/idx_%dM.ivecs' % dbsize) + + return faiss_datasets.DatasetBigANN(nb_M=dbsize) + + elif dataset.startswith("deep_centroids_"): + ncent = int(dataset[len("deep_centroids_"):]) + centdir = "/checkpoint/matthijs/bench_all_ivf/precomputed_clusters" + return DatasetCentroids( + faiss_datasets.DatasetDeep1B(nb=1000000), + f"{centdir}/clustering.dbdeep1M.IVF{ncent}.faissindex" + ) + elif dataset.startswith("deep"): - basedir = simdir + 'deep1b/' + szsuf = dataset[4:] if szsuf[-1] == 'M': dbsize = 10 ** 6 * int(szsuf[:-1]) @@ -180,28 +93,17 @@ def load_data(dataset='deep1M', compute_gt=False): dbsize = 1000 * int(szsuf[:-1]) else: assert False, "did not recognize suffix " + szsuf + return faiss_datasets.DatasetDeep1B(nb=dbsize) - xt = fvecs_mmap(basedir + "learn.fvecs") - xb = fvecs_mmap(basedir + "base.fvecs") - xq = fvecs_read(basedir + "deep1B_queries.fvecs") + elif dataset == "music-100": + return faiss_datasets.DatasetMusic100() - xb = xb[:dbsize] - - gt_fname = basedir + "%s_groundtruth.ivecs" % dataset - if compute_gt: - gt = do_compute_gt(xb, xq, 100) - print("store", gt_fname) - ivecs_write(gt_fname, gt) - - gt = ivecs_read(gt_fname) + elif dataset == "glove": + return faiss_datasets.DatasetGlove(download=download) else: assert False - print("dataset %s sizes: B %s Q %s T %s" % ( - dataset, xb.shape, xq.shape, xt.shape)) - - return xt, xb, xq, gt ################################################################# # Evaluation diff --git a/benchs/bench_all_ivf/make_groundtruth.py b/benchs/bench_all_ivf/make_groundtruth.py new file mode 100644 index 0000000000..8ddf901ba7 --- /dev/null +++ b/benchs/bench_all_ivf/make_groundtruth.py @@ -0,0 +1,27 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
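# [editor's note] Hedged sketch of how the rewritten local datasets.load_dataset
# above is typically consumed by the benchmark scripts; it assumes the data
# files are present under the configured basedir:
#
#     from datasets import load_dataset, sanitize
#     ds = load_dataset("sift1M")
#     print(ds)                                  # dataset summary
#     xq = sanitize(ds.get_queries())
#     gt = ds.get_groundtruth(k=100)
#     for block in ds.database_iterator(bs=100000):
#         pass                                   # e.g. index.add(block)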
+ +import logging + +# https://stackoverflow.com/questions/7016056/python-logging-not-outputting-anything +logging.basicConfig() +logger = logging.getLogger('faiss.contrib.exhaustive_search') +logger.setLevel(logging.INFO) + +from faiss.contrib import datasets +from faiss.contrib.exhaustive_search import knn_ground_truth +from faiss.contrib import vecs_io + +ds = datasets.DatasetDeep1B(nb=int(1e9)) + +print("computing GT matches for", ds) + +D, I = knn_ground_truth( + ds.get_queries(), + ds.database_iterator(bs=65536), + k=100 +) + +vecs_io.ivecs_write("/tmp/tt.ivecs", I) diff --git a/benchs/bench_all_ivf/parse_bench_all_ivf.py b/benchs/bench_all_ivf/parse_bench_all_ivf.py old mode 100755 new mode 100644 index cacbffb015..6e438c90c8 --- a/benchs/bench_all_ivf/parse_bench_all_ivf.py +++ b/benchs/bench_all_ivf/parse_bench_all_ivf.py @@ -1,54 +1,18 @@ -#! /usr/bin/env python2 - # Copyright (c) Facebook, Inc. and its affiliates. # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from __future__ import print_function import os import numpy as np +from collections import defaultdict from matplotlib import pyplot import re from argparse import Namespace - -# the directory used in run_on_cluster.bash -basedir = '/mnt/vol/gfsai-east/ai-group/users/matthijs/bench_all_ivf/' -logdir = basedir + 'logs/' - - -# which plot to output -db = 'bigann1B' -code_size = 8 - - - -def unitsize(indexkey): - """ size of one vector in the index """ - mo = re.match('.*,PQ(\\d+)', indexkey) - if mo: - return int(mo.group(1)) - if indexkey.endswith('SQ8'): - bits_per_d = 8 - elif indexkey.endswith('SQ4'): - bits_per_d = 4 - elif indexkey.endswith('SQfp16'): - bits_per_d = 16 - else: - assert False - mo = re.match('PCAR(\\d+),.*', indexkey) - if mo: - return bits_per_d * int(mo.group(1)) / 8 - mo = re.match('OPQ\\d+_(\\d+),.*', indexkey) - if mo: - return bits_per_d * int(mo.group(1)) / 8 - mo = re.match('RR(\\d+),.*', indexkey) - if mo: - return bits_per_d * int(mo.group(1)) / 8 - assert False +from faiss.contrib.factory_tools import get_code_size as unitsize def dbsize_from_name(dbname): @@ -84,10 +48,20 @@ def parse_result_file(fname): keys = [] stats = {} stats['run_version'] = fname[-8] + indexkey = None for l in open(fname): - if st == 0: - if l.startswith('CHRONOS_JOB_INSTANCE_ID'): - stats['CHRONOS_JOB_INSTANCE_ID'] = l.split()[-1] + if l.startswith("srun:"): + # looks like a crash... 
+ if indexkey is None: + raise RuntimeError("instant crash") + break + elif st == 0: + if l.startswith("dataset in dimension"): + fi = l.split() + stats["d"] = int(fi[3][:-1]) + stats["nq"] = int(fi[9]) + stats["nb"] = int(fi[11]) + stats["nt"] = int(fi[13]) if l.startswith('index size on disk:'): stats['index_size'] = int(l.split()[-1]) if l.startswith('current RSS:'): @@ -101,7 +75,25 @@ def parse_result_file(fname): if l.startswith('args:'): args = eval(l[l.find(' '):]) indexkey = args.indexkey - elif 'R@1 R@10 R@100' in l: + elif "time(ms/q)" in l: + # result header + if 'R@1 R@10 R@100' in l: + stats["measure"] = "recall" + stats["ranks"] = [1, 10, 100] + elif 'I@1 I@10 I@100' in l: + stats["measure"] = "inter" + stats["ranks"] = [1, 10, 100] + elif 'inter@' in l: + stats["measure"] = "inter" + fi = l.split() + if fi[1] == "inter@": + rank = int(fi[2]) + else: + rank = int(fi[1][len("inter@"):]) + stats["ranks"] = [rank] + + else: + assert False st = 1 elif 'index size on disk:' in l: index_size = int(l.split()[-1]) @@ -109,115 +101,106 @@ def parse_result_file(fname): st = 2 elif st == 2: fi = l.split() + if l[0] == " ": + # means there are 0 parameters + fi = [""] + fi keys.append(fi[0]) res.append([float(x) for x in fi[1:]]) return indexkey, np.array(res), keys, stats -# run parsing -allres = {} -allstats = {} -nts = [] -missing = [] -versions = {} - -fnames = keep_latest_stdout(os.listdir(logdir)) -# print fnames -# filenames are in the form .x.stdout -# where x is a version number (from a to z) -# keep only latest version of each name - -for fname in fnames: - if not ('db' + db in fname and fname.endswith('.stdout')): - continue - indexkey, res, _, stats = parse_result_file(logdir + fname) - if res.size == 0: - missing.append(fname) - errorline = open( - logdir + fname.replace('.stdout', '.stderr')).readlines() - if len(errorline) > 0: - errorline = errorline[-1] - else: - errorline = 'NO STDERR' - print(fname, stats['CHRONOS_JOB_INSTANCE_ID'], errorline) +# the directory used in run_on_cluster.bash +basedir = "/checkpoint/matthijs/bench_all_ivf/" +logdir = basedir + 'logs/' - else: - if indexkey in allres: - if allstats[indexkey]['run_version'] > stats['run_version']: - # don't use this run - continue - n_threads = stats.get('n_threads', 1) - nts.append(n_threads) - allres[indexkey] = res - allstats[indexkey] = stats -assert len(set(nts)) == 1 -n_threads = nts[0] +def collect_results_for(db='deep1M', prefix="autotune."): + # run parsing + allres = {} + allstats = {} + missing = [] + + fnames = keep_latest_stdout(os.listdir(logdir)) + # print fnames + # filenames are in the form .x.stdout + # where x is a version number (from a to z) + # keep only latest version of each name + + for fname in fnames: + if not ( + 'db' + db in fname and + fname.startswith(prefix) and + fname.endswith('.stdout') + ): + continue + print("parse", fname, end=" ", flush=True) + try: + indexkey, res, _, stats = parse_result_file(logdir + fname) + except RuntimeError as e: + print("FAIL %s" % e) + res = np.zeros((2, 0)) + except Exception as e: + print("PARSE ERROR " + e) + res = np.zeros((2, 0)) + else: + print(len(res), "results") + if res.size == 0: + missing.append(fname) + else: + if indexkey in allres: + if allstats[indexkey]['run_version'] > stats['run_version']: + # don't use this run + continue + allres[indexkey] = res + allstats[indexkey] = stats -def plot_tradeoffs(allres, code_size, recall_rank): - dbsize = dbsize_from_name(db) - recall_idx = int(np.log10(recall_rank)) + return allres, 
allstats +def extract_pareto_optimal(allres, keys, recall_idx=0, times_idx=3): bigtab = [] - names = [] - - for k,v in sorted(allres.items()): - if v.ndim != 2: continue - us = unitsize(k) - if us != code_size: continue + for i, k in enumerate(keys): + v = allres[k] perf = v[:, recall_idx] - times = v[:, 3] + times = v[:, times_idx] bigtab.append( np.vstack(( - np.ones(times.size, dtype=int) * len(names), + np.ones(times.size) * i, perf, times )) ) - names.append(k) + if bigtab == []: + return [], np.zeros((3, 0)) bigtab = np.hstack(bigtab) + # sort by perf perm = np.argsort(bigtab[1, :]) - bigtab = bigtab[:, perm] - - times = np.minimum.accumulate(bigtab[2, ::-1])[::-1] - selection = np.where(bigtab[2, :] == times) - - selected_methods = [names[i] for i in - np.unique(bigtab[0, selection].astype(int))] - not_selected = list(set(names) - set(selected_methods)) - - print("methods without an optimal OP: ", not_selected) - - nq = 10000 - pyplot.title('database ' + db + ' code_size=%d' % code_size) - - # grayed out lines + bigtab_sorted = bigtab[:, perm] + best_times = np.minimum.accumulate(bigtab_sorted[2, ::-1])[::-1] + selection, = np.where(bigtab_sorted[2, :] == best_times) + selected_keys = [ + keys[i] for i in + np.unique(bigtab_sorted[0, selection].astype(int)) + ] + ops = bigtab_sorted[:, selection] - for k in not_selected: - v = allres[k] - if v.ndim != 2: continue - us = unitsize(k) - if us != code_size: continue + return selected_keys, ops - linestyle = (':' if 'PQ' in k else - '-.' if 'SQ4' in k else - '--' if 'SQ8' in k else '-') - - pyplot.semilogy(v[:, recall_idx], v[:, 3], label=None, - linestyle=linestyle, - marker='o' if 'HNSW' in k else '+', - color='#cccccc', linewidth=0.2) +def plot_subset( + allres, allstats, selected_methods, recall_idx, times_idx=3, + report=["overhead", "build time"]): # important methods for k in selected_methods: v = allres[k] - if v.ndim != 2: continue - us = unitsize(k) - if us != code_size: continue stats = allstats[k] - tot_size = stats['index_size'] + stats['tables_size'] + d = stats["d"] + dbsize = stats["nb"] + if "index_size" in stats and "tables_size" in stats: + tot_size = stats['index_size'] + stats['tables_size'] + else: + tot_size = -1 id_size = 8 # 64 bit addt = '' @@ -230,18 +213,107 @@ def plot_tradeoffs(allres, code_size, recall_rank): add_sec = int(add_time) addt = ', %dm%02d' % (add_sec / 60, add_sec % 60) + code_size = unitsize(d, k) + + label = k + + if "code_size" in report: + label += " %d bytes" % code_size + + tight_size = (code_size + id_size) * dbsize + + if tot_size < 0 or "overhead" not in report: + pass # don't know what the index size is + elif tot_size > 10 * tight_size: + label += " overhead x%.1f" % (tot_size / tight_size) + else: + label += " overhead+%.1f%%" % ( + tot_size / tight_size * 100 - 100) + + if "build time" in report: + label += " " + addt + + linestyle = (':' if 'Refine' in k or 'RFlat' in k else + '-.' 
if 'SQ' in k else + '-' if '4fs' in k else + '-') + print(k, linestyle) + pyplot.semilogy(v[:, recall_idx], 1000 / v[:, times_idx], label=label, + linestyle=linestyle, + marker='o' if '4fs' in k else '+') + + recall_rank = stats["ranks"][recall_idx] + if stats["measure"] == "recall": + pyplot.xlabel('1-recall at %d' % recall_rank) + elif stats["measure"] == "inter": + pyplot.xlabel('inter @ %d' % recall_rank) + else: + assert False + pyplot.ylabel('QPS (%d threads)' % stats["n_threads"]) + + +def plot_tradeoffs(db, allres, allstats, code_size, recall_rank): + stat0 = next(iter(allstats.values())) + d = stat0["d"] + n_threads = stat0["n_threads"] + nq = stat0["nq"] + recall_idx = stat0["ranks"].index(recall_rank) + # times come after the perf measure + times_idx = len(stat0["ranks"]) + + if type(code_size) == int: + if code_size == 0: + code_size = [0, 1e50] + code_size_name = "any code size" + else: + code_size_name = "code_size=%d" % code_size + code_size = [code_size, code_size] + elif type(code_size) == tuple: + code_size_name = "code_size in [%d, %d]" % code_size + else: + assert False + + names_maxperf = [] + + for k in sorted(allres): + v = allres[k] + if v.ndim != 2: continue + us = unitsize(d, k) + if not code_size[0] <= us <= code_size[1]: continue + names_maxperf.append((v[-1, recall_idx], k)) + + # sort from lowest to highest topline accuracy + names_maxperf.sort() + names = [name for mp, name in names_maxperf] - label = k + ' (size+%.1f%%%s)' % ( - tot_size / float((code_size + id_size) * dbsize) * 100 - 100, - addt) + selected_methods, optimal_points = \ + extract_pareto_optimal(allres, names, recall_idx, times_idx) + + not_selected = list(set(names) - set(selected_methods)) + + print("methods without an optimal OP: ", not_selected) + + pyplot.title('database ' + db + ' ' + code_size_name) + + # grayed out lines + + for k in not_selected: + v = allres[k] + if v.ndim != 2: continue + us = unitsize(d, k) + if not code_size[0] <= us <= code_size[1]: continue linestyle = (':' if 'PQ' in k else '-.' if 'SQ4' in k else '--' if 'SQ8' in k else '-') - pyplot.semilogy(v[:, recall_idx], v[:, 3], label=label, + pyplot.semilogy(v[:, recall_idx], 1000 / v[:, times_idx], label=None, linestyle=linestyle, - marker='o' if 'HNSW' in k else '+') + marker='o' if 'HNSW' in k else '+', + color='#cccccc', linewidth=0.2) + + plot_subset(allres, allstats, selected_methods, recall_idx, times_idx) + if len(not_selected) == 0: om = '' @@ -255,15 +327,175 @@ def plot_tradeoffs(allres, code_size, recall_rank): om += ' ' + m nc += len(m) + 1 + # pyplot.semilogy(optimal_points[1, :], optimal_points[2, :], marker="s") + # print(optimal_points[0, :]) pyplot.xlabel('1-recall at %d %s' % (recall_rank, om) ) - pyplot.ylabel('search time per query (ms, %d threads)' % n_threads) + pyplot.ylabel('QPS (%d threads)' % n_threads) pyplot.legend() pyplot.grid() - pyplot.savefig('figs/tradeoffs_%s_cs%d_r%d.png' % ( - db, code_size, recall_rank)) return selected_methods, not_selected -pyplot.gcf().set_size_inches(15, 10) -plot_tradeoffs(allres, code_size=code_size, recall_rank=1) +if __name__ == "__main__xx": + # tests on centroids indexing (v1) + + for k in 1, 32, 128: + pyplot.gcf().set_size_inches(15, 10) + i = 1 + for ncent in 65536, 262144, 1048576, 4194304: + db = f'deep_centroids_{ncent}.k{k}.' 
+ allres, allstats = collect_results_for( + db=db, prefix="cent_index.") + + pyplot.subplot(2, 2, i) + plot_subset( + allres, allstats, list(allres.keys()), + recall_idx=0, + times_idx=1, + report=["code_size"] + ) + i += 1 + pyplot.title(f"{ncent} centroids") + pyplot.legend() + pyplot.xlim([0.95, 1]) + pyplot.grid() + + pyplot.savefig('figs/deep1B_centroids_k%d.png' % k) + + +if __name__ == "__main__xx": + # centroids plot per k + + pyplot.gcf().set_size_inches(15, 10) + + i=1 + for ncent in 65536, 262144, 1048576, 4194304: + + xyd = defaultdict(list) + + for k in 1, 4, 8, 16, 32, 64, 128, 256: + + db = f'deep_centroids_{ncent}.k{k}.' + allres, allstats = collect_results_for(db=db, prefix="cent_index.") + + for indexkey, res in allres.items(): + idx, = np.where(res[:, 0] >= 0.99) + if idx.size > 0: + xyd[indexkey].append((k, 1000 / res[idx[0], 1])) + + pyplot.subplot(2, 2, i) + i += 1 + for indexkey, xy in xyd.items(): + xy = np.array(xy) + pyplot.loglog(xy[:, 0], xy[:, 1], 'o-', label=indexkey) + + pyplot.title(f"{ncent} centroids") + pyplot.xlabel("k") + xt = 2**np.arange(9) + pyplot.xticks(xt, ["%d" % x for x in xt]) + pyplot.ylabel("QPS (32 threads)") + pyplot.legend() + pyplot.grid() + + pyplot.savefig('../plots/deep1B_centroids_min99.png') + + + + + +if __name__ == "__main__xx": + # main indexing plots + + i = 0 + for db in 'bigann10M', 'deep10M', 'bigann100M', 'deep100M', 'deep1B', 'bigann1B': + allres, allstats = collect_results_for( + db=db, prefix="autotune.") + + for cs in 8, 16, 32, 64: + pyplot.figure(i) + i += 1 + pyplot.gcf().set_size_inches(15, 10) + + cs_range = ( + (0, 8) if cs == 8 else (cs // 2 + 1, cs) + ) + + plot_tradeoffs( + db, allres, allstats, code_size=cs_range, recall_rank=1) + pyplot.savefig('../plots/tradeoffs_%s_cs%d_r1.png' % ( + db, cs)) + + +if __name__ == "__main__": + # 1M indexes + i = 0 + for db in "glove", "music-100": + pyplot.figure(i) + pyplot.gcf().set_size_inches(15, 10) + i += 1 + allres, allstats = collect_results_for(db=db, prefix="autotune.") + plot_tradeoffs(db, allres, allstats, code_size=0, recall_rank=1) + pyplot.savefig('../plots/1M_tradeoffs_' + db + ".png") + + for db in "sift1M", "deep1M": + allres, allstats = collect_results_for(db=db, prefix="autotune.") + pyplot.figure(i) + pyplot.gcf().set_size_inches(15, 10) + i += 1 + plot_tradeoffs(db, allres, allstats, code_size=(0, 64), recall_rank=1) + pyplot.savefig('../plots/1M_tradeoffs_' + db + "_small.png") + + pyplot.figure(i) + pyplot.gcf().set_size_inches(15, 10) + i += 1 + plot_tradeoffs(db, allres, allstats, code_size=(65, 10000), recall_rank=1) + pyplot.savefig('../plots/1M_tradeoffs_' + db + "_large.png") + + + +if __name__ == "__main__xx": + db = 'sift1M' + allres, allstats = collect_results_for(db=db, prefix="autotune.") + pyplot.gcf().set_size_inches(15, 10) + + keys = [ + "IVF1024,PQ32x8", + "IVF1024,PQ64x4", + "IVF1024,PQ64x4fs", + "IVF1024,PQ64x4fsr", + "IVF1024,SQ4", + "IVF1024,SQ8" + ] + + plot_subset(allres, allstats, keys, recall_idx=0, report=["code_size"]) + + pyplot.legend() + pyplot.title(db) + pyplot.xlabel("1-recall@1") + pyplot.ylabel("QPS (32 threads)") + pyplot.grid() + + pyplot.savefig('../plots/ivf1024_variants.png') + + pyplot.figure(2) + pyplot.gcf().set_size_inches(15, 10) + + keys = [ + "HNSW32", + "IVF1024,PQ64x4fs", + "IVF1024,PQ64x4fsr", + "IVF1024,PQ64x4fs,RFlat", + "IVF1024,PQ64x4fs,Refine(SQfp16)", + "IVF1024,PQ64x4fs,Refine(SQ8)", + ] + + plot_subset(allres, allstats, keys, recall_idx=0, report=["code_size"]) + + pyplot.legend() + 
pyplot.title(db) + pyplot.xlabel("1-recall@1") + pyplot.ylabel("QPS (32 threads)") + pyplot.grid() + + pyplot.savefig('../plots/ivf1024_rerank.png') diff --git a/benchs/bench_all_ivf/run_on_cluster_generic.bash b/benchs/bench_all_ivf/run_on_cluster_generic.bash index 6d88f43d9a..f2007ceed7 100644 --- a/benchs/bench_all_ivf/run_on_cluster_generic.bash +++ b/benchs/bench_all_ivf/run_on_cluster_generic.bash @@ -1,3 +1,5 @@ +set -e + # Copyright (c) Facebook, Inc. and its affiliates. # # This source code is licensed under the MIT license found in the @@ -18,232 +20,584 @@ # # the stdout of the command should be stored in $logdir/.stdout -function run_on_1machine () { - # To be implemented + +function run_on () +{ + sys="$1" + shift + name="$1" + shift + script="$logdir/$name.sh" + + if [ -e "$script" ]; then + echo script "$script" exists + return + fi + + # srun handles special characters fine, but the shell interpreter + # does not + escaped_cmd=$( printf "%q " "$@" ) + + cat > $script < (1 << (2 * ls)); ls++)); do + echo -n done + sncent=$(( 1 << ls )) + + indexkeys=" + IVF$((sncent/2)),PQ48x4fs,RFlat + IVF$((sncent*2)),PQ48x4fs,RFlat + HNSW32 + PQ48x4fs + PQ48x4fs,RFlat + IVF$sncent,PQ48x4fs,RFlat + " + + for indexkey in $indexkeys; do + key="cent_index.db$db.k$k.$indexkey" + run_on_1machine_1h "$key.b" \ + python -u bench_all_ivf.py \ + --db $db \ + --indexkey "$indexkey" \ + --maxtrain 0 \ + --inter \ + --searchthreads 32 \ + --k $k + done + done done -############################### 100M experiments +############################### 10M experiments -for db in deep100M bigann100M; do - for coarse in IMI2x11 IMI2x12 IVF65536_HNSW32 IVF262144_HNSW32 +for db in deep10M bigann10M; do + + coarses=" + IVF65536(IVF256,PQHDx4fs,RFlat) + IVF16384_HNSW32 + IVF65536_HNSW32 + IVF262144_HNSW32 + IVF262144(IVF512,PQHDx4fs,RFlat) + " + + dim=$( get_db_dim $db ) + + for coarse in $coarses do - for indexkey in \ - OPQ8_64,$coarse,PQ8 \ - OPQ16_64,$coarse,PQ16 \ - PCAR32,$coarse,SQ4 \ - OPQ32_128,$coarse,PQ32 \ - PCAR64,$coarse,SQ4 \ - PCAR32,$coarse,SQ8 \ - PCAR64,$coarse,SQ8 \ - PCAR32,$coarse,SQfp16 \ - PCAR128,$coarse,SQ4 \ - OPQ64_128,$coarse,PQ64 + replace_coarse_PQHD "$coarse" $dim + + indexkeys=" + $coarseD,PQ$((dim/2))x4fs + + OPQ8_64,$coarse64,PQ8 + PCAR16,$coarse16,SQ4 + OPQ16_64,$coarse64,PQ16x4fs + OPQ16_64,$coarse64,PQ16x4fsr + + OPQ16_64,$coarse64,PQ16 + PCAR16,$coarse16,SQ8 + PCAR32,$coarse32,SQ4 + OPQ32_64,$coarse64,PQ32x4fs + OPQ32_64,$coarse64,PQ32x4fsr + + OPQ32_128,$coarse128,PQ32 + PCAR32,$coarse32,SQ8 + PCAR64,$coarse64,SQ4 + PCAR16,$coarse16,SQfp16 + OPQ64_128,$coarse128,PQ64x4fs + OPQ64_128,$coarse128,PQ64x4fsr + + OPQ64_128,$coarse128,PQ64 + PCAR64,$coarse64,SQ8 + PCAR32,$coarse32,SQfp16 + PCAR128,$coarse128,SQ4 + OPQ128_256,$coarse256,PQ128x4fs + OPQ128_256,$coarse256,PQ128x4fsr + OPQ56_112,$coarse112,PQ7+56 + OPQ16_64,$coarse64,PQ16x4fs,Refine(OPQ56_112,PQ56) + OPQ16_64,$coarse64,PQ16x4fs,Refine(PCAR72,SQ6) + OPQ32_64,$coarse64,PQ16x4fs,Refine(PCAR64,SQ6) + OPQ32_64,$coarse64,PQ32x4fs,Refine(OPQ48_96,PQ48) + " + + indexkeys=" + OPQ16_64,$coarse64,PQ16x4fsr + OPQ32_64,$coarse64,PQ32x4fsr + OPQ64_128,$coarse128,PQ64x4fsr + OPQ128_256,$coarse256,PQ128x4fsr + " + + + for indexkey in $indexkeys do key=autotune.db$db.${indexkey//,/_} - run_on_1machine $key \ - python -u bench_all_ivf.py \ + key="${key//(/_}" + key="${key//)/_}" + run_on_1machine_3h $key.l \ + python -u bench_all_ivf.py \ --db $db \ - --indexkey $indexkey \ + --indexkey "$indexkey" \ --maxtrain 0 \ - --indexfile 
$indexdir/$key.faissindex \ - --searchthreads 16 \ + --indexfile "$indexdir/$key.faissindex" \ + $( add_precomputed_quantizer $db $coarse ) \ + --searchthreads 32 \ --min_test_duration 3 \ - --add_bs 1000000 - + --autotune_max nprobe:2000 done done done -############################### 1B experiments - -for db in deep1B bigann1B; do +############################### 100M experiments - for coarse in IMI2x12 IMI2x13 IVF262144_HNSW32 +for db in deep100M bigann100M; do + coarses=" + IVF65536_HNSW32 + IVF262144_HNSW32 + IVF262144(IVF512,PQHDx4fs,RFlat) + IVF1048576_HNSW32 + IVF1048576(IVF1024,PQHDx4fs,RFlat) + " + dim=$( get_db_dim $db ) + + for coarse in $coarses do + replace_coarse_PQHD "$coarse" $dim + + indexkeys=" + OPQ8_64,$coarse64,PQ8 + OPQ16_64,$coarse64,PQ16x4fs + + PCAR32,$coarse32,SQ4 + OPQ16_64,$coarse64,PQ16 + OPQ32_64,$coarse64,PQ32x4fs - for indexkey in \ - OPQ8_64,$coarse,PQ8 \ - OPQ16_64,$coarse,PQ16 \ - PCAR32,$coarse,SQ4 \ - OPQ32_128,$coarse,PQ32 \ - PCAR64,$coarse,SQ4 \ - PCAR32,$coarse,SQ8 \ - PCAR64,$coarse,SQ8 \ - PCAR32,$coarse,SQfp16 \ - PCAR128,$coarse,SQ4 \ - PQ64_128,$coarse,PQ64 \ - RR128,$coarse,SQ4 + OPQ32_128,$coarse128,PQ32 + PCAR64,$coarse64,SQ4 + PCAR32,$coarse32,SQ8 + OPQ64_128,$coarse128,PQ64x4fs + + PCAR128,$coarse128,SQ4 + OPQ64_128,$coarse128,PQ64 + + PCAR32,$coarse32,SQfp16 + PCAR64,$coarse64,SQ8 + OPQ128_256,$coarse256,PQ128x4fs + + OPQ56_112,$coarse112,PQ7+56 + OPQ16_64,$coarse64,PQ16x4fs,Refine(OPQ56_112,PQ56) + + $coarseD,PQ$((dim/2))x4fs + " + + indexkeys=" + OPQ128_256,$coarse256,PQ128x4fsr + OPQ64_128,$coarse128,PQ64x4fsr + OPQ32_64,$coarse64,PQ32x4fsr + OPQ16_64,$coarse64,PQ16x4fsr + OPQ16_64,$coarse64,PQ16x4fsr,Refine(OPQ56_112,PQ56) + " + + for indexkey in $indexkeys do key=autotune.db$db.${indexkey//,/_} - run_on_1machine $key \ + key="${key//(/_}" + key="${key//)/_}" + run_on_1machine $key.e \ python -u bench_all_ivf.py \ --db $db \ - --indexkey $indexkey \ + --indexkey "$indexkey" \ --maxtrain 0 \ --indexfile $indexdir/$key.faissindex \ - --searchthreads 16 \ + --searchthreads 32 \ --min_test_duration 3 \ - --add_bs 1000000 + $( add_precomputed_quantizer $db $coarse ) \ + --add_bs 1000000 \ + --autotune_max nprobe:2000 done done - done -############################################ -# precompute centroids on GPU for large vocabularies +################################# +# 1B-scale experiment -for db in deep1M bigann1M; do - for ncent in 1048576 4194304; do - key=clustering.db$db.IVF$ncent - run_on_8gpu $key \ - python -u bench_all_ivf.py \ - --db $db \ - --indexkey IVF$ncent,SQ8 \ - --maxtrain 100000000 \ - --indexfile $indexdir/$key.faissindex \ - --searchthreads 16 \ - --min_test_duration 3 \ - --add_bs 1000000 \ - --train_on_gpu +for db in deep1B bigann1B; do + coarses=" + IVF1048576_HNSW32 + IVF4194304_HNSW32 + IVF4194304(IVF1024,PQHDx4fs,RFlat) + " + dim=$( get_db_dim $db ) - done -done + for coarse in $coarses; do + replace_coarse_PQHD "$coarse" $dim -################################# -# Run actual experiment -for db in deep1B bigann1B; do + indexkeys=" + OPQ8_64,$coarse64,PQ8 + OPQ16_64,$coarse64,PQ16x4fsr - for ncent in 1048576 4194304; do - coarse=IVF${ncent}_HNSW32 - centroidsname=clustering.db${db/1B/1M}.IVF${ncent}.faissindex - - for indexkey in \ - OPQ8_64,$coarse,PQ8 \ - OPQ16_64,$coarse,PQ16 \ - PCAR32,$coarse,SQ4 \ - OPQ32_128,$coarse,PQ32 \ - PCAR64,$coarse,SQ4 \ - PCAR32,$coarse,SQ8 \ - PCAR64,$coarse,SQ8 \ - PCAR32,$coarse,SQfp16 \ - OPQ64_128,$coarse,PQ64 \ - RR128,$coarse,SQ4 \ - OPQ64_128,$coarse,PQ64 \ - 
RR128,$coarse,SQ4 + OPQ16_64,$coarse64,PQ16 + OPQ32_64,$coarse64,PQ32x4fsr + + OPQ32_128,$coarse128,PQ32 + OPQ64_128,$coarse128,PQ64x4fsr + + OPQ64_128,$coarse128,PQ64 + OPQ128_256,$coarse256,PQ128x4fsr + OPQ56_112,$coarse112,PQ7+56 + OPQ16_64,$coarse64,PQ16x4fs,Refine(OPQ56_112,PQ56) + + $coarseD,PQ$((dim/2))x4fs + " + + for indexkey in $indexkeys do key=autotune.db$db.${indexkey//,/_} - - run_on_1machine $key.c $key \ + key="${key//(/_}" + key="${key//)/_}" + run_on_1machine $key.d \ python -u bench_all_ivf.py \ --db $db \ - --indexkey $indexkey \ - --maxtrain 256000 \ + --indexkey "$indexkey" \ + --maxtrain 0 \ --indexfile $indexdir/$key.faissindex \ - --get_centroids_from $indexdir/$centroidsname \ - --searchthreads 16 \ + --searchthreads 32 \ --min_test_duration 3 \ - --add_bs 1000000 - + $( add_precomputed_quantizer $db $coarse ) \ + --add_bs 1000000 \ + --autotune_max nprobe:3000 done done done + +fi \ No newline at end of file diff --git a/contrib/README.md b/contrib/README.md index a7e1601ca9..b2efbe7d7c 100644 --- a/contrib/README.md +++ b/contrib/README.md @@ -50,3 +50,7 @@ fields and converting them to the proper python array. (may require h5py) Defintion of how to access data for some standard datsets. + +### factory_tools.py + +Functions related to factory strings. diff --git a/contrib/datasets.py b/contrib/datasets.py index c09f604e0d..eb610f7eff 100644 --- a/contrib/datasets.py +++ b/contrib/datasets.py @@ -96,7 +96,7 @@ def get_queries(self): return self.xq def get_train(self, maxtrain=None): - maxtrain = maxtrain or self.nt + maxtrain = maxtrain if maxtrain is not None else self.nt return self.xt[:maxtrain] def get_database(self): @@ -140,7 +140,7 @@ def get_queries(self): return fvecs_read(self.basedir + "sift_query.fvecs") def get_train(self, maxtrain=None): - maxtrain = maxtrain or self.nt + maxtrain = maxtrain if maxtrain is not None else self.nt return fvecs_read(self.basedir + "sift_learn.fvecs")[:maxtrain] def get_database(self): @@ -176,7 +176,7 @@ def get_queries(self): return sanitize(bvecs_mmap(self.basedir + 'bigann_query.bvecs')[:]) def get_train(self, maxtrain=None): - maxtrain = maxtrain or self.nt + maxtrain = maxtrain if maxtrain is not None else self.nt return sanitize(bvecs_mmap(self.basedir + 'bigann_learn.bvecs')[:maxtrain]) def get_groundtruth(self, k=None): @@ -224,7 +224,7 @@ def get_queries(self): return sanitize(fvecs_read(self.basedir + "deep1B_queries.fvecs")) def get_train(self, maxtrain=None): - maxtrain = maxtrain or self.nt + maxtrain = maxtrain if maxtrain is not None else self.nt return sanitize(fvecs_mmap(self.basedir + "learn.fvecs")[:maxtrain]) def get_groundtruth(self, k=None): @@ -251,8 +251,9 @@ class DatasetGlove(Dataset): Data from http://ann-benchmarks.com/glove-100-angular.hdf5 """ - def __init__(self, loc=None): + def __init__(self, loc=None, download=False): import h5py + assert not download, "not implemented" if not loc: loc = dataset_basedir + 'glove/glove-100-angular.hdf5' self.glove_h5py = h5py.File(loc, 'r') diff --git a/contrib/exhaustive_search.py b/contrib/exhaustive_search.py index 31a444ee7e..32517b5392 100644 --- a/contrib/exhaustive_search.py +++ b/contrib/exhaustive_search.py @@ -16,6 +16,7 @@ def knn_ground_truth(xq, db_iterator, k): does not fit in RAM but for which we have an iterator that returns it block by block. 
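    [editor's note] a hedged usage sketch with synthetic data (not part of
    the original docstring):

        import numpy as np
        from faiss.contrib.exhaustive_search import knn_ground_truth
        xq = np.random.rand(100, 32).astype('float32')
        blocks = (np.random.rand(1000, 32).astype('float32') for _ in range(10))
        D, I = knn_ground_truth(xq, blocks, k=10)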
""" + LOG.info("knn_ground_truth queries size %s k=%d" % (xq.shape, k)) t0 = time.time() nq, d = xq.shape rh = faiss.ResultHeap(nq, k) diff --git a/contrib/factory_tools.py b/contrib/factory_tools.py new file mode 100644 index 0000000000..9623ad55f4 --- /dev/null +++ b/contrib/factory_tools.py @@ -0,0 +1,100 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import faiss +import re + + +def get_code_size(d, indexkey): + """ size of one vector in an index in dimension d + constructed with factory string indexkey""" + + if indexkey == "Flat": + return d * 4 + + if indexkey.endswith(",RFlat"): + return d * 4 + get_code_size(d, indexkey[:-len(",RFlat")]) + + mo = re.match("IVF\\d+(_HNSW32)?,(.*)$", indexkey) + if mo: + return get_code_size(d, mo.group(2)) + + mo = re.match("IVF\\d+\\(.*\\)?,(.*)$", indexkey) + if mo: + return get_code_size(d, mo.group(1)) + + mo = re.match("IMI\\d+x2,(.*)$", indexkey) + if mo: + return get_code_size(d, mo.group(1)) + + mo = re.match("(.*),Refine\\((.*)\\)$", indexkey) + if mo: + return get_code_size(d, mo.group(1)) + get_code_size(d, mo.group(2)) + + mo = re.match('PQ(\\d+)x(\\d+)(fs|fsr)?$', indexkey) + if mo: + return (int(mo.group(1)) * int(mo.group(2)) + 7) // 8 + + mo = re.match('PQ(\\d+)\\+(\\d+)$', indexkey) + if mo: + return (int(mo.group(1)) + int(mo.group(2))) + + mo = re.match('PQ(\\d+)$', indexkey) + if mo: + return int(mo.group(1)) + + if indexkey == "HNSW32" or indexkey == "HNSW32,Flat": + return d * 4 + 64 * 4 # roughly + + if indexkey == 'SQ8': + return d + elif indexkey == 'SQ4': + return (d + 1) // 2 + elif indexkey == 'SQ6': + return (d * 6 + 7) // 8 + elif indexkey == 'SQfp16': + return d * 2 + + mo = re.match('PCAR?(\\d+),(.*)$', indexkey) + if mo: + return get_code_size(int(mo.group(1)), mo.group(2)) + mo = re.match('OPQ\\d+_(\\d+),(.*)$', indexkey) + if mo: + return get_code_size(int(mo.group(1)), mo.group(2)) + mo = re.match('OPQ\\d+,(.*)$', indexkey) + if mo: + return get_code_size(d, mo.group(1)) + mo = re.match('RR(\\d+),(.*)$', indexkey) + if mo: + return get_code_size(int(mo.group(1)), mo.group(2)) + raise RuntimeError("cannot parse " + indexkey) + + + +def reverse_index_factory(index): + """ + attempts to get the factory string the index was built with + """ + index = faiss.downcast_index(index) + if isinstance(index, faiss.IndexFlat): + return "Flat" + if isinstance(index, faiss.IndexIVF): + quantizer = faiss.downcast_index(index.quantizer) + + if isinstance(quantizer, faiss.IndexFlat): + prefix = "IVF%d" % index.nlist + elif isinstance(quantizer, faiss.MultiIndexQuantizer): + prefix = "IMI%dx%d" % (quantizer.pq.M, quantizer.pq.nbit) + elif isinstance(quantizer, faiss.IndexHNSW): + prefix = "IVF%d_HNSW%d" % (index.nlist, quantizer.hnsw.M) + else: + prefix = "IVF%d(%s)" % (index.nlist, reverse_index_factory(quantizer)) + + if isinstance(index, faiss.IndexIVFFlat): + return prefix + ",Flat" + if isinstance(index, faiss.IndexIVFScalarQuantizer): + return prefix + ",SQ8" + + raise NotImplementedError() diff --git a/contrib/vecs_io.py b/contrib/vecs_io.py index 4465c8fe1d..ea75d5f94d 100644 --- a/contrib/vecs_io.py +++ b/contrib/vecs_io.py @@ -7,6 +7,7 @@ """ I/O functions in fvecs, bvecs, ivecs formats +definition of the formats here: http://corpus-texmex.irisa.fr/ """ @@ -34,3 +35,16 @@ def bvecs_mmap(fname): x = np.memmap(fname, dtype='uint8', mode='r') d = x[:4].view('int32')[0] return 
x.reshape(-1, d + 4)[:, 4:] + + +def ivecs_write(fname, m): + n, d = m.shape + m1 = np.empty((n, d + 1), dtype='int32') + m1[:, 0] = d + m1[:, 1:] = m + m1.tofile(fname) + + +def fvecs_write(fname, m): + m = m.astype('float32') + ivecs_write(fname, m.view('int32')) diff --git a/faiss/AutoTune.cpp b/faiss/AutoTune.cpp index 95e64600cc..217e77ab9e 100644 --- a/faiss/AutoTune.cpp +++ b/faiss/AutoTune.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include #include @@ -234,7 +236,7 @@ void OperatingPoints::display (bool only_optimal) const { const std::vector &pts = only_optimal ? optimal_pts : all_pts; - printf("Tested %zd operating points, %zd ones are optimal:\n", + printf("Tested %zd operating points, %zd ones are Pareto-optimal:\n", all_pts.size(), optimal_pts.size()); for (int i = 0; i < pts.size(); i++) { @@ -333,7 +335,7 @@ static void init_pq_ParameterRange (const ProductQuantizer & pq, pr.values.push_back (pq.code_size * 8); } -ParameterRange &ParameterSpace::add_range(const char * name) +ParameterRange &ParameterSpace::add_range(const std::string & name) { for (auto & pr : parameter_ranges) { if (pr.name == name) { @@ -346,13 +348,13 @@ ParameterRange &ParameterSpace::add_range(const char * name) } -/// initialize with reasonable parameters for the index +/// initialize with reasonable parameters for this type of index void ParameterSpace::initialize (const Index * index) { if (DC (IndexPreTransform)) { index = ix->index; } - if (DC (IndexRefineFlat)) { + if (DC (IndexRefine)) { ParameterRange & pr = add_range("k_factor_rf"); for (int i = 0; i <= 6; i++) { pr.values.push_back (1 << i); @@ -372,12 +374,14 @@ void ParameterSpace::initialize (const Index * index) pr.values.push_back (nprobe); } } - if (dynamic_cast(ix->quantizer)) { - ParameterRange & pr = add_range("efSearch"); - for (int i = 2; i <= 9; i++) { - pr.values.push_back (1 << i); - } + ParameterSpace ivf_pspace; + ivf_pspace.initialize(ix->quantizer); + + for (const ParameterRange & p: ivf_pspace.parameter_ranges) { + ParameterRange & pr = add_range("quantizer_" + p.name); + pr.values = p.values; } + } if (DC (IndexPQ)) { ParameterRange & pr = add_range("ht"); @@ -457,44 +461,38 @@ void ParameterSpace::set_index_parameters ( void ParameterSpace::set_index_parameter ( Index * index, const std::string & name, double val) const { - if (verbose > 1) - printf(" set %s=%g\n", name.c_str(), val); + if (verbose > 1) { + printf(" set_index_parameter %s=%g\n", name.c_str(), val); + } if (name == "verbose") { index->verbose = int(val); // and fall through to also enable it on sub-indexes } - if (DC (IndexPreTransform)) { + if (DC (IndexIDMap)) { set_index_parameter (ix->index, name, val); return; } - if (DC (IndexShards)) { - // call on all sub-indexes - auto fn = - [this, name, val](int, Index* subIndex) { - set_index_parameter(subIndex, name, val); - }; - - ix->runOnIndex(fn); + if (DC (IndexPreTransform)) { + set_index_parameter (ix->index, name, val); return; } - if (DC (IndexReplicas)) { + if (DC (ThreadedIndex)) { // call on all sub-indexes auto fn = - [this, name, val](int, Index* subIndex) { - set_index_parameter(subIndex, name, val); + [this, name, val](int /* no */, Index* subIndex) { + set_index_parameter(subIndex, name, val); }; - ix->runOnIndex(fn); return; } - if (DC (IndexRefineFlat)) { + if (DC (IndexRefine)) { if (name == "k_factor_rf") { ix->k_factor = int(val); return; } // otherwise it is for the sub-index - set_index_parameter 
(&ix->refine_index, name, val); + set_index_parameter (ix->base_index, name, val); return; } @@ -504,10 +502,7 @@ void ParameterSpace::set_index_parameter ( } if (name == "nprobe") { - if (DC (IndexIDMap)) { - set_index_parameter (ix->index, name, val); - return; - } else if (DC (IndexIVF)) { + if (DC (IndexIVF)) { ix->nprobe = int(val); return; } @@ -559,6 +554,14 @@ void ParameterSpace::set_index_parameter ( } } + if (name.find("quantizer_") == 0) { + if (DC(IndexIVF)) { + std::string sub_name = name.substr(strlen("quantizer_")); + set_index_parameter(ix->quantizer, sub_name, val); + return; + } + } + FAISS_THROW_FMT ("ParameterSpace::set_index_parameter:" "could not set parameter %s", name.c_str()); @@ -707,8 +710,8 @@ void ParameterSpace::explore (Index *index, bool keep = ops->add (perf, t_search, combination_name (cno), cno); if (verbose) - printf(" perf %.3f t %.3f (%d runs) %s\n", - perf, t_search, nrun, + printf(" perf %.3f t %.3f (%d %s) %s\n", + perf, t_search, nrun, nrun >= 2 ? "runs" : "run", keep ? "*" : ""); } } diff --git a/faiss/AutoTune.h b/faiss/AutoTune.h index d7eff14e64..ac16adb102 100644 --- a/faiss/AutoTune.h +++ b/faiss/AutoTune.h @@ -81,7 +81,10 @@ struct IntersectionCriterion: AutoTuneCriterion { /** * Maintains a list of experimental results. Each operating point is a * (perf, t, key) triplet, where higher perf and lower t is - * better. The key field is an arbitrary identifier for the operating point + * better. The key field is an arbitrary identifier for the operating point. + * + * Includes primitives to extract the Pareto-optimal operating points in the + * (perf, t) space. */ struct OperatingPoint { @@ -168,7 +171,7 @@ struct ParameterSpace { void display () const; /// add a new parameter (or return it if it exists) - ParameterRange &add_range(const char * name); + ParameterRange &add_range(const std::string & name); /// initialize with reasonable parameters for the index virtual void initialize (const Index * index); @@ -179,7 +182,7 @@ struct ParameterSpace { /// set a combination of parameters described by a string void set_index_parameters (Index *index, const char *param_string) const; - /// set one of the parameters + /// set one of the parameters, returns whether setting was successful virtual void set_index_parameter ( Index * index, const std::string & name, double val) const; diff --git a/faiss/CMakeLists.txt b/faiss/CMakeLists.txt index b7adaff29e..c2be910b06 100644 --- a/faiss/CMakeLists.txt +++ b/faiss/CMakeLists.txt @@ -32,6 +32,7 @@ add_library(faiss IndexReplicas.cpp IndexScalarQuantizer.cpp IndexShards.cpp + IndexRefine.cpp MatrixStats.cpp MetaIndexes.cpp VectorTransform.cpp @@ -94,6 +95,7 @@ set(FAISS_HEADERS IndexReplicas.h IndexScalarQuantizer.h IndexShards.h + IndexRefine.h MatrixStats.h MetaIndexes.h MetricType.h diff --git a/faiss/IndexFlat.cpp b/faiss/IndexFlat.cpp index 8f4842a2e7..0573ca5528 100644 --- a/faiss/IndexFlat.cpp +++ b/faiss/IndexFlat.cpp @@ -227,144 +227,6 @@ void IndexFlat::sa_decode (idx_t n, const uint8_t *bytes, float *x) const -/*************************************************** - * IndexRefineFlat - ***************************************************/ - -IndexRefineFlat::IndexRefineFlat (Index *base_index): - Index (base_index->d, base_index->metric_type), - refine_index (base_index->d, base_index->metric_type), - base_index (base_index), own_fields (false), - k_factor (1) -{ - is_trained = base_index->is_trained; - FAISS_THROW_IF_NOT_MSG (base_index->ntotal == 0, - "base_index should be empty in the 
beginning"); -} - - -IndexRefineFlat::IndexRefineFlat (Index *base_index, const float *xb): - Index (base_index->d, base_index->metric_type), - refine_index (base_index->d, base_index->metric_type), - base_index (base_index), own_fields (false), - k_factor (1) -{ - is_trained = base_index->is_trained; - refine_index.add (base_index->ntotal, xb); - ntotal = base_index->ntotal; -} - - - -IndexRefineFlat::IndexRefineFlat () { - base_index = nullptr; - own_fields = false; - k_factor = 1; -} - - -void IndexRefineFlat::train (idx_t n, const float *x) -{ - base_index->train (n, x); - is_trained = true; -} - -void IndexRefineFlat::add (idx_t n, const float *x) { - FAISS_THROW_IF_NOT (is_trained); - base_index->add (n, x); - refine_index.add (n, x); - ntotal = refine_index.ntotal; -} - -void IndexRefineFlat::reset () -{ - base_index->reset (); - refine_index.reset (); - ntotal = 0; -} - -namespace { - -typedef faiss::Index::idx_t idx_t; - -template -static void reorder_2_heaps ( - idx_t n, - idx_t k, idx_t *labels, float *distances, - idx_t k_base, const idx_t *base_labels, const float *base_distances) -{ -#pragma omp parallel for - for (idx_t i = 0; i < n; i++) { - idx_t *idxo = labels + i * k; - float *diso = distances + i * k; - const idx_t *idxi = base_labels + i * k_base; - const float *disi = base_distances + i * k_base; - - heap_heapify (k, diso, idxo, disi, idxi, k); - if (k_base != k) { // add remaining elements - heap_addn (k, diso, idxo, disi + k, idxi + k, k_base - k); - } - heap_reorder (k, diso, idxo); - } -} - - -} // anonymous namespace - - -void IndexRefineFlat::search ( - idx_t n, const float *x, idx_t k, - float *distances, idx_t *labels) const -{ - FAISS_THROW_IF_NOT (is_trained); - idx_t k_base = idx_t (k * k_factor); - idx_t * base_labels = labels; - float * base_distances = distances; - ScopeDeleter del1; - ScopeDeleter del2; - - - if (k != k_base) { - base_labels = new idx_t [n * k_base]; - del1.set (base_labels); - base_distances = new float [n * k_base]; - del2.set (base_distances); - } - - base_index->search (n, x, k_base, base_distances, base_labels); - - for (int i = 0; i < n * k_base; i++) - assert (base_labels[i] >= -1 && - base_labels[i] < ntotal); - - // compute refined distances - refine_index.compute_distance_subset ( - n, x, k_base, base_distances, base_labels); - - // sort and store result - if (metric_type == METRIC_L2) { - typedef CMax C; - reorder_2_heaps ( - n, k, labels, distances, - k_base, base_labels, base_distances); - - } else if (metric_type == METRIC_INNER_PRODUCT) { - typedef CMin C; - reorder_2_heaps ( - n, k, labels, distances, - k_base, base_labels, base_distances); - } else { - FAISS_THROW_MSG("Metric type not supported"); - } - -} - - - -IndexRefineFlat::~IndexRefineFlat () -{ - if (own_fields) delete base_index; -} /*************************************************** * IndexFlat1D diff --git a/faiss/IndexFlat.h b/faiss/IndexFlat.h index 9d15930a0b..6a71cea5ca 100644 --- a/faiss/IndexFlat.h +++ b/faiss/IndexFlat.h @@ -94,47 +94,6 @@ struct IndexFlatL2:IndexFlat { -/** Index that queries in a base_index (a fast one) and refines the - * results with an exact search, hopefully improving the results. - */ -struct IndexRefineFlat: Index { - - /// storage for full vectors - IndexFlat refine_index; - - /// faster index to pre-select the vectors that should be filtered - Index *base_index; - bool own_fields; ///< should the base index be deallocated? 
- - /// factor between k requested in search and the k requested from - /// the base_index (should be >= 1) - float k_factor; - - /// intitialize from empty index - explicit IndexRefineFlat (Index *base_index); - - /// initialize from index and corresponding data - IndexRefineFlat(Index *base_index, const float *xb); - - IndexRefineFlat (); - - void train(idx_t n, const float* x) override; - - void add(idx_t n, const float* x) override; - - void reset() override; - - void search( - idx_t n, - const float* x, - idx_t k, - float* distances, - idx_t* labels) const override; - - ~IndexRefineFlat() override; -}; - - /// optimized version for 1D "vectors". struct IndexFlat1D:IndexFlatL2 { bool continuous_update; ///< is the permutation updated continuously? diff --git a/faiss/IndexIVF.cpp b/faiss/IndexIVF.cpp index b2fe8874e6..fc6329cc5e 100644 --- a/faiss/IndexIVF.cpp +++ b/faiss/IndexIVF.cpp @@ -88,12 +88,19 @@ void Level1Quantizer::train_q1 (size_t n, const float *x, bool verbose, MetricTy } quantizer->is_trained = true; } else if (quantizer_trains_alone == 2) { - if (verbose) + if (verbose) { printf ( "Training L2 quantizer on %zd vectors in %zdD%s\n", n, d, clustering_index ? "(user provided index)" : ""); - FAISS_THROW_IF_NOT (metric_type == METRIC_L2); + } + // also accept spherical centroids because in that case + // L2 and IP are equivalent + FAISS_THROW_IF_NOT ( + metric_type == METRIC_L2 || + (metric_type == METRIC_INNER_PRODUCT && cp.spherical) + ); + Clustering clus (d, nlist, cp); if (!clustering_index) { IndexFlatL2 assigner (d); @@ -263,23 +270,76 @@ void IndexIVF::set_direct_map_type (DirectMap::Type type) direct_map.set_type (type, invlists, ntotal); } - +/** It is a sad fact of software that a conceptually simple function like this + * becomes very complex when you factor in several ways of parallelizing + + * interrupt/error handling + collecting stats + min/max collection. 
The + * codepath that is used 95% of time is the one for parallel_mode = 0 */ void IndexIVF::search (idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const { - std::unique_ptr idx(new idx_t[n * nprobe]); - std::unique_ptr coarse_dis(new float[n * nprobe]); - double t0 = getmillisecs(); - quantizer->search (n, x, nprobe, coarse_dis.get(), idx.get()); - indexIVF_stats.quantization_time += getmillisecs() - t0; - t0 = getmillisecs(); - invlists->prefetch_lists (idx.get(), n * nprobe); + // search function for a subset of queries + auto sub_search_func = [this, k] + (idx_t n, const float *x, float *distances, idx_t *labels, + IndexIVFStats *ivf_stats) { + + std::unique_ptr idx(new idx_t[n * nprobe]); + std::unique_ptr coarse_dis(new float[n * nprobe]); + + double t0 = getmillisecs(); + quantizer->search (n, x, nprobe, coarse_dis.get(), idx.get()); + + double t1 = getmillisecs(); + invlists->prefetch_lists (idx.get(), n * nprobe); + + search_preassigned (n, x, k, idx.get(), coarse_dis.get(), + distances, labels, false, nullptr, ivf_stats); + double t2 = getmillisecs(); + ivf_stats->quantization_time += t1 - t0; + ivf_stats->search_time += t2 - t0; + }; + + + if ((parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT) == 0) { + int nt = std::min(omp_get_max_threads(), int(n)); + std::vector stats(nt); + std::mutex exception_mutex; + std::string exception_string; + +#pragma omp parallel for if (nt > 1) + for(idx_t slice = 0; slice < nt; slice++) { + IndexIVFStats local_stats; + idx_t i0 = n * slice / nt; + idx_t i1 = n * (slice + 1) / nt; + if (i1 > i0) { + try { + sub_search_func( + i1 - i0, x + i0 * d, + distances + i0 * k, labels + i0 * k, + &stats[slice] + ); + } catch(const std::exception & e) { + std::lock_guard lock(exception_mutex); + exception_string = e.what(); + } + } + } + + if (!exception_string.empty()) { + FAISS_THROW_MSG (exception_string.c_str()); + } + + // collect stats + for(idx_t slice = 0; slice < nt; slice++) { + indexIVF_stats.add(stats[slice]); + } + } else { + // handle paralellization at level below (or don't run in parallel at all) + sub_search_func(n, x, distances, labels, &indexIVF_stats); + } + - search_preassigned (n, x, k, idx.get(), coarse_dis.get(), - distances, labels, false); - indexIVF_stats.search_time += getmillisecs() - t0; } @@ -288,7 +348,8 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k, const float *coarse_dis , float *distances, idx_t *labels, bool store_pairs, - const IVFSearchParameters *params) const + const IVFSearchParameters *params, + IndexIVFStats *ivf_stats) const { long nprobe = params ? params->nprobe : this->nprobe; long max_codes = params ? params->max_codes : this->max_codes; @@ -305,13 +366,12 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k, int pmode = this->parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT; bool do_heap_init = !(this->parallel_mode & PARALLEL_MODE_NO_HEAP_INIT); - // don't start parallel section if single query bool do_parallel = omp_get_max_threads() >= 2 && ( - pmode == 0 ? n > 1 : + pmode == 0 ? false : + pmode == 3 ? n > 1 : pmode == 1 ? 
nprobe > 1 : nprobe * n > 1); - #pragma omp parallel if(do_parallel) reduction(+: nlistv, ndis, nheap) { InvertedListScanner *scanner = get_InvertedListScanner(store_pairs); @@ -409,7 +469,7 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k, * Actual loops, depending on parallel_mode ****************************************************/ - if (pmode == 0) { + if (pmode == 0 || pmode == 3) { #pragma omp for for (idx_t i = 0; i < n; i++) { @@ -527,11 +587,12 @@ void IndexIVF::search_preassigned (idx_t n, const float *x, idx_t k, } } - indexIVF_stats.nq += n; - indexIVF_stats.nlist += nlistv; - indexIVF_stats.ndis += ndis; - indexIVF_stats.nheap_updates += nheap; - + if (ivf_stats) { + ivf_stats->nq += n; + ivf_stats->nlist += nlistv; + ivf_stats->ndis += ndis; + ivf_stats->nheap_updates += nheap; + } } @@ -551,7 +612,7 @@ void IndexIVF::range_search (idx_t nx, const float *x, float radius, invlists->prefetch_lists (keys.get(), nx * nprobe); range_search_preassigned (nx, x, radius, keys.get (), coarse_dis.get (), - result); + result, false, nullptr, &indexIVF_stats); indexIVF_stats.search_time += getmillisecs() - t0; } @@ -561,7 +622,8 @@ void IndexIVF::range_search_preassigned ( const idx_t *keys, const float *coarse_dis, RangeSearchResult *result, bool store_pairs, - const IVFSearchParameters *params) const + const IVFSearchParameters *params, + IndexIVFStats *stats) const { long nprobe = params ? params->nprobe : this->nprobe; long max_codes = params ? params->max_codes : this->max_codes; @@ -574,7 +636,15 @@ void IndexIVF::range_search_preassigned ( std::vector all_pres (omp_get_max_threads()); -#pragma omp parallel reduction(+: nlistv, ndis) + int pmode = this->parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT; + // don't start parallel section if single query + bool do_parallel = omp_get_max_threads() >= 2 && ( + pmode == 3 ? false : + pmode == 0 ? nx > 1 : + pmode == 1 ? 
nprobe > 1 : + nprobe * nx > 1); + +#pragma omp parallel if(do_parallel) reduction(+: nlistv, ndis) { RangeSearchPartialResult pres(result); std::unique_ptr scanner @@ -680,9 +750,11 @@ void IndexIVF::range_search_preassigned ( } } - indexIVF_stats.nq += nx; - indexIVF_stats.nlist += nlistv; - indexIVF_stats.ndis += ndis; + if (stats) { + stats->nq += nx; + stats->nlist += nlistv; + stats->ndis += ndis; + } } @@ -975,6 +1047,17 @@ void IndexIVFStats::reset() memset ((void*)this, 0, sizeof (*this)); } +void IndexIVFStats::add (const IndexIVFStats & other) +{ + nq += other.nq; + nlist += other.nlist; + ndis += other.ndis; + nheap_updates += other.nheap_updates; + quantization_time += other.quantization_time; + search_time += other.search_time; + +} + IndexIVFStats indexIVF_stats; diff --git a/faiss/IndexIVF.h b/faiss/IndexIVF.h index 1200f9511e..3743642e6c 100644 --- a/faiss/IndexIVF.h +++ b/faiss/IndexIVF.h @@ -76,6 +76,7 @@ struct IVFSearchParameters { struct InvertedListScanner; +struct IndexIVFStats; /** Index based on a inverted file (IVF) * @@ -109,9 +110,10 @@ struct IndexIVF: Index, Level1Quantizer { /** Parallel mode determines how queries are parallelized with OpenMP * - * 0 (default): parallelize over queries + * 0 (default): split over queries * 1: parallelize over inverted lists * 2: parallelize over both + * 3: split over queries with a finer granularity * * PARALLEL_MODE_NO_HEAP_INIT: binary or with the previous to * prevent the heap to be initialized and finalized @@ -178,13 +180,15 @@ struct IndexIVF: Index, Level1Quantizer { * instead in upper/lower 32 bit of result, * instead of ids (used for reranking). * @param params used to override the object's search parameters + * @param stats search stats to be updated (can be null) */ virtual void search_preassigned ( idx_t n, const float *x, idx_t k, const idx_t *assign, const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs, - const IVFSearchParameters *params=nullptr + const IVFSearchParameters *params=nullptr, + IndexIVFStats *stats=nullptr ) const; /** assign the vectors, then call search_preassign */ @@ -199,7 +203,8 @@ struct IndexIVF: Index, Level1Quantizer { const idx_t *keys, const float *coarse_dis, RangeSearchResult *result, bool store_pairs=false, - const IVFSearchParameters *params=nullptr) const; + const IVFSearchParameters *params=nullptr, + IndexIVFStats *stats=nullptr) const; /// get a scanner for this index (store_pairs means ignore labels) virtual InvertedListScanner *get_InvertedListScanner ( @@ -365,6 +370,7 @@ struct IndexIVFStats { IndexIVFStats () {reset (); } void reset (); + void add (const IndexIVFStats & other); }; // global var that collects them all diff --git a/faiss/IndexIVFFlat.cpp b/faiss/IndexIVFFlat.cpp index ac1199a77c..80ecba5933 100644 --- a/faiss/IndexIVFFlat.cpp +++ b/faiss/IndexIVFFlat.cpp @@ -317,7 +317,8 @@ void IndexIVFFlatDedup::search_preassigned ( const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs, - const IVFSearchParameters *params) const + const IVFSearchParameters *params, + IndexIVFStats *stats) const { FAISS_THROW_IF_NOT_MSG ( !store_pairs, "store_pairs not supported in IVFDedup"); diff --git a/faiss/IndexIVFFlat.h b/faiss/IndexIVFFlat.h index 45ae280918..940ba60c57 100644 --- a/faiss/IndexIVFFlat.h +++ b/faiss/IndexIVFFlat.h @@ -77,7 +77,8 @@ struct IndexIVFFlatDedup: IndexIVFFlat { const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs, - const IVFSearchParameters *params=nullptr + const 
IVFSearchParameters *params=nullptr, + IndexIVFStats *stats=nullptr ) const override; size_t remove_ids(const IDSelector& sel) override; diff --git a/faiss/IndexIVFPQFastScan.cpp b/faiss/IndexIVFPQFastScan.cpp index 4149904955..55d14a31da 100644 --- a/faiss/IndexIVFPQFastScan.cpp +++ b/faiss/IndexIVFPQFastScan.cpp @@ -154,7 +154,7 @@ void IndexIVFPQFastScan::train_residual (idx_t n, const float *x_in) if (verbose) { printf ("training %zdx%zd product quantizer on %zd vectors in %dD\n", - pq.M, pq.ksub, n, d); + pq.M, pq.ksub, long(n), d); } pq.verbose = verbose; pq.train (n, trainset); @@ -413,17 +413,23 @@ void IndexIVFPQFastScan::compute_LUT( AlignedTable ip_table(n * dim12); pq.compute_inner_prod_tables (n, x, ip_table.get()); -#pragma omp parallel if (n * nprobe > 8000) - for(idx_t i = 0; i < n; i++) { - for(idx_t j = 0; j < nprobe; j++) { - size_t ij = i * nprobe + j; +#pragma omp parallel for if (n * nprobe > 8000) + for(idx_t ij = 0; ij < n * nprobe; ij++) { + idx_t i = ij / nprobe; + float *tab = dis_tables.get() + ij * dim12; + idx_t cij = coarse_ids[ij]; + if (cij >= 0) { fvec_madd_avx ( dim12, - precomputed_table.get() + coarse_ids[ij] * dim12, + precomputed_table.get() + cij * dim12, -2, ip_table.get() + i * dim12, - dis_tables.get() + ij * dim12 + tab ); + } else { + // fill with NaNs so that they are ignored during + // LUT quantization + memset (tab, -1, sizeof(float) * dim12); } } @@ -433,12 +439,18 @@ void IndexIVFPQFastScan::compute_LUT( biases.resize(n * nprobe); memset(biases.get(), 0, sizeof(float) * n * nprobe); -#pragma omp parallel if (n > 8000) - for(idx_t i = 0; i < n; i++) { - for(idx_t j = 0; j < nprobe; j++) { +#pragma omp parallel for if (n * nprobe > 8000) + for(idx_t ij = 0; ij < n * nprobe; ij++) { + idx_t i = ij / nprobe; + float *xij = &xrel[ij * d]; + idx_t cij = coarse_ids[ij]; + + if (cij >= 0) { ivfpq.quantizer->compute_residual( - x + i * d, &xrel[(i * nprobe + j) * d], - coarse_ids[i * nprobe + j]); + x + i * d, xij, cij); + } else { + // will fill with NaNs + memset(xij, -1, sizeof(float) * d); } } @@ -503,8 +515,8 @@ void IndexIVFPQFastScan::compute_LUT_uint8( } uint64_t t1 = get_cy(); -#pragma omp parallel if (n > 8000) - for(size_t i = 0; i < n; i++) { +#pragma omp parallel for if (n > 100) + for(int64_t i = 0; i < n; i++) { const float *t_in = dis_tables_float.get() + i * dim123; const float *b_in = nullptr; uint8_t *t_out = dis_tables.get() + i * dim123_2; @@ -569,8 +581,8 @@ void IndexIVFPQFastScan::search_dispatch_implem( } else if (impl >= 10 && impl <= 13) { size_t ndis = 0, nlist_visited = 0; - int nt = std::min(omp_get_max_threads(), int(n)); - if (nt < 2) { + + if (n < 2) { if (impl == 12 || impl == 13) { search_implem_12 (n, x, k, distances, labels, impl, &ndis, &nlist_visited); @@ -580,10 +592,27 @@ void IndexIVFPQFastScan::search_dispatch_implem( } } else { // explicitly slice over threads -#pragma omp parallel for num_threads(nt) reduction(+: ndis, nlist_visited) - for (int slice = 0; slice < nt; slice++) { - idx_t i0 = n * slice / nt; - idx_t i1 = n * (slice + 1) / nt; + int nslice; + if (n <= omp_get_max_threads()) { + nslice = n; + } else if (by_residual && metric_type == METRIC_L2) { + // make sure we don't make too big LUT tables + size_t lut_size_per_query = + pq.M * pq.ksub * nprobe * (sizeof(float) + sizeof(uint8_t)); + + size_t max_lut_size = precomputed_table_max_bytes; + // how many queries we can handle within mem budget + size_t nq_ok = std::max(max_lut_size / lut_size_per_query, size_t(1)); + nslice = 
roundup(std::max(size_t(n / nq_ok), size_t(1)), omp_get_max_threads()); + } else { + // LUTs unlikely to be a limiting factor + nslice = omp_get_max_threads(); + } + +#pragma omp parallel for reduction(+: ndis, nlist_visited) + for (int slice = 0; slice < nslice; slice++) { + idx_t i0 = n * slice / nslice; + idx_t i1 = n * (slice + 1) / nslice; float *dis_i = distances + i0 * k; idx_t *lab_i = labels + i0 * k; if (impl == 12 || impl == 13) { @@ -921,9 +950,9 @@ void IndexIVFPQFastScan::search_implem_12( TIC; struct QC { - int qno; - int list_no; - int rank; + int qno; // sequence number of the query + int list_no; // list to visit + int rank; // this is the rank'th result of the coarse quantizer }; bool single_LUT = !(by_residual && metric_type == METRIC_L2); @@ -947,6 +976,8 @@ void IndexIVFPQFastScan::search_implem_12( } TIC; + // prepare the result handlers + std::unique_ptr > handler; AlignedTable tmp_distances; @@ -979,6 +1010,7 @@ void IndexIVFPQFastScan::search_implem_12( while (i0 < qcs.size()) { uint64_t tt0 = get_cy(); + // find all queries that access this inverted list int list_no = qcs[i0].list_no; size_t i1 = i0 + 1; @@ -996,6 +1028,7 @@ void IndexIVFPQFastScan::search_implem_12( continue; } + // re-organize LUTs and biases into the right order int nc = i1 - i0; std::vector q_map(nc), lut_entries(nc); @@ -1017,11 +1050,15 @@ void IndexIVFPQFastScan::search_implem_12( LUT.get() ); + // access the inverted list + ndis += (i1 - i0) * list_size; InvertedLists::ScopedCodes codes(invlists, list_no); InvertedLists::ScopedIds ids(invlists, list_no); + // prepare the handler + handler->ntotal = list_size; handler->q_map = q_map.data(); handler->id_map = ids.get(); @@ -1039,14 +1076,16 @@ void IndexIVFPQFastScan::search_implem_12( else DISPATCH(ReservoirHC) else DISPATCH(SingleResultHC) + // prepare for next loop + i0 = i1; + uint64_t tt2 = get_cy(); t_copy_pack += tt1 - tt0; t_scan += tt2 - tt1; - i0 = i1; } TIC; - // labels is the same array + // labels is in-place for HeapHC handler->to_flat_arrays( distances, labels, skip & 16 ? 
nullptr : normalizers.get() diff --git a/faiss/IndexIVFPQR.cpp b/faiss/IndexIVFPQR.cpp index c2897fffcb..04259a8759 100644 --- a/faiss/IndexIVFPQR.cpp +++ b/faiss/IndexIVFPQR.cpp @@ -97,13 +97,13 @@ void IndexIVFPQR::add_core (idx_t n, const float *x, const idx_t *xids, #define TOC get_cycles () - t0 -void IndexIVFPQR::search_preassigned (idx_t n, const float *x, idx_t k, - const idx_t *idx, - const float *L1_dis, - float *distances, idx_t *labels, - bool store_pairs, - const IVFSearchParameters *params - ) const +void IndexIVFPQR::search_preassigned ( + idx_t n, const float *x, idx_t k, + const idx_t *idx, const float *L1_dis, + float *distances, idx_t *labels, + bool store_pairs, + const IVFSearchParameters *params, IndexIVFStats *stats + ) const { uint64_t t0; TIC; diff --git a/faiss/IndexIVFPQR.h b/faiss/IndexIVFPQR.h index 934b912d25..53b3d74fff 100644 --- a/faiss/IndexIVFPQR.h +++ b/faiss/IndexIVFPQR.h @@ -55,7 +55,8 @@ struct IndexIVFPQR: IndexIVFPQ { const float *centroid_dis, float *distances, idx_t *labels, bool store_pairs, - const IVFSearchParameters *params=nullptr + const IVFSearchParameters *params=nullptr, + IndexIVFStats *stats=nullptr ) const override; IndexIVFPQR(); diff --git a/faiss/IndexPQ.cpp b/faiss/IndexPQ.cpp index d420ff4a08..fdb57a6d9e 100644 --- a/faiss/IndexPQ.cpp +++ b/faiss/IndexPQ.cpp @@ -129,9 +129,10 @@ void IndexPQ::reconstruct (idx_t key, float * recons) const namespace { - -struct PQDis: DistanceComputer { +template +struct PQDistanceComputer: DistanceComputer { size_t d; + MetricType metric; Index::idx_t nb; const uint8_t *codes; size_t code_size; @@ -144,10 +145,11 @@ struct PQDis: DistanceComputer { { const uint8_t *code = codes + i * code_size; const float *dt = precomputed_table.data(); + PQDecoder decoder(code, pq.nbits); float accu = 0; for (int j = 0; j < pq.M; j++) { - accu += dt[*code++]; - dt += 256; + accu += dt[decoder.decode()]; + dt += 1 << decoder.nbits; } ndis++; return accu; @@ -155,33 +157,43 @@ struct PQDis: DistanceComputer { float symmetric_dis(idx_t i, idx_t j) override { + FAISS_THROW_IF_NOT(sdc); const float * sdci = sdc; float accu = 0; - const uint8_t *codei = codes + i * code_size; - const uint8_t *codej = codes + j * code_size; + PQDecoder codei (codes + i * code_size, pq.nbits); + PQDecoder codej (codes + j * code_size, pq.nbits); for (int l = 0; l < pq.M; l++) { - accu += sdci[(*codei++) + (*codej++) * 256]; - sdci += 256 * 256; + accu += sdci[codei.decode() + (codej.decode() << codei.nbits)]; + sdci += uint64_t(1) << (2 * codei.nbits); } + ndis++; return accu; } - explicit PQDis(const IndexPQ& storage, const float* /*q*/ = nullptr) - : pq(storage.pq) { + explicit PQDistanceComputer(const IndexPQ& storage) + : pq(storage.pq) { precomputed_table.resize(pq.M * pq.ksub); nb = storage.ntotal; d = storage.d; + metric = storage.metric_type; codes = storage.codes.data(); code_size = pq.code_size; - FAISS_ASSERT(pq.ksub == 256); - FAISS_ASSERT(pq.sdc_table.size() == pq.ksub * pq.ksub * pq.M); - sdc = pq.sdc_table.data(); + if (pq.sdc_table.size() == pq.ksub * pq.ksub * pq.M) { + sdc = pq.sdc_table.data(); + } else { + sdc = nullptr; + } ndis = 0; } void set_query(const float *x) override { - pq.compute_distance_table(x, precomputed_table.data()); + if (metric == METRIC_L2) { + pq.compute_distance_table(x, precomputed_table.data()); + } else { + pq.compute_inner_prod_table(x, precomputed_table.data()); + } + } }; @@ -190,8 +202,13 @@ struct PQDis: DistanceComputer { DistanceComputer * IndexPQ::get_distance_computer() const { - 
FAISS_THROW_IF_NOT(pq.nbits == 8); - return new PQDis(*this); + if (pq.nbits == 8) { + return new PQDistanceComputer(*this); + } else if (pq.nbits == 16) { + return new PQDistanceComputer(*this); + } else { + return new PQDistanceComputer(*this); + } } diff --git a/faiss/IndexPQFastScan.cpp b/faiss/IndexPQFastScan.cpp index dc8295f972..a5178cd874 100644 --- a/faiss/IndexPQFastScan.cpp +++ b/faiss/IndexPQFastScan.cpp @@ -14,6 +14,7 @@ #include + #include #include #include @@ -38,6 +39,7 @@ IndexPQFastScan::IndexPQFastScan( Index(d, metric), pq(d, M, nbits), bbs(bbs), ntotal2(0), M2(roundup(M, 2)) { + FAISS_THROW_IF_NOT(nbits == 4); is_trained = false; } @@ -231,6 +233,7 @@ void IndexPQFastScan::search_dispatch_implem( } } + if (implem == 1) { FAISS_THROW_IF_NOT(orig_codes); FAISS_THROW_IF_NOT(is_max); diff --git a/faiss/IndexPQFastScan.h b/faiss/IndexPQFastScan.h index ee32f2fdb4..bdc0177939 100644 --- a/faiss/IndexPQFastScan.h +++ b/faiss/IndexPQFastScan.h @@ -85,6 +85,7 @@ struct IndexPQFastScan: Index { idx_t n, const float* x, idx_t k, float* distances, idx_t* labels) const; + template void search_implem_12( idx_t n, const float* x, idx_t k, diff --git a/faiss/IndexPreTransform.cpp b/faiss/IndexPreTransform.cpp index 7e9a37619c..06b688a146 100644 --- a/faiss/IndexPreTransform.cpp +++ b/faiss/IndexPreTransform.cpp @@ -15,6 +15,7 @@ #include #include +#include namespace faiss { @@ -282,6 +283,52 @@ void IndexPreTransform::sa_decode (idx_t n, const uint8_t *bytes, } } +namespace { + +struct PreTransformDistanceComputer: DistanceComputer { + const IndexPreTransform *index; + std::unique_ptr sub_dc; + std::unique_ptr query; + + explicit PreTransformDistanceComputer(const IndexPreTransform *index): + index(index), + sub_dc(index->index->get_distance_computer()) + {} + + void set_query(const float *x) override { + const float *xt = index->apply_chain (1, x); + if (xt == x) { + sub_dc->set_query (x); + } else { + query.reset(xt); + sub_dc->set_query (xt); + } + } + + float symmetric_dis(idx_t i, idx_t j) override + { + return sub_dc->symmetric_dis(i, j); + } + + float operator () (idx_t i) override + { + return (*sub_dc)(i); + } + +}; + + +} // anonymous namespace + + +DistanceComputer * IndexPreTransform::get_distance_computer() const { + if (chain.empty()) { + return index->get_distance_computer(); + } else { + return new PreTransformDistanceComputer(this); + } +} + } // namespace faiss diff --git a/faiss/IndexPreTransform.h b/faiss/IndexPreTransform.h index a3becc9188..370ea76d5a 100644 --- a/faiss/IndexPreTransform.h +++ b/faiss/IndexPreTransform.h @@ -77,6 +77,8 @@ struct IndexPreTransform: Index { void reverse_chain (idx_t n, const float* xt, float* x) const; + DistanceComputer * get_distance_computer() const override; + /* standalone codec interface */ size_t sa_code_size () const override; void sa_encode (idx_t n, const float *x, diff --git a/faiss/IndexRefine.cpp b/faiss/IndexRefine.cpp new file mode 100644 index 0000000000..03d262f100 --- /dev/null +++ b/faiss/IndexRefine.cpp @@ -0,0 +1,253 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + + +#include + +#include +#include +#include +#include +#include +#include + +namespace faiss { + + + +/*************************************************** + * IndexRefine + ***************************************************/ + +IndexRefine::IndexRefine (Index *base_index, Index *refine_index): + Index (base_index->d, base_index->metric_type), + base_index (base_index), + refine_index (refine_index) +{ + own_fields = own_refine_index = false; + FAISS_THROW_IF_NOT (base_index->d == refine_index->d); + FAISS_THROW_IF_NOT (base_index->metric_type == refine_index->metric_type); + is_trained = base_index->is_trained && refine_index->is_trained; + FAISS_THROW_IF_NOT (base_index->ntotal == refine_index->ntotal); + ntotal = base_index->ntotal; +} + +IndexRefine::IndexRefine (): + base_index(nullptr), refine_index(nullptr), + own_fields(false), own_refine_index(false) +{ +} + +void IndexRefine::train (idx_t n, const float *x) +{ + base_index->train (n, x); + refine_index->train (n, x); + is_trained = true; +} + +void IndexRefine::add (idx_t n, const float *x) { + FAISS_THROW_IF_NOT (is_trained); + base_index->add (n, x); + refine_index->add (n, x); + ntotal = refine_index->ntotal; +} + +void IndexRefine::reset () +{ + base_index->reset (); + refine_index->reset (); + ntotal = 0; +} + +namespace { + +typedef faiss::Index::idx_t idx_t; + +template +static void reorder_2_heaps ( + idx_t n, + idx_t k, idx_t *labels, float *distances, + idx_t k_base, const idx_t *base_labels, const float *base_distances) +{ +#pragma omp parallel for + for (idx_t i = 0; i < n; i++) { + idx_t *idxo = labels + i * k; + float *diso = distances + i * k; + const idx_t *idxi = base_labels + i * k_base; + const float *disi = base_distances + i * k_base; + + heap_heapify (k, diso, idxo, disi, idxi, k); + if (k_base != k) { // add remaining elements + heap_addn (k, diso, idxo, disi + k, idxi + k, k_base - k); + } + heap_reorder (k, diso, idxo); + } +} + + +} // anonymous namespace + + + +void IndexRefine::search ( + idx_t n, const float *x, idx_t k, + float *distances, idx_t *labels) const +{ + FAISS_THROW_IF_NOT (is_trained); + idx_t k_base = idx_t (k * k_factor); + idx_t * base_labels = labels; + float * base_distances = distances; + ScopeDeleter del1; + ScopeDeleter del2; + + if (k != k_base) { + base_labels = new idx_t [n * k_base]; + del1.set (base_labels); + base_distances = new float [n * k_base]; + del2.set (base_distances); + } + + base_index->search (n, x, k_base, base_distances, base_labels); + + for (int i = 0; i < n * k_base; i++) + assert (base_labels[i] >= -1 && + base_labels[i] < ntotal); + + // parallelize over queries +#pragma omp parallel if (n > 1) + { + std::unique_ptr dc( + refine_index->get_distance_computer() + ); +#pragma omp for + for (idx_t i = 0; i < n; i++) { + dc->set_query(x + i * d); + idx_t ij = i * k_base; + for (idx_t j = 0; j < k_base; j++) { + idx_t idx = base_labels[ij]; + if (idx < 0) break; + base_distances[ij] = (*dc)(idx); + ij++; + } + } + } + + // sort and store result + if (metric_type == METRIC_L2) { + typedef CMax C; + reorder_2_heaps ( + n, k, labels, distances, + k_base, base_labels, base_distances); + + } else if (metric_type == METRIC_INNER_PRODUCT) { + typedef CMin C; + reorder_2_heaps ( + n, k, labels, distances, + k_base, base_labels, base_distances); + } else { + FAISS_THROW_MSG("Metric type not supported"); + } + +} + +void IndexRefine::reconstruct (idx_t key, float * recons) const { + refine_index->reconstruct (key, recons); +} + + + + +IndexRefine::~IndexRefine () 
+{ + if (own_fields) delete base_index; + if (own_refine_index) delete refine_index; +} + + +/*************************************************** + * IndexRefineFlat + ***************************************************/ + +IndexRefineFlat::IndexRefineFlat (Index *base_index): + IndexRefine(base_index, new IndexFlat(base_index->d, base_index->metric_type)) +{ + is_trained = base_index->is_trained; + own_refine_index = true; + FAISS_THROW_IF_NOT_MSG (base_index->ntotal == 0, + "base_index should be empty in the beginning"); +} + + +IndexRefineFlat::IndexRefineFlat (Index *base_index, const float *xb): + IndexRefine (base_index, new IndexFlat(base_index->d, base_index->metric_type)) +{ + is_trained = base_index->is_trained; + own_refine_index = true; + refine_index->add (base_index->ntotal, xb); + ntotal = base_index->ntotal; +} + +IndexRefineFlat::IndexRefineFlat(): + IndexRefine() +{ + own_refine_index = true; +} + + +void IndexRefineFlat::search ( + idx_t n, const float *x, idx_t k, + float *distances, idx_t *labels) const +{ + FAISS_THROW_IF_NOT (is_trained); + idx_t k_base = idx_t (k * k_factor); + idx_t * base_labels = labels; + float * base_distances = distances; + ScopeDeleter del1; + ScopeDeleter del2; + + if (k != k_base) { + base_labels = new idx_t [n * k_base]; + del1.set (base_labels); + base_distances = new float [n * k_base]; + del2.set (base_distances); + } + + base_index->search (n, x, k_base, base_distances, base_labels); + + for (int i = 0; i < n * k_base; i++) + assert (base_labels[i] >= -1 && + base_labels[i] < ntotal); + + // compute refined distances + auto rf = dynamic_cast(refine_index); + FAISS_THROW_IF_NOT(rf); + + rf->compute_distance_subset ( + n, x, k_base, base_distances, base_labels); + + // sort and store result + if (metric_type == METRIC_L2) { + typedef CMax C; + reorder_2_heaps ( + n, k, labels, distances, + k_base, base_labels, base_distances); + + } else if (metric_type == METRIC_INNER_PRODUCT) { + typedef CMin C; + reorder_2_heaps ( + n, k, labels, distances, + k_base, base_labels, base_distances); + } else { + FAISS_THROW_MSG("Metric type not supported"); + } + +} + + + + +} // namespace faiss diff --git a/faiss/IndexRefine.h b/faiss/IndexRefine.h new file mode 100644 index 0000000000..5cfd945875 --- /dev/null +++ b/faiss/IndexRefine.h @@ -0,0 +1,73 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + + +namespace faiss { + + +/** Index that queries in a base_index (a fast one) and refines the + * results with an exact search, hopefully improving the results. + */ +struct IndexRefine: Index { + + /// faster index to pre-select the vectors that should be filtered + Index *base_index; + + /// refinement index + Index *refine_index; + + bool own_fields; ///< should the base index be deallocated? 
+ bool own_refine_index; ///< same with the refinement index + + /// factor between k requested in search and the k requested from + /// the base_index (should be >= 1) + float k_factor = 1; + + /// intitialize from empty index + IndexRefine (Index *base_index, Index *refine_index); + + IndexRefine (); + + void train(idx_t n, const float* x) override; + + void add(idx_t n, const float* x) override; + + void reset() override; + + void search( + idx_t n, const float* x, idx_t k, + float* distances, idx_t* labels) const override; + + // reconstruct is routed to the refine_index + void reconstruct (idx_t key, float * recons) const override; + + ~IndexRefine() override; +}; + + +/** Version where the refinement index is an IndexFlat. It has one additional + * constructor that takes a table of elements to add to the flat refinement + * index */ +struct IndexRefineFlat: IndexRefine { + explicit IndexRefineFlat (Index *base_index); + IndexRefineFlat(Index *base_index, const float *xb); + + IndexRefineFlat(); + + void search( + idx_t n, const float* x, idx_t k, + float* distances, idx_t* labels) const override; + +}; + + + +} // namespace faiss diff --git a/faiss/IndexScalarQuantizer.cpp b/faiss/IndexScalarQuantizer.cpp index de139a307c..d168f191d4 100644 --- a/faiss/IndexScalarQuantizer.cpp +++ b/faiss/IndexScalarQuantizer.cpp @@ -192,7 +192,7 @@ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x, size_t coarse_size = include_listnos ? coarse_code_size () : 0; memset(codes, 0, (code_size + coarse_size) * n); -#pragma omp parallel if(n > 1) +#pragma omp parallel if(n > 1000) { std::vector residual (d); @@ -222,7 +222,7 @@ void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes, std::unique_ptr squant (sq.select_quantizer ()); size_t coarse_size = coarse_code_size (); -#pragma omp parallel if(n > 1) +#pragma omp parallel if(n > 1000) { std::vector residual (d); diff --git a/faiss/IndexScalarQuantizer.h b/faiss/IndexScalarQuantizer.h index bb0e20b65f..db6f3532b9 100644 --- a/faiss/IndexScalarQuantizer.h +++ b/faiss/IndexScalarQuantizer.h @@ -82,7 +82,7 @@ struct IndexScalarQuantizer: Index { /** An IVF implementation where the components of the residuals are - * encoded with a scalar uniform quantizer. All distance computations + * encoded with a scalar quantizer. All distance computations * are asymmetric, so the encoded vectors are decoded and approximate * distances are computed. */ diff --git a/faiss/impl/AuxIndexStructures.h b/faiss/impl/AuxIndexStructures.h index ecc7d64493..2fabb81222 100644 --- a/faiss/impl/AuxIndexStructures.h +++ b/faiss/impl/AuxIndexStructures.h @@ -204,7 +204,8 @@ struct RangeSearchPartialResult: BufferList { struct DistanceComputer { using idx_t = Index::idx_t; - /// called before computing distances + /// called before computing distances. 
Pointer x should remain valid + /// while operator () is called virtual void set_query(const float *x) = 0; /// compute distance of vector i to current query diff --git a/faiss/impl/ProductQuantizer.h b/faiss/impl/ProductQuantizer.h index c900d9c9d4..de1334242d 100644 --- a/faiss/impl/ProductQuantizer.h +++ b/faiss/impl/ProductQuantizer.h @@ -219,12 +219,14 @@ struct PQDecoderGeneric { }; struct PQDecoder8 { + static const int nbits = 8; const uint8_t *code; PQDecoder8(const uint8_t *code, int nbits); uint64_t decode(); }; struct PQDecoder16 { + static const int nbits = 16; const uint16_t *code; PQDecoder16(const uint8_t *code, int nbits); uint64_t decode(); diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp index 2c7d15de07..704f597389 100644 --- a/faiss/impl/ScalarQuantizer.cpp +++ b/faiss/impl/ScalarQuantizer.cpp @@ -39,8 +39,12 @@ namespace faiss { * that hides the template mess. ********************************************************************/ -#if defined(__F16C__) && defined(__AVX2__) +#ifdef __AVX2__ +#ifdef __F16C__ #define USE_F16C +#else +#warning "Cannot enable AVX optimizations in scalar quantizer if -mf16c is not set as well" +#endif #endif diff --git a/faiss/impl/index_read.cpp b/faiss/impl/index_read.cpp index 589161213d..64fb63e611 100644 --- a/faiss/impl/index_read.cpp +++ b/faiss/impl/index_read.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -551,14 +552,20 @@ Index *read_index (IOReader *f, int io_flags) { read_ProductQuantizer (&imiq->pq, f); idx = imiq; } else if(h == fourcc ("IxRF")) { - IndexRefineFlat *idxrf = new IndexRefineFlat (); + IndexRefine *idxrf = new IndexRefine (); read_index_header (idxrf, f); idxrf->base_index = read_index(f, io_flags); - idxrf->own_fields = true; - IndexFlat *rf = dynamic_cast (read_index (f, io_flags)); - std::swap (*rf, idxrf->refine_index); - delete rf; + idxrf->refine_index = read_index(f, io_flags); READ1 (idxrf->k_factor); + if (dynamic_cast(idxrf->refine_index)) { + // then make a RefineFlat with it + IndexRefine *idxrf_old = idxrf; + idxrf = new IndexRefineFlat(); + *idxrf = *idxrf_old; + delete idxrf_old; + } + idxrf->own_fields = true; + idxrf->own_refine_index = true; idx = idxrf; } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) { bool is_map2 = h == fourcc ("IxM2"); diff --git a/faiss/impl/index_write.cpp b/faiss/impl/index_write.cpp index 1049becaba..27a3e6f789 100644 --- a/faiss/impl/index_write.cpp +++ b/faiss/impl/index_write.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -392,13 +393,13 @@ void write_index (const Index *idx, IOWriter *f) { WRITE1 (h); write_index_header (imiq, f); write_ProductQuantizer (&imiq->pq, f); - } else if(const IndexRefineFlat * idxrf = - dynamic_cast (idx)) { + } else if(const IndexRefine * idxrf = + dynamic_cast (idx)) { uint32_t h = fourcc ("IxRF"); WRITE1 (h); write_index_header (idxrf, f); write_index (idxrf->base_index, f); - write_index (&idxrf->refine_index, f); + write_index (idxrf->refine_index, f); WRITE1 (idxrf->k_factor); } else if(const IndexIDMap * idxmap = dynamic_cast (idx)) { diff --git a/faiss/index_factory.cpp b/faiss/index_factory.cpp index 748ba68b6d..a1adfe7da4 100644 --- a/faiss/index_factory.cpp +++ b/faiss/index_factory.cpp @@ -36,6 +36,8 @@ #include #include #include +#include + #include #include @@ -64,11 +66,53 @@ struct VTChain { /// what kind of training does this coarse quantizer require? 
char get_trains_alone(const Index *coarse_quantizer) { return + dynamic_cast(coarse_quantizer) ? 0 : + // multi index just needs to be quantized dynamic_cast(coarse_quantizer) ? 1 : dynamic_cast(coarse_quantizer) ? 2 : - 0; + 2; // for complicated indexes, we assume they can't be used as a kmeans index +} + +bool str_ends_with(const std::string& s, const std::string& suffix) +{ + return s.rfind(suffix) == std::abs(int(s.size()-suffix.size())); } +// check if ends with suffix followed by digits +bool str_ends_with_digits(const std::string& s, const std::string& suffix) +{ + int i; + for(i = s.length() - 1; i >= 0; i--) { + if (!isdigit(s[i])) break; + } + return str_ends_with(s.substr(0, i + 1), suffix); +} + +void find_matching_parentheses(const std::string &s, int & i0, int & i1) { + int st = 0; + for (int i = 0; i < s.length(); i++) { + if (s[i] == '(') { + if (st == 0) { + i0 = i; + } + st++; + } + + if (s[i] == ')') { + st--; + if (st == 0) { + i1 = i; + return; + } + if (st < 0) { + FAISS_THROW_FMT("factory string %s: unbalanced parentheses", s.c_str()); + } + } + + } + FAISS_THROW_FMT("factory string %s: unbalanced parentheses st=%d", s.c_str(), st); + +} } // anonymous namespace @@ -78,31 +122,32 @@ Index *index_factory (int d, const char *description_in, MetricType metric) metric == METRIC_INNER_PRODUCT); VTChain vts; Index *coarse_quantizer = nullptr; - std::unique_ptr parenthesis_index; + std::string parenthesis_ivf, parenthesis_refine; Index *index = nullptr; bool add_idmap = false; - bool make_IndexRefineFlat = false; + int d_in = d; ScopeDeleter1 del_coarse_quantizer, del_index; std::string description(description_in); char *ptr; - if (description.find('(') != std::string::npos) { + // handle indexes in parentheses + while (description.find('(') != std::string::npos) { // then we make a sub-index and remove the () from the description - int i0 = description.find('('); - int i1 = description.find(')'); - FAISS_THROW_IF_NOT_MSG( - i1 != std::string::npos, "string must contain closing parenthesis"); - std::string sub_description = description.substr(i0 + 1, i1 - i0 - 1); - // printf("substring=%s\n", sub_description.c_str()); + int i0, i1; + find_matching_parentheses(description, i0, i1); - parenthesis_index.reset(index_factory(d, sub_description.c_str(), metric)); + std::string sub_description = description.substr(i0 + 1, i1 - i0 - 1); + if (str_ends_with_digits(description.substr(0, i0), "IVF")) { + parenthesis_ivf = sub_description; + } else if (str_ends_with(description.substr(0, i0), "Refine")) { + parenthesis_refine = sub_description; + } else { + FAISS_THROW_MSG("don't know what to do with parenthesis index"); + } description = description.erase(i0, i1 - i0 + 1); - - // printf("new description=%s\n", description.c_str()); - } int64_t ncentroids = -1; @@ -162,12 +207,14 @@ Index *index_factory (int d, const char *description_in, MetricType metric) // coarse quantizers } else if (!coarse_quantizer && sscanf (tok, "IVF%" PRId64 "_HNSW%d", &ncentroids, &M) == 2) { - coarse_quantizer_1 = new IndexHNSWFlat (d, M); + coarse_quantizer_1 = new IndexHNSWFlat (d, M, metric); } else if (!coarse_quantizer && sscanf (tok, "IVF%" PRId64, &ncentroids) == 1) { - if (parenthesis_index) { - coarse_quantizer_1 = parenthesis_index.release(); + if (!parenthesis_ivf.empty()) { + coarse_quantizer_1 = + index_factory(d, parenthesis_ivf.c_str(), metric); + } else if (metric == METRIC_L2) { coarse_quantizer_1 = new IndexFlatL2 (d); } else { @@ -254,11 +301,13 @@ Index *index_factory (int d, const 
char *description_in, MetricType metric) index_ivf->own_fields = true; index_1 = index_ivf; } else if (!index && ( - sscanf (tok, "PQ%dx4fs_%d", &M, &bbs) == 2 || - (sscanf (tok, "PQ%dx4f%c", &M, &c) == 2 && c == 's') )) { + sscanf (tok, "PQ%dx4fs_%d", &M, &bbs) == 2 || + (sscanf (tok, "PQ%dx4f%c", &M, &c) == 2 && c == 's') || + (sscanf (tok, "PQ%dx4fs%c", &M, &c) == 2 && c == 'r'))) { if (bbs == -1) { bbs = 32; } + bool by_residual = str_ends_with(stok, "fsr"); if (coarse_quantizer) { IndexIVFPQFastScan *index_ivf = new IndexIVFPQFastScan( coarse_quantizer, d, ncentroids, M, 4, metric, bbs @@ -266,13 +315,14 @@ Index *index_factory (int d, const char *description_in, MetricType metric) index_ivf->quantizer_trains_alone = get_trains_alone (coarse_quantizer); index_ivf->metric_type = metric; + index_ivf->by_residual = by_residual; index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT; del_coarse_quantizer.release (); index_ivf->own_fields = true; index_1 = index_ivf; } else { IndexPQFastScan *index_pq = new IndexPQFastScan ( - d, M, nbit, metric, bbs + d, M, 4, metric, bbs ); index_1 = index_pq; } @@ -347,7 +397,12 @@ Index *index_factory (int d, const char *description_in, MetricType metric) FAISS_THROW_IF_NOT(!coarse_quantizer); index_1 = new IndexLattice(d, M, nbit, r2); } else if (stok == "RFlat") { - make_IndexRefineFlat = true; + parenthesis_refine = "Flat"; + } else if (stok == "Refine") { + FAISS_THROW_IF_NOT_MSG( + !parenthesis_refine.empty(), + "Refine index should be provided in parentheses" + ); } else { FAISS_THROW_FMT( "could not parse token \"%s\" in %s\n", tok, description_in); @@ -404,8 +459,10 @@ Index *index_factory (int d, const char *description_in, MetricType metric) index = index_pt; } - if (make_IndexRefineFlat) { - IndexRefineFlat *index_rf = new IndexRefineFlat (index); + if (!parenthesis_refine.empty()) { + Index *refine_index = index_factory(d_in, parenthesis_refine.c_str(), metric); + IndexRefine *index_rf = new IndexRefine(index, refine_index); + index_rf->own_refine_index = true; index_rf->own_fields = true; index = index_rf; } diff --git a/faiss/python/__init__.py b/faiss/python/__init__.py index 199cdb68b4..315871aaf1 100644 --- a/faiss/python/__init__.py +++ b/faiss/python/__init__.py @@ -494,7 +494,8 @@ def replacement_function(*args): add_ref_in_constructor(IndexHNSW, 0) add_ref_in_method(IndexShards, 'add_shard', 0) add_ref_in_method(IndexBinaryShards, 'add_shard', 0) -# add_ref_in_constructor(IndexRefineFlat, 0) +add_ref_in_constructor(IndexRefineFlat, {2:[0], 1:[0]}) +add_ref_in_constructor(IndexRefine, {2:[0, 1]}) add_ref_in_constructor(IndexBinaryIVF, 0) add_ref_in_constructor(IndexBinaryFromFloat, 0) @@ -510,24 +511,6 @@ def replacement_function(*args): # seems really marginal... 
# remove_ref_from_method(IndexReplicas, 'removeIndex', 0) -def handle_IndexRefineFlat(the_class): - - original_init = the_class.__init__ - - def replacement_init(self, *args): - if len(args) == 2: - index, xb = args - assert xb.shape == (index.ntotal, index.d) - xb = swig_ptr(xb) - args = (index, xb) - - original_init(self, *args) - self.referenced_objects = [args[0]] - - the_class.__init__ = replacement_init - -handle_IndexRefineFlat(IndexRefineFlat) - ########################################### # GPU functions ########################################### diff --git a/faiss/python/swigfaiss.swig b/faiss/python/swigfaiss.swig index f98a916ddc..3113a13612 100644 --- a/faiss/python/swigfaiss.swig +++ b/faiss/python/swigfaiss.swig @@ -88,6 +88,8 @@ typedef uint64_t size_t; #include #include #include +#include + #include #include @@ -356,6 +358,8 @@ void gpu_sync_all_devices() %include %include + +%newobject *::get_distance_computer() const; %include %include @@ -368,6 +372,7 @@ void gpu_sync_all_devices() %include %include %include +%include %include %include %include @@ -436,6 +441,18 @@ void gpu_sync_all_devices() %template(IndexIDMap2) faiss::IndexIDMap2Template; %template(IndexBinaryIDMap2) faiss::IndexIDMap2Template; + + +%ignore faiss::BufferList::Buffer; +%ignore faiss::RangeSearchPartialResult::QueryResult; +%ignore faiss::IDSelectorBatch::set; +%ignore faiss::IDSelectorBatch::bloom; +%ignore faiss::InterruptCallback::instance; +%ignore faiss::InterruptCallback::lock; + +%include + + #ifdef GPU_WRAPPER // quiet SWIG warnings @@ -513,6 +530,7 @@ void gpu_sync_all_devices() DOWNCAST ( IndexIVF ) DOWNCAST ( IndexFlat ) DOWNCAST ( IndexRefineFlat ) + DOWNCAST ( IndexRefine ) DOWNCAST ( IndexPQFastScan ) DOWNCAST ( IndexPQ ) DOWNCAST ( IndexScalarQuantizer ) @@ -907,15 +925,6 @@ void * cast_integer_to_void_ptr (long long x) { * Range search interface *******************************************************************/ -%ignore faiss::BufferList::Buffer; -%ignore faiss::RangeSearchPartialResult::QueryResult; -%ignore faiss::IDSelectorBatch::set; -%ignore faiss::IDSelectorBatch::bloom; - -%ignore faiss::InterruptCallback::instance; -%ignore faiss::InterruptCallback::lock; -%include - %inline %{ diff --git a/faiss/utils/quantize_lut.cpp b/faiss/utils/quantize_lut.cpp index 63fa97b698..64b2b52e39 100644 --- a/faiss/utils/quantize_lut.cpp +++ b/faiss/utils/quantize_lut.cpp @@ -42,7 +42,7 @@ float round_uint8_and_mul(float *tab, size_t n) { return multiplier; } - +// there can be NaNs in tables, they should be ignored float tab_min(const float *tab, size_t n) { float min = HUGE_VAL; for(int i = 0; i < n; i++) { @@ -185,6 +185,7 @@ void quantize_LUT_and_bias( round_tab(bias, nprobe, a, bias_min, biasq); } else if (biasq) { + // LUT is 3D std::vector mins(nprobe * M); std::vector bias2(nprobe); float bias_min = tab_min(bias, nprobe); diff --git a/tests/test_autotune.py b/tests/test_autotune.py new file mode 100644 index 0000000000..623e81b28d --- /dev/null +++ b/tests/test_autotune.py @@ -0,0 +1,47 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ + +import unittest +import faiss + + +class TestParameterSpace(unittest.TestCase): + + def test_nprobe(self): + index = faiss.index_factory(32, "IVF32,Flat") + ps = faiss.ParameterSpace() + ps.set_index_parameter(index, "nprobe", 5) + self.assertEqual(index.nprobe, 5) + + def test_nprobe_2(self): + index = faiss.index_factory(32, "IDMap,IVF32,Flat") + ps = faiss.ParameterSpace() + ps.set_index_parameter(index, "nprobe", 5) + index2 = faiss.downcast_index(index.index) + self.assertEqual(index2.nprobe, 5) + + def test_nprobe_3(self): + index = faiss.index_factory(32, "IVF32,SQ8,RFlat") + ps = faiss.ParameterSpace() + ps.set_index_parameter(index, "nprobe", 5) + index2 = faiss.downcast_index(index.base_index) + self.assertEqual(index2.nprobe, 5) + + def test_nprobe_4(self): + index = faiss.index_factory(32, "PCAR32,IVF32,SQ8,RFlat") + ps = faiss.ParameterSpace() + + ps.set_index_parameter(index, "nprobe", 5) + index2 = faiss.downcast_index(index.base_index) + index2 = faiss.downcast_index(index2.index) + self.assertEqual(index2.nprobe, 5) + + def test_efSearch(self): + index = faiss.index_factory(32, "IVF32_HNSW32,SQ8") + ps = faiss.ParameterSpace() + ps.set_index_parameter(index, "quantizer_efSearch", 5) + index2 = faiss.downcast_index(index.quantizer) + self.assertEqual(index2.hnsw.efSearch, 5) diff --git a/tests/test_factory.py b/tests/test_factory.py index 33b6e2bebc..18b5d8a4b9 100644 --- a/tests/test_factory.py +++ b/tests/test_factory.py @@ -3,12 +3,13 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from __future__ import absolute_import, division, print_function import numpy as np import unittest import faiss +from faiss.contrib import factory_tools + class TestFactory(unittest.TestCase): @@ -75,6 +76,7 @@ def test_factory_HNSW_newstyle(self): def test_factory_fast_scan(self): index = faiss.index_factory(56, "PQ28x4fs") self.assertEqual(index.bbs, 32) + self.assertEqual(index.pq.nbits, 4) index = faiss.index_factory(56, "PQ28x4fs_64") self.assertEqual(index.bbs, 64) index = faiss.index_factory(56, "IVF50,PQ28x4fs_64", faiss.METRIC_INNER_PRODUCT) @@ -89,6 +91,45 @@ def test_parenthesis(self): quantizer = faiss.downcast_index(index.quantizer) self.assertEqual(quantizer.pq.M, 25) + def test_parenthesis_2(self): + index = faiss.index_factory(50, "PCA30,IVF32(PQ15),Flat") + index_ivf = faiss.extract_index_ivf(index) + quantizer = faiss.downcast_index(index_ivf.quantizer) + self.assertEqual(quantizer.pq.M, 15) + self.assertEqual(quantizer.d, 30) + + def test_parenthesis_refine(self): + index = faiss.index_factory(50, "IVF32,Flat,Refine(PQ25x12)") + rf = faiss.downcast_index(index.refine_index) + self.assertEqual(rf.pq.M, 25) + self.assertEqual(rf.pq.nbits, 12) + + + def test_parenthesis_refine_2(self): + # Refine applies on the whole index including pre-transforms + index = faiss.index_factory(50, "PCA32,IVF32,Flat,Refine(PQ25x12)") + rf = faiss.downcast_index(index.refine_index) + self.assertEqual(rf.pq.M, 25) + + def test_nested_parenteses(self): + index = faiss.index_factory(50, "IVF1000(IVF20,SQ4,Refine(SQ8)),Flat") + q = faiss.downcast_index(index.quantizer) + qref = faiss.downcast_index(q.refine_index) + # check we can access the scalar quantizer + self.assertEqual(qref.sq.code_size, 50) + + def test_residual(self): + index = faiss.index_factory(50, "IVF1000,PQ25x4fsr") + self.assertTrue(index.by_residual) + +class TestCodeSize(unittest.TestCase): + + def test_1(self): + self.assertEqual( + 
factory_tools.get_code_size(50, "IVF32,Flat,Refine(PQ25x12)"), + 50 * 4 + (25 * 12 + 7) // 8 + ) + class TestCloneSize(unittest.TestCase): diff --git a/tests/test_index.py b/tests/test_index.py index d88a3da80a..79640ff5a3 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -388,7 +388,7 @@ def run_search_and_reconstruct(self, index, xb, xq, k=10, eps=None): R_ref = index.reconstruct_n(0, n) D, I, R = index.search_and_reconstruct(xq, k) - self.assertTrue((D == D_ref).all()) + np.testing.assert_almost_equal(D, D_ref, decimal=5) self.assertTrue((I == I_ref).all()) self.assertEqual(R.shape[:2], I.shape) self.assertEqual(R.shape[2], d) diff --git a/tests/test_index_accuracy.py b/tests/test_index_accuracy.py index 3effd1a082..59388d4124 100644 --- a/tests/test_index_accuracy.py +++ b/tests/test_index_accuracy.py @@ -303,7 +303,7 @@ def test_parallel_mode(self): index.nprobe = 4 # hopefully more robust than 1 Dref, Iref = index.search(xq, 10) - for pm in 1, 2: + for pm in 1, 2, 3: index.parallel_mode = pm Dnew, Inew = index.search(xq, 10) @@ -692,7 +692,6 @@ def do_test(self, metric): d = 32 xt, xb, xq = get_dataset_2(d, 2000, 1000, 200) index1 = faiss.index_factory(d, "PQ4x4np", metric) - Dref, Iref = faiss.knn(xq, xb, 10, metric) index1.train(xt) @@ -703,7 +702,10 @@ def do_test(self, metric): recall1 = (I1 == Iref[:, :1]).sum() # add refine index on top - index2 = faiss.IndexRefineFlat(index1, xb) + index_flat = faiss.IndexFlat(d, metric) + index_flat.add(xb) + + index2 = faiss.IndexRefine(index1, index_flat) index2.k_factor = 10.0 D2, I2 = index2.search(xq, 10) diff --git a/tests/test_refine.py b/tests/test_refine.py new file mode 100644 index 0000000000..8b0d9222f4 --- /dev/null +++ b/tests/test_refine.py @@ -0,0 +1,53 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import numpy as np +import unittest +import faiss + +from faiss.contrib import datasets + + +class TestDistanceComputer(unittest.TestCase): + + def do_test(self, factory_string, metric_type=faiss.METRIC_L2): + ds = datasets.SyntheticDataset(32, 1000, 200, 20) + + index = faiss.index_factory(32, factory_string, metric_type) + index.train(ds.get_train()) + index.add(ds.get_database()) + xq = ds.get_queries() + Dref, Iref = index.search(xq, 10) + dc = index.get_distance_computer() + self.assertTrue(dc.this.own()) + for q in range(ds.nq): + dc.set_query(faiss.swig_ptr(xq[q])) + for j in range(10): + ref_dis = Dref[q, j] + new_dis = dc(int(Iref[q, j])) + np.testing.assert_almost_equal( + new_dis, ref_dis, decimal=5) + + def test_distance_computer_PQ(self): + self.do_test("PQ8np") + + def test_distance_computer_SQ(self): + self.do_test("SQ8") + + def test_distance_computer_SQ6(self): + self.do_test("SQ6") + + def test_distance_computer_PQbit6(self): + self.do_test("PQ8x6np") + + def test_distance_computer_PQbit6_ip(self): + self.do_test("PQ8x6np", faiss.METRIC_INNER_PRODUCT) + + def test_distance_computer_VT(self): + self.do_test("PCA20,SQ8") + + +
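As a usage sketch of the new IndexRefine / Refine(...) functionality exercised by the tests above (illustrative only: the dimension, dataset sizes, factory string and k_factor below are arbitrary choices, not values taken from this patch):

import numpy as np
import faiss

d = 32
rs = np.random.RandomState(123)
xt = rs.rand(1000, d).astype('float32')   # training vectors
xb = rs.rand(2000, d).astype('float32')   # database vectors
xq = rs.rand(10, d).astype('float32')     # query vectors

# base index with 4-bit fast-scan PQ codes (coarse, approximate distances)
index = faiss.index_factory(d, "PQ16x4fs")
index.train(xt)
index.add(xb)

# exact index over the same vectors, used only for re-ranking
index_flat = faiss.IndexFlat(d)
index_flat.add(xb)

# IndexRefine fetches k_factor * k candidates from the base index and
# re-ranks them with distances computed by the refinement index
index_refine = faiss.IndexRefine(index, index_flat)
index_refine.k_factor = 10.0
D, I = index_refine.search(xq, 10)

The factory form used in test_parenthesis_refine ("IVF32,Flat,Refine(PQ25x12)") builds the same two-level structure in one call; as shown in the index_factory.cpp hunk, the factory sets own_refine_index and own_fields so the wrapped indexes are deallocated together with the IndexRefine.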