How to use annoy - 10 common examples

To help you get started, we’ve selected a few annoy examples based on popular ways it is used in public projects.

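All of the snippets below follow the same build, save, load, query pattern from annoy's public API. As a quick orientation, here is a minimal end-to-end sketch (the filename and dimensionality are illustrative):

from annoy import AnnoyIndex
import random

f = 40  # dimensionality of the item vectors
t = AnnoyIndex(f, 'angular')
for i in range(1000):
    t.add_item(i, [random.gauss(0, 1) for _ in range(f)])
t.build(10)  # more trees give higher accuracy at the cost of a bigger index
t.save('test.ann')

u = AnnoyIndex(f, 'angular')
u.load('test.ann')  # load() memory-maps the file, so this is fast
print(u.get_nns_by_item(0, 10))  # the 10 nearest neighbours of item 0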

github hdidx / hdidx / tests / eval_annoy.py (View on GitHub)
def main(args):
    """ Main entry.
    """

    data = Dataset(args.dataset)
    f = data.base.shape[1]

    for ntrees in args.ntrees:
        t = AnnoyIndex(f)   # Length of item vector that will be indexed
        idxpath = os.path.join(args.exp_dir, 'sift_annoy_ntrees%d.idx' % ntrees)
        if not os.path.exists(idxpath):
            logging.info("Adding items ...")
for i in range(data.nbae):
                t.add_item(i, data.base[i])
                if i % 100000 == 0:
                    logging.info("\t%d/%d" % (i, data.nbae))
            logging.info("\tDone!")
            logging.info("Building indexes ...")
            t.build(ntrees)
            logging.info("\tDone!")
            t.save(idxpath)
        else:
            logging.info("Loading indexes ...")
            t.load(idxpath)
            logging.info("\tDone!")
github beringresearch / ivis / tests / data / test_knn.py (View on GitHub)
def test_build_sparse_annoy_index(annoy_index_file):
    data = np.random.choice([0, 1], size=(10, 5))
    sparse_data = csr_matrix(data)

    index = build_annoy_index(sparse_data, annoy_index_file)
    assert os.path.exists(annoy_index_file)

    loaded_index = AnnoyIndex(5, metric='angular')
    loaded_index.load(annoy_index_file)

    assert index.f == loaded_index.f == 5
    assert index.get_n_items() == loaded_index.get_n_items() == 10
    assert index.get_nns_by_item(0, 5) == loaded_index.get_nns_by_item(0, 5)

    index.unload()
    loaded_index.unload()
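build_annoy_index is an ivis helper rather than part of annoy itself. Since annoy only accepts dense vectors, a helper along these lines (a sketch of the idea, not ivis's actual implementation) has to densify each CSR row before adding it:

from annoy import AnnoyIndex

def build_annoy_index_sketch(X, path, n_trees=50):
    """Index the rows of a scipy CSR matrix with annoy (illustrative)."""
    index = AnnoyIndex(X.shape[1], metric='angular')
    for i in range(X.shape[0]):
        # densify one row at a time so memory use stays bounded
        index.add_item(i, X[i].toarray().ravel())
    index.build(n_trees)
    index.save(path)
    return index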
github cemoody / wizlang / backend.py (View on GitHub)
trained = "/home/ubuntu/data" 
fnv = '%s/vectors.fullwiki.1000.s50.num.npy' % trained
ffb = '%s/freebase_types_and_fullwiki.1000.s50.words' % trained
fnw = '/home/ubuntu/code/wizlang/data/freebase.words'

if os.path.exists(fnw + '.pickle'):
    aw2i, ai2w = cPickle.load(open(fnw + '.pickle', 'rb'))
else:
    aw2i, ai2w = veclib.get_words(fnw)
    cPickle.dump([aw2i, ai2w], open(fnw + '.pickle', 'wb'))
print('loaded word index')

if USE_ANNOY:
    import annoy
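    # the dimensionality (1000) must match the index that was saved to disk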
    annoy_index = annoy.AnnoyIndex(1000)
    annoy_index.load("/home/ubuntu/code/wizlang/data/freebase.tree")
    print('loaded Annoy Index')
    avl = annoy_index
else:
    avl = veclib.get_vector_lib(fnv)
    #avl = veclib.normalize(avl)
    avl = veclib.split(veclib.normalize, avl)


frac = None
if frac:
    end = int(avl.shape[0] * frac)
    # drop the discarded words from both maps, then trim the vector library
    for i in range(end, avl.shape[0]):
        del aw2i[ai2w.pop(i)]
    avl = avl[:end]
github benfred / implicit / implicit / approximate_als.py (View on GitHub)
def fit(self, Ciu, show_progress=True):
        # delay loading the annoy library in case it's not installed here
        import annoy

        # train the model
        super(AnnoyAlternatingLeastSquares, self).fit(Ciu, show_progress)

        # build up an Annoy Index with all the item_factors (for calculating
        # similar items)
        if self.approximate_similar_items:
            log.debug("Building annoy similar items index")

            self.similar_items_index = annoy.AnnoyIndex(
                self.item_factors.shape[1], 'angular')
            for i, row in enumerate(self.item_factors):
                self.similar_items_index.add_item(i, row)
            self.similar_items_index.build(self.n_trees)

        # build up a separate index for the inner product (for recommend
        # methods)
        if self.approximate_recommend:
            log.debug("Building annoy recommendation index")
            self.max_norm, extra = augment_inner_product_matrix(self.item_factors)
            self.recommend_index = annoy.AnnoyIndex(extra.shape[1], 'angular')
            for i, row in enumerate(extra):
                self.recommend_index.add_item(i, row)
            self.recommend_index.build(self.n_trees)
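augment_inner_product_matrix implements the standard reduction from maximum inner product search to angular search: each item vector is padded with one extra coordinate chosen so that every augmented row has the same norm, which makes cosine ranking agree with inner-product ranking. A sketch of the idea:

import numpy as np

def augment_inner_product(factors):
    # pad each row with sqrt(max_norm^2 - ||x||^2) so all augmented vectors
    # share the norm max_norm; a query vector gets a 0 in the extra
    # coordinate, leaving its inner product with every item unchanged
    norms = np.linalg.norm(factors, axis=1)
    max_norm = norms.max()
    extra = np.sqrt(max_norm ** 2 - norms ** 2)
    return max_norm, np.hstack([factors, extra[:, None]])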
github deepinsight / insightface / recognition / triplet_image_iter.py (View on GitHub)
        net_out = self.mx_model.get_outputs()
        embedding = net_out[0].asnumpy()
        nembedding = sklearn.preprocessing.normalize(embedding)
        if _count
github mpkuse / cartwheel_train / predict_netvlad_syn.py (View on GitHub)
#
# Load stored Weights
tensorflow_session = tf.Session()
tensorflow_saver = tf.train.Saver()
print(tcolor.OKGREEN, 'Restore model from : ', PARAM_MODEL, tcolor.ENDC)
tensorflow_saver.restore( tensorflow_session, PARAM_MODEL )


#
# Load ANN Index
with open( PARAM_DB_PREFIX+'/vlad_word.pickle', 'rb' ) as handle:
    print('Read : ', PARAM_DB_PREFIX+'/vlad_word.pickle')
    words_db = pickle.load( handle )

t_ann = AnnoyIndex( words_db.shape[1], metric='euclidean'  )

for i in range( words_db.shape[0] ):
    t_ann.add_item( i, words_db[i,:] )

print('Rebuild ANN Index')  # TODO: Figure out why t_ann.load() does not work
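# note: load() can only restore an index previously persisted with save();
# the dimensionality and metric passed to AnnoyIndex must also match exactly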
t_ann.build(10)



#
# Init Renderer
app = TrainRenderer(queue_warning=False)
while True:

    im = None
    while im is None:
github explosion / sense2vec / sense2vec / sense2vec.py (View on GitHub)
        path (unicode / Path): The path to load from.
        exclude (list): Names of serialization fields to exclude.
        RETURNS (Sense2Vec): The loaded object.
        """
        path = Path(path)
        strings_path = path / "strings.json"
        index_path = path / "index.ann"
        freqs_path = path / "freqs.json"
        self.vectors = Vectors().from_disk(path)
        self.cfg.update(srsly.read_json(path / "cfg"))
        if freqs_path.exists():
            self.freqs = dict(srsly.read_json(freqs_path))
        if "strings" not in exclude and strings_path.exists():
            self.strings = StringStore().from_disk(strings_path)
        if "index" not in exclude and index_path.exists():
            self.index = AnnoyIndex(self.vectors.shape[1], self.cfg["annoy_metric"])
            self.index.load(str(index_path))
        return self
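Once loaded, row i of the Annoy index corresponds to row i of self.vectors, so a nearest-neighbour lookup on the restored object might look like this (a hedged sketch; the path is illustrative):

s2v = Sense2Vec().from_disk("/path/to/s2v")
row_ids, dists = s2v.index.get_nns_by_item(0, 10, include_distances=True)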
github kendricktan / iffse / search_tree.py (View on GitHub)
import h5py
import random

from facemaps.data.database import FacialEmbeddings

from annoy import AnnoyIndex
from config import CONFIG


def get_shortcode_from_facialembeddings_id(fe_id):
    return FacialEmbeddings.get(id=fe_id).op.shortcode


# Tree settings
annoy_settings = CONFIG['annoy_tree_settings']
tree = AnnoyIndex(128, metric=annoy_settings['metric'])
tree.load(CONFIG['annoy_tree'])

# Random seed index
seed_idx = random.randint(0, len(FacialEmbeddings.select()))

print(
    'Original search: https://www.instagram.com/p/{}/'.format(get_shortcode_from_facialembeddings_id(seed_idx))
)
print('---' * 10)
print('Similar faces:')
idxs = tree.get_nns_by_item(seed_idx, 32)
shortcodes_unique = []
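# the nearest neighbour of an item is typically the item itself, hence idxs[1:]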
for i in idxs[1:]:
    s_ = get_shortcode_from_facialembeddings_id(i)
    if s_ not in shortcodes_unique:
        shortcodes_unique.append(s_)
github RaRe-Technologies / gensim / gensim / similarities / index.py (View on GitHub)
"""
        fname_dict = fname + '.d'
        if not (os.path.exists(fname) and os.path.exists(fname_dict)):
            raise IOError(
                "Can't find index files '%s' and '%s' - Unable to restore AnnoyIndexer state." % (fname, fname_dict)
            )
        else:
            try:
                from annoy import AnnoyIndex
            except ImportError:
                raise _NOANNOY

            with utils.open(fname_dict, 'rb') as f:
                d = _pickle.loads(f.read())
            self.num_trees = d['num_trees']
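            # recreate the index with the stored dimensionality; load() then memory-maps the file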
            self.index = AnnoyIndex(d['f'])
            self.index.load(fname)
            self.labels = d['labels']
github spacemanidol / MSMARCO / ConversationalSearch / generateArtificialSessions.py (View on GitHub)
def generateAnnoy(real, artificial, annoyFilename, dimensions):
    idx2vec = np.array(artificial[2])
    t = AnnoyIndex(dimensions)
    for j in range(len(artificial[2])):
        t.add_item(j,idx2vec[j])
    print('Done Adding items to AnnoyIndex')
    t.build(TREESIZE)
    print('Done Building AnnoyIndex')
    t.save(annoyFilename)
    return t
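A hedged usage sketch for the function above; the session variables and dimensionality are assumptions:

# hypothetical inputs: generateAnnoy only reads artificial[2], the vectors
t = generateAnnoy(real_sessions, artificial_sessions, 'sessions.ann', 300)
ids, dists = t.get_nns_by_item(0, 10, include_distances=True)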

annoy

Approximate Nearest Neighbors in C++/Python optimized for memory usage and loading/saving to disk.

License: Apache-2.0