Source code for indexers.usearch_indexer

import os
import tempfile
from typing import Any, Dict, List, Tuple

import numpy as np
from usearch.index import Index

from .base import BaseIndexer


class USearchIndexer(BaseIndexer):
    """Indexer backed by a ``usearch`` ``Index`` configured with the cosine metric.

    Vectors are keyed by their position in the list passed to
    :meth:`build_index`; :meth:`search` uses that key to look up the matching
    metadata entry. After every build the index is serialized to a temporary
    ``.usearch`` file so that :meth:`get_size` can report its on-disk size.
    Call :meth:`cleanup` to delete that file when the indexer is no longer
    needed.
    """

    def __init__(self, dimension: int, dtype: str = "f32"):
        """
        dtype can be: "f32", "f16", "f64", "i8", "b1"
        """
        super().__init__(f"USearch-{dtype}", dimension)
        self.dtype = dtype.lower()
        self.index = Index(ndim=dimension, metric="cos", dtype=self.dtype)
        self.metadata: List[Dict[str, Any]] = []
        self.temp_file = tempfile.NamedTemporaryFile(suffix=".usearch", delete=False)
        # Only the reserved path is needed (``self.temp_file.name`` stays valid
        # after closing). Keeping the handle open leaks a descriptor and, on
        # Windows, blocks both ``Index.save`` and ``os.remove`` on that path.
        self.temp_file.close()

    def _as_index_array(self, values: Any) -> np.ndarray:
        """Convert ``values`` to the NumPy dtype matching ``self.dtype``.

        Used for both stored vectors and queries so the two are always
        converted the same way.
        """
        array = np.asarray(values)
        if self.dtype == "i8":
            # Scale floats to the i8 range; clip first so components outside
            # [-1, 1] saturate instead of wrapping around in the int8 cast.
            bounds = np.iinfo(np.int8)
            return np.clip(array * 127, bounds.min, bounds.max).astype(np.int8)
        if self.dtype == "f16":
            return array.astype(np.float16)
        if self.dtype == "f64":
            # Keep full precision; casting to float32 would defeat "f64".
            return array.astype(np.float64)
        # "f32" and every other dtype (including "b1") are handed over as
        # float32, as before.
        # NOTE(review): "b1" relies on usearch converting float32 input into
        # bits itself -- confirm this is the intended behaviour.
        return array.astype(np.float32)

    def build_index(
        self, embeddings: List[List[float]], metadata: List[Dict[str, Any]]
    ) -> None:
        """Add ``embeddings`` to the index and persist it to the temp file.

        Args:
            embeddings: One vector per item; item ``i`` is stored under key ``i``.
            metadata: Metadata entries, where ``metadata[i]`` describes
                ``embeddings[i]``.

        Raises:
            ValueError: If ``metadata`` has fewer entries than ``embeddings``,
                which would otherwise fail later inside :meth:`search`.
        """
        if len(metadata) < len(embeddings):
            raise ValueError(
                f"metadata has {len(metadata)} entries but "
                f"{len(embeddings)} embeddings were given"
            )

        vectors = self._as_index_array(embeddings)
        # An empty input yields a 1-D empty array that is not a valid batch,
        # so skip the add and just persist the (unchanged) index.
        if len(vectors) > 0:
            keys = np.arange(len(vectors))
            self.index.add(keys, vectors)
        self.metadata = metadata
        self.index.save(self.temp_file.name)

    def search(
        self, query_embedding: List[float], top_k: int = 5
    ) -> List[Tuple[Dict[str, Any], float]]:
        """Return up to ``top_k`` matches for ``query_embedding``.

        Args:
            query_embedding: The query vector, converted the same way as the
                indexed vectors.
            top_k: Maximum number of matches to request from the index.

        Returns:
            ``(metadata, distance)`` pairs in the order the index returns
            them, where ``distance`` is the value usearch reports for the
            configured cosine metric.
        """
        query = self._as_index_array(query_embedding)
        matches = self.index.search(query, top_k)
        return [
            (self.metadata[int(match.key)], float(match.distance))
            for match in matches
        ]

    def get_size(self) -> int:
        """Return the size in bytes of the saved index file, or 0 if it is missing."""
        try:
            return os.path.getsize(self.temp_file.name)
        except OSError:
            return 0

    def cleanup(self) -> None:
        """Delete the temporary index file if it still exists."""
        # Closing is idempotent; it guarantees no handle of ours keeps the
        # file locked when it is removed.
        self.temp_file.close()
        try:
            os.remove(self.temp_file.name)
        except FileNotFoundError:
            pass