Source code for indexers.usearch_indexer

import os
import tempfile
from typing import Any, Dict, List, Tuple

import numpy as np
from usearch.index import Index

from .base import BaseIndexer



[docs]
class USearchIndexer(BaseIndexer):
    """Indexer that stores embeddings in a USearch ``Index`` (``metric="cos"``).

    The index is persisted to a temporary ``.usearch`` file after every
    ``build_index`` call so that ``get_size`` can report its on-disk size.
    Call ``cleanup`` to delete that file once the indexer is no longer needed.
    """

    def __init__(self, dimension: int, dtype: str = "f32") -> None:
        """
        dtype can be: "f32", "f16", "f64", "i8", "b1"

        Args:
            dimension: Number of components per embedding vector.
            dtype: Scalar kind forwarded to the USearch index (lower-cased
                before use).
        """
        super().__init__(f"USearch-{dtype}", dimension)
        self.dtype = dtype.lower()
        self.index = Index(ndim=dimension, metric="cos", dtype=self.dtype)
        self.metadata: List[Dict[str, Any]] = []
        self.temp_file = tempfile.NamedTemporaryFile(suffix=".usearch", delete=False)
        # Only the reserved path (``.name``) is used afterwards; close the
        # handle right away so the file descriptor is not leaked for the
        # lifetime of the indexer. ``delete=False`` keeps the file on disk.
        self.temp_file.close()

    def _to_index_dtype(self, values: Any) -> np.ndarray:
        """Convert raw float embeddings to the array dtype fed to the index.

        Shared by ``build_index`` and ``search`` so stored vectors and
        queries are always quantized the same way.
        """
        array = np.asarray(values)
        if self.dtype == "i8":
            # Scale floats to the i8 range [-128, 127]. Clip before casting:
            # components outside [-1, 1] would otherwise overflow int8.
            limits = np.iinfo(np.int8)
            return np.clip(array * 127, limits.min, limits.max).astype(np.int8)
        if self.dtype == "f16":
            return array.astype(np.float16)
        # Every other dtype (including "f64" and "b1") is handed over as
        # float32.
        # NOTE(review): "b1" presumably needs bit-packed input and a bitwise
        # metric rather than "cos" -- confirm against the USearch docs.
        return array.astype(np.float32)

    def build_index(self, embeddings: List[List[float]], metadata: List[Dict[str, Any]]) -> None:
        """Add ``embeddings`` to the index and save it to the temp file.

        Vectors are keyed by their position (0..n-1); ``metadata[i]`` is the
        payload returned by ``search`` for vector ``i``.

        Args:
            embeddings: One vector per item.
            metadata: One dict per item, in the same order as ``embeddings``.

        Raises:
            ValueError: If there are fewer metadata entries than embeddings,
                which would make some search hits impossible to resolve.
        """
        vectors = self._to_index_dtype(embeddings)
        if len(metadata) < len(vectors):
            raise ValueError(
                f"Got {len(vectors)} embeddings but only {len(metadata)} metadata entries"
            )

        # An empty input produces a shape-(0,) array, not a batch of
        # vectors, so there is nothing valid to add.
        if len(vectors):
            ids = np.arange(len(vectors))
            self.index.add(ids, vectors)

        self.metadata = metadata
        self.index.save(self.temp_file.name)

    def search(
        self, query_embedding: List[float], top_k: int = 5
    ) -> List[Tuple[Dict[str, Any], float]]:
        """Return up to ``top_k`` matches for ``query_embedding``.

        Args:
            query_embedding: A single query vector.
            top_k: Maximum number of matches requested from the index.

        Returns:
            ``(metadata, distance)`` pairs in the order the index yields
            them; ``distance`` is the value reported by the index for its
            ``"cos"`` metric.
        """
        query = self._to_index_dtype(query_embedding)
        matches = self.index.search(query, top_k)
        return [
            (self.metadata[int(match.key)], float(match.distance))
            for match in matches
        ]

    def get_size(self) -> int:
        """Return the size in bytes of the saved index file, or 0 if absent."""
        try:
            return os.path.getsize(self.temp_file.name)
        except OSError:
            # The file is missing or cannot be stat'ed (e.g. after cleanup).
            return 0

    def cleanup(self) -> None:
        """Delete the temporary index file; a no-op if it is already gone."""
        try:
            os.remove(self.temp_file.name)
        except FileNotFoundError:
            # Already removed (e.g. cleanup called twice).
            pass