| """Compute and update an optimal query adapter.""" | |
| import numpy as np | |
| from sqlmodel import Session, col, select | |
| from tqdm.auto import tqdm | |
| from raglite._config import RAGLiteConfig | |
| from raglite._database import Chunk, ChunkEmbedding, Eval, IndexMetadata, create_database_engine | |
| from raglite._embed import embed_sentences | |
| from raglite._search import vector_search | |


def update_query_adapter(  # noqa: PLR0915, C901
    *,
    max_triplets: int = 4096,
    max_triplets_per_eval: int = 64,
    optimize_top_k: int = 40,
    config: RAGLiteConfig | None = None,
) -> None:
| """Compute an optimal query adapter and update the database with it. | |
| This function computes an optimal linear transform A, called a 'query adapter', that is used to | |
| transform a query embedding q as A @ q before searching for the nearest neighbouring chunks in | |
| order to improve the quality of the search results. | |
| Given a set of triplets (qᵢ, pᵢ, nᵢ), we want to find the query adapter A that increases the | |
| score pᵢ'qᵢ of the positive chunk pᵢ and decreases the score nᵢ'qᵢ of the negative chunk nᵢ. | |
| If the nearest neighbour search uses the dot product as its relevance score, we can find the | |
| optimal query adapter by solving the following relaxed Procrustes optimisation problem with a | |
| bound on the Frobenius norm of A: | |

        A* = argmax Σᵢ pᵢ' (A qᵢ) - nᵢ' (A qᵢ)
           = argmax Σᵢ (pᵢ - nᵢ)' A qᵢ
           = argmax trace[ (P - N) A Q' ]    where Q := [q₁'; ...; qₖ']
                                                   P := [p₁'; ...; pₖ']
                                                   N := [n₁'; ...; nₖ']
           = argmax trace[ Q' (P - N) A ]
           = argmax trace[ M A ]             where M := Q' (P - N)
             s.t. ||A||_F == 1
           = M' / ||M||_F

    If the nearest neighbour search uses the cosine similarity as its relevance score, we can
    find the optimal query adapter by solving the following orthogonal Procrustes optimisation
    problem with an orthogonality constraint on A:

        A* = argmax Σᵢ pᵢ' (A qᵢ) - nᵢ' (A qᵢ)
           = argmax Σᵢ (pᵢ - nᵢ)' A qᵢ
           = argmax trace[ (P - N) A Q' ]
           = argmax trace[ Q' (P - N) A ]
           = argmax trace[ M A ]
           = argmax trace[ U Σ V' A ]        where U Σ V' := M is the SVD of M
           = argmax trace[ Σ V' A U ]
             s.t. A' A == 𝕀
           = V U'
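
    As a quick numerical sanity check of these two closed-form solutions (a sketch with a
    random matrix, not part of the algorithm itself):

        import numpy as np

        rng = np.random.default_rng(42)
        M = rng.standard_normal((8, 8))
        # Relaxed Procrustes: A* = M' / ||M||_F maximises trace[M A] s.t. ||A||_F == 1.
        A_relaxed = M.T / np.linalg.norm(M, ord="fro")
        # Orthogonal Procrustes: A* = V U' maximises trace[M A] s.t. A'A == 𝕀.
        U, _, VT = np.linalg.svd(M)
        A_orthogonal = VT.T @ U.T
        # The maxima are ||M||_F and Σᵢ σᵢ respectively, and ||M||_F <= Σᵢ σᵢ always holds.
        assert np.trace(M @ A_relaxed) <= np.trace(M @ A_orthogonal)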

    Additionally, we want to limit the effect of A* so that it adjusts q just enough to invert
    incorrectly ordered (q, p, n) triplets, but not so much as to affect the correctly ordered
    ones. To achieve this, we rewrite M as α (M / s) + (1 - α) 𝕀, where s scales M to the same
    Frobenius norm as 𝕀, and choose the smallest α that ranks each (q, p, n) triplet correctly.
    If α = 0, the relevance score gap of an incorrectly ordered (p, n) pair is
    B := (p - n)' q < 0. If α = 1, the relevance score gap would be
    A := (p - n)' (p - n) / ||p - n|| > 0. For a target relevance score gap of, say,
    C := 5% × A, the optimal α is given by α A + (1 - α) B = C, so that α = (B - C) / (B - A).
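
    A minimal usage sketch, assuming documents and evals have already been inserted with
    RAGLite's `insert_document()` and `insert_evals()` (this module's import path is assumed
    here):

        from raglite._config import RAGLiteConfig
        from raglite._query_adapter import update_query_adapter

        my_config = RAGLiteConfig(vector_search_query_adapter=True)
        update_query_adapter(max_triplets=2048, config=my_config)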
| """ | |
    config = config or RAGLiteConfig()
    config_no_query_adapter = RAGLiteConfig(
        **{**config.__dict__, "vector_search_query_adapter": False}
    )
    engine = create_database_engine(config)
    with Session(engine) as session:
        # Get a sample chunk embedding to determine the embedding dimension.
        chunk_embedding = session.exec(select(ChunkEmbedding).limit(1)).first()
        if chunk_embedding is None:
            error_message = "First run `insert_document()` to insert documents."
            raise ValueError(error_message)
        # Get evals from the database.
        evals = session.exec(
            select(Eval).order_by(Eval.id).limit(max(8, max_triplets // max_triplets_per_eval))
        ).all()
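        # Note: this check requires the potential triplet budget to reach the embedding
        # dimension d, presumably because fewer than d triplets would leave the d × d
        # transform A badly underdetermined.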
        if len(evals) * max_triplets_per_eval < len(chunk_embedding.embedding):
            error_message = "First run `insert_evals()` to generate sufficient evals."
            raise ValueError(error_message)
        # Loop over the evals to generate (q, p, n) triplets.
        Q = np.zeros((0, len(chunk_embedding.embedding)))  # noqa: N806
        P = np.zeros_like(Q)  # noqa: N806
        N = np.zeros_like(Q)  # noqa: N806
        for eval_ in tqdm(
            evals, desc="Extracting triplets from evals", unit="eval", dynamic_ncols=True
        ):
            # Embed the question.
            question_embedding = embed_sentences([eval_.question], config=config)
            # Retrieve chunks that would be used to answer the question.
            chunk_ids, _ = vector_search(
                question_embedding, num_results=optimize_top_k, config=config_no_query_adapter
            )
            retrieved_chunks = session.exec(
                select(Chunk).where(col(Chunk.id).in_(chunk_ids))
            ).all()
            # Restore the search ranking, which the unordered IN query does not preserve.
            retrieved_chunks = sorted(retrieved_chunks, key=lambda chunk: chunk_ids.index(chunk.id))
            # Extract (q, p, n) triplets by comparing the retrieved chunks with the eval.
            num_triplets = 0
            for i, retrieved_chunk in enumerate(retrieved_chunks):
                # Select irrelevant chunks.
                if retrieved_chunk.id not in eval_.chunk_ids:
                    # Look up all positive chunks (each represented by the mean of its
                    # multi-vector embedding) that are ranked lower than this negative one
                    # (represented by the embedding in its multi-vector embedding that best
                    # matches the query).
                    p_mean = [
                        np.mean(chunk.embedding_matrix, axis=0, keepdims=True)
                        for chunk in retrieved_chunks[i + 1 :]
                        if chunk is not None and chunk.id in eval_.chunk_ids
                    ]
                    n_top = retrieved_chunk.embedding_matrix[
                        np.argmax(retrieved_chunk.embedding_matrix @ question_embedding.T),
                        np.newaxis,
                        :,
                    ]
                    # Filter out any (q, p, n) triplets for which the mean positive embedding
                    # ranks higher than the top negative one.
                    p_mean = [p_e for p_e in p_mean if (n_top - p_e) @ question_embedding.T > 0]
                    if not p_mean:
                        continue
                    # Stack the (q, p, n) triplets.
                    p = np.vstack(p_mean)
                    n = np.repeat(n_top, p.shape[0], axis=0)
                    q = np.repeat(question_embedding, p.shape[0], axis=0)
                    num_triplets += p.shape[0]
                    # Append the (query, positive, negative) tuples to the Q, P, N matrices.
                    Q = np.vstack([Q, q])  # noqa: N806
                    P = np.vstack([P, p])  # noqa: N806
                    N = np.vstack([N, n])  # noqa: N806
                    # Check if we have sufficient triplets for this eval.
                    if num_triplets >= max_triplets_per_eval:
                        break
            # Check if we have sufficient triplets to compute the query adapter.
            if Q.shape[0] > max_triplets:
                Q, P, N = Q[:max_triplets, :], P[:max_triplets, :], N[:max_triplets, :]  # noqa: N806
                break
        # Normalise the rows of Q, P, N.
        Q /= np.linalg.norm(Q, axis=1, keepdims=True)  # noqa: N806
        P /= np.linalg.norm(P, axis=1, keepdims=True)  # noqa: N806
        N /= np.linalg.norm(N, axis=1, keepdims=True)  # noqa: N806
        # Compute the optimal weighted query adapter A*.
        # TODO: Matmul in float16 is extremely slow compared to single or double precision, why?
        gap_before = np.sum((P - N) * Q, axis=1)
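        # For unit vectors, the fully adapted gap (p - n)' (p - n) / ||p - n|| equals
        # ||p - n||, and ||p - n||² == 2 (1 - p'n), hence the expression below.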
        gap_after = 2 * (1 - np.sum(P * N, axis=1)) / np.linalg.norm(P - N, axis=1)
        gap_target = 0.05 * gap_after
        α = (gap_before - gap_target) / (gap_before - gap_after)  # noqa: PLC2401
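        # Assemble M' == (P - N)' diag(α) Q directly, i.e. the transpose of the docstring's
        # M := Q' (P - N), so the closed-form solutions below need no final transpose. Then
        # blend it with 𝕀 as α (M / s) + (1 - α) 𝕀, where s normalises M to ||𝕀||_F == √d
        # and α is averaged over all triplets.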
        MT = (α[:, np.newaxis] * (P - N)).T @ Q  # noqa: N806
        s = np.linalg.norm(MT, ord="fro") / np.sqrt(MT.shape[0])
        MT = np.mean(α) * (MT / s) + np.mean(1 - α) * np.eye(Q.shape[1])  # noqa: N806
        if config.vector_search_index_metric == "dot":
            # Use the relaxed Procrustes solution.
            A_star = MT / np.linalg.norm(MT, ord="fro")  # noqa: N806
        elif config.vector_search_index_metric == "cosine":
            # Use the orthogonal Procrustes solution.
            U, _, VT = np.linalg.svd(MT, full_matrices=False)  # noqa: N806
            A_star = U @ VT  # noqa: N806
        else:
            error_message = f"Unsupported ANN metric: {config.vector_search_index_metric}"
            raise ValueError(error_message)
        # Store the optimal query adapter in the database.
        index_metadata = session.get(IndexMetadata, "default") or IndexMetadata(id="default")
        index_metadata.metadata_ = {**index_metadata.metadata_, "query_adapter": A_star}
        session.add(index_metadata)
        session.commit()
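

# Note: at query time, the stored adapter is meant to be applied as q ↦ A* @ q before the
# nearest neighbour search (see the docstring above). A minimal sketch of that step, assuming
# the adapter has been read back from the "default" IndexMetadata row and that the query
# embedding is a (1, d) row vector as in this module:
#
#     A_star = index_metadata.metadata_["query_adapter"]
#     adapted_query = question_embedding @ A_star.T  # Row-vector form of A* @ q.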