pgvector: Embeddings and vector similarity
Storing OpenAI embeddings in Postgres with pgvector
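The snippets below assume an OpenAI client (v3 Node SDK) and a Supabase client have already been created. A minimal sketch, with placeholder environment variables for the keys and project URL:

import { Configuration, OpenAIApi } from 'openai'
import { createClient } from '@supabase/supabase-js'

// OpenAI v3 SDK client (matches the createEmbedding calls below)
const openai = new OpenAIApi(
  new Configuration({ apiKey: process.env.OPENAI_API_KEY })
)

// Supabase client for your project (placeholder env var names)
const supabase = createClient(
  process.env.SUPABASE_URL,
  process.env.SUPABASE_SERVICE_ROLE_KEY
)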
async function memoryToEmbedding(memory) {
  // Ask OpenAI for an embedding of the memory text
  const embeddingResponse = await openai.createEmbedding({
    model: 'text-embedding-ada-002',
    input: memory,
  })
  // The v3 SDK nests the payload under response.data.data
  const [{ embedding }] = embeddingResponse.data.data
  return embedding
}
const title = 'First post!'
const body = 'Hello world!'

// Generate a vector using OpenAI
const embeddingResponse = await openai.createEmbedding({
  model: 'text-embedding-ada-002',
  input: body,
})
const [{ embedding }] = embeddingResponse.data.data

// Store the vector in Postgres
const { data, error } = await supabase.from('posts').insert({
  title,
  body,
  embedding,
})
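The insert above assumes the pgvector extension is enabled and that posts has an embedding column sized to the 1536 dimensions returned by text-embedding-ada-002. A minimal sketch of that assumed schema:

-- Enable pgvector once per database
create extension if not exists vector;

-- Assumed shape of the posts table used above
create table posts (
  id bigserial primary key,
  title text,
  body text,
  embedding vector(1536)
);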
To search by similarity, create a match_documents database function (see Database Functions | Supabase Docs):
create or replace function match_documents (
  query_embedding vector(1536),
  match_threshold float,
  match_count int
)
returns table (
  id bigint,
  content text,
  similarity float
)
language sql stable
as $$
  select
    documents.id,
    documents.content,
    1 - (documents.embedding <=> query_embedding) as similarity
  from documents
  where 1 - (documents.embedding <=> query_embedding) > match_threshold
  order by similarity desc
  limit match_count;
$$;
pgvector introduces three new operators that can be used to calculate similarity (a short sketch follows this list):
- <-> Euclidean distance
- <#> Negative inner product
- <=> Cosine distance
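As a rough illustration of the three operators, here is a throwaway 3-dimensional table (real ada-002 embeddings are 1536-dimensional, as in match_documents above):

-- Toy table, only to demonstrate the operators
create table items (id bigserial primary key, embedding vector(3));
insert into items (embedding) values ('[1,1,1]'), ('[2,2,2]'), ('[1,0,0]');

select id, embedding <-> '[1,1,1]' as euclidean_distance from items order by 2;
select id, (embedding <#> '[1,1,1]') * -1 as inner_product from items order by 2 desc;
select id, 1 - (embedding <=> '[1,1,1]') as cosine_similarity from items order by 2 desc;

The last query uses the same 1 - (embedding <=> query) expression that match_documents uses to turn cosine distance into a similarity score.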
async function getRelevantMemories(queryString, limit = 5) {
  // Turn the queryString into an embedding
  const embeddingResponse = await openai.createEmbedding({
    model: 'text-embedding-ada-002',
    input: queryString.toString(),
  })
  const [{ embedding }] = embeddingResponse.data.data

  // Query the database for the most relevant memories
  const { data, error } = await supabase.rpc('match_documents', {
    query_embedding: embedding,
    match_threshold: 0.78,
    match_count: limit,
  })
  if (error) {
    console.error('Error fetching relevant user memory:', error)
    return null
  }
  return data
}
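A hypothetical call, assuming the clients above are configured and the documents table is populated:

const memories = await getRelevantMemories('what did the first post say?', 3)
if (memories) {
  // Each row follows the match_documents return shape: id, content, similarity
  for (const { id, content, similarity } of memories) {
    console.log(id, similarity.toFixed(2), content)
  }
}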