pgvector: Embeddings and vector similarity
Storing OpenAI embeddings in Postgres with pgvector
/**
 * Requests an OpenAI embedding for a piece of memory text.
 *
 * Note that the resolved value is the complete `createEmbedding` response,
 * not the bare vector; callers elsewhere in these notes unpack the vector
 * with `response.data.data[0].embedding`.
 *
 * @param {string} memory - Text to embed.
 * @returns {Promise<object>} Resolves to whatever `openai.createEmbedding` resolves to.
 */
async function memoryToEmbedding(memory) {
  const request = {
    model: 'text-embedding-ada-002',
    input: memory,
  };
  // Returning the call directly: the async wrapper adopts the promise's outcome.
  return openai.createEmbedding(request);
}
// Example: embed a post body with OpenAI and store it alongside the post.
const title = 'First post!';
const body = 'Hello world!';

// Generate a vector using OpenAI
const embeddingResponse = await openai.createEmbedding({
  model: 'text-embedding-ada-002',
  input: body,
});
// One input string was sent, so take the embedding from the first result entry.
const [{ embedding }] = embeddingResponse.data.data;

// Store the vector in Postgres
const { data, error } = await supabase.from('posts').insert({
  title,
  body,
  embedding,
});
// The insert result carries an `error` field (presumably set instead of
// throwing on failure -- confirm against supabase-js docs); surface it so a
// failed insert is not silently ignored.
if (error) {
  console.error('Error storing post embedding:', error);
}
Need to create the match_documents function (see: Database Functions | Supabase Docs)
Storing OpenAI embeddings in Postgres with pgvector
-- Returns rows from documents whose cosine similarity to query_embedding is
-- greater than match_threshold, most similar first.
--   query_embedding : vector(1536) to compare against documents.embedding
--   match_threshold : similarity a row must exceed to be returned
--   match_count     : maximum number of rows returned
create or replace function match_documents (
  query_embedding vector(1536),
  match_threshold float,
  match_count int
)
returns table (
  id bigint,
  content text,
  similarity float
)
language sql stable
as $$
  select
    documents.id,
    documents.content,
    -- <=> is cosine distance, so 1 - distance is the cosine similarity.
    1 - (documents.embedding <=> query_embedding) as similarity
  from documents
  where 1 - (documents.embedding <=> query_embedding) > match_threshold
  -- Sort on the raw distance ascending rather than the computed similarity
  -- descending: the resulting order is the same, but per the pgvector docs
  -- only "order by <distance operator> ... limit" can use a vector index.
  order by documents.embedding <=> query_embedding asc
  limit match_count;
$$;
pgvector introduces 3 new operators that can be used to calculate similarity:
- <-> Euclidean distance
- <#> Negative inner product
- <=> Cosine distance
/**
 * Finds stored memories that are most similar to a query.
 *
 * Embeds the query with OpenAI, then calls the `match_documents` database
 * function through Supabase RPC with a fixed similarity threshold of 0.78.
 *
 * @param {*} queryString - Query to search for; converted with `.toString()`.
 * @param {number} [limit=5] - Passed to the RPC as `match_count`.
 * @returns {Promise<*|null>} The RPC's `data`, or `null` when the RPC reports
 *   an error (which is logged). Failures of the embedding call itself are not
 *   caught here and propagate to the caller.
 */
async function getRelevantMemories(queryString, limit = 5) {
  // Step 1: embed the query text.
  const embeddingRequest = {
    model: 'text-embedding-ada-002',
    input: queryString.toString(),
  };
  const openaiResponse = await openai.createEmbedding(embeddingRequest);
  const [{ embedding: queryVector }] = openaiResponse.data.data;

  // Step 2: ask the database for the closest matches.
  const rpcParams = {
    query_embedding: queryVector,
    match_threshold: 0.78,
    match_count: limit,
  };
  const { data: matches, error: rpcError } = await supabase.rpc('match_documents', rpcParams);

  if (!rpcError) {
    return matches;
  }

  console.error("Error fetching relevant user memory:", rpcError);
  return null;
}
GitHub - ejfox/coachartie_discord