RAG Demo
A simple Retrieval-Augmented Generation system in Python, demonstrating vector search, reranking, and LLM-based answering.
Demo of implementing RAG
This Jupyter notebook contains an initial implementation of RAG, following the tutorial from Hugging Face.
First, we load our dataset.
dataset = []
with open('cat-facts.txt', 'r') as file:
    dataset = file.readlines()

print(f'Loaded {len(dataset)} entries')
dataset[0:5]  # Display the first 5 entries
Loaded 151 entries
['On average, cats spend 2/3 of every day sleeping. That means a nine-year-old cat has been awake for only three years of its life.\n', 'Unlike dogs, cats do not have a sweet tooth. Scientists believe this is due to a mutation in a key taste receptor.\n', 'When a cat chases its prey, it keeps its head level. Dogs and humans bob their heads up and down.\n', 'The technical term for a cat’s hairball is a “bezoar.”\n', 'A group of cats is called a “clowder.”\n']
Implementing a vector database
We have to convert our plain text into vectors so that we can use vector similarity search rather than keyword search (which is not a viable alternative here).
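To see why naive keyword search falls short, here is a tiny sketch (the fact is taken from the dataset above, the query and the `keyword_overlap` helper are purely illustrative): a query phrased with different words than the stored fact shares no keywords at all, so a word-overlap scorer gives it a score of zero, while embeddings would still place the two texts close together.

```python
def keyword_overlap(query, text):
    """Fraction of query words that also appear in the text (naive keyword score)."""
    q_words = set(query.lower().split())
    t_words = set(text.lower().split())
    return len(q_words & t_words) / len(q_words)

fact = "On average, cats spend 2/3 of every day sleeping."
query = "How much do felines doze"

print(keyword_overlap(query, fact))  # 0.0 -- no shared words, despite overlapping meaning
```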
To do this, we need to select the models we are going to work with. For convenience, we will use Ollama models, as they are free and can be run locally with minimal setup.
import ollama
EMBEDDING_MODEL = 'hf.co/CompendiumLabs/bge-base-en-v1.5-gguf'
LANGUAGE_MODEL = 'hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF'
# Each element in the VECTOR_DB will be a tuple (chunk, embedding)
VECTOR_DB = []
def add_chunk_to_database(chunk):
    embedding = ollama.embed(model=EMBEDDING_MODEL, input=chunk)['embeddings'][0]
    VECTOR_DB.append((chunk, embedding))
We will treat each line in our dataset as one chunk. Let us now calculate the embeddings and add them to the VECTOR_DB list.
# Let us check if the vectorization has already been done
try:
    with open('vector_db.txt', 'r') as file:
        for line in file:
            chunk, embedding_str = line.strip().split('\t')
            embedding = list(map(float, embedding_str.split(',')))
            VECTOR_DB.append((chunk, embedding))
    print(f'Loaded {len(VECTOR_DB)} entries from vector_db.txt')
    vectorized_dataset_loaded = True
except FileNotFoundError:
    print('vector_db.txt not found, proceeding to vectorize the dataset')
    for i, chunk in enumerate(dataset):
        add_chunk_to_database(chunk)
        print(f'Added chunk {i+1}/{len(dataset)} to the database')
    vectorized_dataset_loaded = False
    print(f'Added {len(VECTOR_DB)} chunks to the database')
Loaded 151 entries from vector_db.txt
Let us save our vectorized database.
print(f"The vectors in this list of tuples are {(type(VECTOR_DB[0][1]))}")
The vectors in this list of tuples are <class 'list'>
# Save the vector database to a txt file
if not vectorized_dataset_loaded:
    print('Saving the vector database to vector_db.txt')
    with open('vector_db.txt', 'w') as file:
        for chunk, embedding in VECTOR_DB:
            file.write(f"{chunk.strip()}\t{','.join(map(str, embedding))}\n")
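As a sanity check on the tab-separated format used above, a small round trip (using a throwaway file name, `roundtrip_test.txt`, and a made-up miniature database, not the real `vector_db.txt`) shows that chunks and float embeddings survive saving and reloading:

```python
# Hypothetical miniature database: (chunk, embedding) tuples, like VECTOR_DB
mini_db = [('Cats purr.', [0.25, -1.5, 3.0]), ('Cats sleep.', [0.0, 2.0, -0.125])]

# Save using the same tab-separated, comma-joined format as above
with open('roundtrip_test.txt', 'w') as file:
    for chunk, embedding in mini_db:
        file.write(f"{chunk.strip()}\t{','.join(map(str, embedding))}\n")

# Reload using the same parsing as the loading cell
loaded = []
with open('roundtrip_test.txt', 'r') as file:
    for line in file:
        chunk, embedding_str = line.strip().split('\t')
        loaded.append((chunk, list(map(float, embedding_str.split(',')))))

print(loaded == mini_db)  # True: the format round-trips exactly
```

Note that this format assumes chunks contain no tab or newline characters; each line of cat-facts.txt satisfies that by construction.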
Information Retrieval
Now we want to implement a function that retrieves the information closest to the query for the LLM.
First, we need a function that calculates cosine similarity.
Let's do it in a way that is fast (even though, for this demo, we don't really need to).
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
def cosine_sim_sklearn_vecs(a, b):
    """
    Calculate cosine similarity between two vectors using sklearn.
    """
    a = np.array(a).reshape(1, -1)
    b = np.array(b).reshape(1, -1)
    return cosine_similarity(a, b)[0, 0]
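For reference, the same quantity can be computed directly with NumPy as the dot product divided by the product of the norms, cos(a, b) = a·b / (‖a‖ ‖b‖). This `cosine_sim_numpy` helper is just an illustrative alternative, not part of the pipeline:

```python
import numpy as np

def cosine_sim_numpy(a, b):
    """Cosine similarity: dot(a, b) / (||a|| * ||b||)."""
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Vectors at a 45-degree angle have similarity 1/sqrt(2)
print(round(cosine_sim_numpy([1.0, 0.0], [1.0, 1.0]), 4))  # 0.7071
```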
Now let's implement retrieval.
def retrieve(query, top_n=3):
    # Calculate the embedding for the query
    query_emb = ollama.embed(model=EMBEDDING_MODEL, input=query)['embeddings'][0]
    # Calculate cosine similarity for each chunk in the vector database
    similarities = [(chunk, cosine_sim_sklearn_vecs(query_emb, emb)) for chunk, emb in VECTOR_DB]
    # Sort the similarities in descending order
    similarities.sort(key=lambda x: x[1], reverse=True)
    return similarities[:top_n]
# input_query = input('Ask me a question: ')
input_query = "I'm George. I have a pet called Muezza. What is my favorite type of cat?"
print(f'Input query: {input_query}')
retrieved_knowledge = retrieve(input_query)
print('Retrieved knowledge:')
for chunk, similarity in retrieved_knowledge:
    print(f' - (similarity: {similarity:.2f}) {chunk}')

instruction_prompt = (
    "You are a helpful chatbot.\n"
    "Use only the following pieces of context to answer the question. "
    "Don't make up any new information:\n"
    + "\n".join([f' - {chunk}' for chunk, _ in retrieved_knowledge])
)
Input query: I'm George. I have a pet called Muezza. What is my favorite type of cat?
Retrieved knowledge:
 - (similarity: 0.79) Mohammed loved cats and reportedly his favorite cat, Muezza, was a tabby. Legend says that tabby cats have an “M” for Mohammed on top of their heads because Mohammad would often rest his hand on the cat’s head.
 - (similarity: 0.76) If you name is George, you are more likely to have parrots as pets. However, nonetheless, your favorite type of cat is probably going to be a persian cat.
 - (similarity: 0.67) The most popular pedigreed cat is the Persian cat, followed by the Main Coon cat and the Siamese cat.
As you can see, this simple retrieval approach works relatively well; however, it can fail to bring up the most relevant information, especially if the chunks contain multiple pieces of similar information.
Notice that the input query states that the user is George, and there is information about George's favorite cat in the 'database'. However, the first piece of information retrieved is about Mohammed, not George.
Let us see how the chatbot responds to the prompt
stream = ollama.chat(
    model=LANGUAGE_MODEL,
    messages=[
        {'role': 'system', 'content': instruction_prompt},
        {'role': 'user', 'content': input_query},
    ],
    stream=True,
)

response = ""
for chunk in stream:
    response += chunk['message']['content']

print("Chatbot response:")
print(response)
Chatbot response:
You are correct that your name is George, not Mohammed. Since you mentioned that Muezza is a tabby cat, it's likely that your favorite type of cat is a Persian cat. That's in line with the popular pedigreed cats mentioned: Persians, Main Coon cats, and Siamese cats.
Improving Retrieval with Reranking
While cosine similarity is a good first step, as mentioned, it can struggle with information-dense chunks, even though in this case we still got the correct answer (most of the time).
To keep the efficiency of the cosine similarity approach, we will still select the top 5 pieces of information using cosine similarity, and then rerank them to reorder and select the 3 most relevant pieces. We can afford to use a more computationally demanding model for this step.
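The two-stage scheme can be sketched independently of any model: take the cosine-similarity top 5, score each chunk with a cross-encoder, and keep the 3 best. In this illustrative sketch the chunk labels and the `mock_scores` dict are made up; in the real pipeline the scores come from a cross-encoder over (query, chunk) pairs.

```python
def rerank_with_scores(retrieved, scores, top_k=3):
    """Reorder cosine-retrieved (chunk, similarity) pairs by cross-encoder scores.

    `scores` maps each chunk to a relevance score; mocked here for illustration.
    """
    reranked = sorted(((chunk, scores[chunk]) for chunk, _ in retrieved),
                      key=lambda x: x[1], reverse=True)
    return reranked[:top_k]

# Five chunks as returned by cosine retrieval (chunk, similarity), best first
retrieved = [('A', 0.79), ('B', 0.76), ('C', 0.67), ('D', 0.60), ('E', 0.55)]
# Hypothetical cross-encoder scores that disagree with the cosine ordering
mock_scores = {'A': 4.3, 'B': 5.1, 'C': -5.2, 'D': 6.0, 'E': -1.0}

print(rerank_with_scores(retrieved, mock_scores))
# D and B outrank A once the (mock) cross-encoder has its say
```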
We will use a cross-encoder reranker (e.g., from Hugging Face Transformers) to score each (query, chunk) pair and sort the results accordingly.
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Load a cross-encoder reranker model (e.g., 'cross-encoder/ms-marco-MiniLM-L-6-v2')
# Actually, 'cross-encoder/ms-marco-MiniLM-L-6-v2' specifically does not work on M2 machines,
# so we use 'cross-encoder/ms-marco-MiniLM-L-12-v2' (it took a while to find this out)
reranker_model_name = 'cross-encoder/ms-marco-MiniLM-L-12-v2'
reranker_tokenizer = AutoTokenizer.from_pretrained(reranker_model_name)
reranker_model = AutoModelForSequenceClassification.from_pretrained(reranker_model_name)
def rerank(query, retrieved_chunks, top_k=3):
    pairs = [(query, chunk) for chunk, _ in retrieved_chunks]
    inputs = reranker_tokenizer.batch_encode_plus(pairs, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        scores = reranker_model(**inputs).logits.squeeze(-1).tolist()
    reranked = sorted(zip([chunk for chunk, _ in retrieved_chunks], scores), key=lambda x: x[1], reverse=True)
    return reranked[:top_k]
# Retrieve and rerank
input_query = "I'm George. I have a pet called Muezza. What is my favorite type of cat?"
retrieved_knowledge = retrieve(input_query, top_n=5) # Retrieve more chunks to rerank
reranked_knowledge = rerank(input_query, retrieved_knowledge)
print('Reranked knowledge:')
for chunk, score in reranked_knowledge:
    print(f' - (score: {score:.2f}) {chunk}')

instruction_prompt = (
    "You are a helpful chatbot.\n"
    "Use only the following pieces of context to answer the question. "
    "Don't make up any new information:\n"
    + "\n".join([f' - {chunk}' for chunk, _ in reranked_knowledge])
)
Reranked knowledge:
 - (score: 5.13) If you name is George, you are more likely to have parrots as pets. However, nonetheless, your favorite type of cat is probably going to be a persian cat.
 - (score: 4.26) Mohammed loved cats and reportedly his favorite cat, Muezza, was a tabby. Legend says that tabby cats have an “M” for Mohammed on top of their heads because Mohammad would often rest his hand on the cat’s head.
 - (score: -5.20) The most popular pedigreed cat is the Persian cat, followed by the Main Coon cat and the Siamese cat.
stream = ollama.chat(
    model=LANGUAGE_MODEL,
    messages=[
        {'role': 'system', 'content': instruction_prompt},
        {'role': 'user', 'content': input_query},
    ],
    stream=True,
)

response = ""
for chunk in stream:
    response += chunk['message']['content']

print("Chatbot response:")
print(response)
Chatbot response:
As George, your favorite type of cat is probably a Persian cat! That's what legend says, anyway...