Retrieval Augmented Generation using Llama Index
Part 1: Improving Fine-tuned Model using RAG¶

1. Download the PDF¶
PDFs are downloaded once and saved in "pdfs" folder.
To download from another URL, uncomment the code below.
# import os
# import requests
# from bs4 import BeautifulSoup
# # URL of the page to scrape (your provided URL)
# url = 'https://www.emaanlibrary.com/book/tafseer-ibn-kathir-in-english-114-surahs-complete/?ebook-category=ruqya&latest=1'
# # Send HTTP request to get the page content
# response = requests.get(url)
# # Parse the HTML content with BeautifulSoup
# soup = BeautifulSoup(response.content, 'html.parser')
# # Find all <a> tags with href links ending in .pdf
# pdf_links = soup.find_all('a', href=True)
# pdf_urls = []
# # Loop through all links and filter out the ones that are PDFs
# for link in pdf_links:
# href = link['href']
# if href.endswith('.pdf'):
# pdf_urls.append(href if 'http' in href else f'https://www.emaanlibrary.com{href}')
# # Create a folder to store downloaded PDFs
# if not os.path.exists('pdfs'):
# os.makedirs('pdfs')
# # Download each PDF
# for pdf_url in pdf_urls:
# pdf_name = pdf_url.split("/")[-1] # Extract the filename from the URL
# pdf_path = os.path.join('pdfs', pdf_name)
# # Send request to download the PDF
# response = requests.get(pdf_url)
# # Write PDF content to a file
# with open(pdf_path, 'wb') as pdf_file:
# pdf_file.write(response.content)
# print(f'Downloaded: {pdf_name}')
2. Import Library¶
# !pip install llama-index
# !pip install llama-index-embeddings-huggingface
# !pip install peft
# !pip install auto-gptq
# !pip install optimum
# !pip install bitsandbytes
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
import torch
from transformers import AutoTokenizer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
torch.cuda.is_available()
True
3. Define Llama Index Settings and Vector Database¶
The Settings is a bundle of commonly used resources used during the indexing and querying stage in a LlamaIndex workflow/application.
https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/settings/
We initially tried Settings.chunk_size = 120 and Settings.chunk_overlap = 20. However, this process led to a different number of tokens across different documents.
We then tried token based chunking. https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/
import any embedding model on HF hub (https://huggingface.co/spaces/mteb/leaderboard)
# Any embedding model on the HF hub can be used (https://huggingface.co/spaces/mteb/leaderboard)
# Alternative model (384-dimensional embeddings):
# Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
# ---- SETUP ----
# Embedding model config: the same model name drives both the embedder and
# the tokenizer below, so token counts match what the embedder sees.
model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
Settings.embed_model = HuggingFaceEmbedding(
model_name=model_name,
# show_progress_bar=True
)
# No LLM is attached to the LlamaIndex settings at this stage; the recorded
# output reports "LLM is explicitly disabled. Using MockLLM."
Settings.llm = None
# Tokenizer of the embedding model; used further down to count tokens per
# page and per sentence chunk.
embed_tokenizer = AutoTokenizer.from_pretrained(model_name)
# Chunking settings registered on the global Settings object.
# NOTE(review): the prose in section 4.1 mentions a chunk size of 256, while
# 128 is set here - confirm which value is intended.
Settings.chunk_size = 128
Settings.chunk_overlap = 32
LLM is explicitly disabled. Using MockLLM.
4. Read Preprocess data¶
SimpleDirectoryReader is the simplest way to load data from local files into LlamaIndex. For production use cases it's more likely that you'll want to use one of the many Readers available on LlamaHub, but SimpleDirectoryReader is a great way to get started. https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader/
tqdm is used to track progress.
Sample Page of Tafsir¶
Sample page of a Hadith¶
From the above we can see that the 114 tafsir documents we used have fewer words per page than the two Hadith documents we are going to embed.
4.1 Load documents using SimpleDirectoryReader¶
We will first embed the documents using a chunk size of 256 with an overlap of 32. These numbers were chosen after several rounds of trial and error.
from tqdm import tqdm
from time import perf_counter as timer
# Load every PDF found directly inside "pdfs" and time the whole step.
start_time = timer()
reader = SimpleDirectoryReader(
    input_dir="pdfs",
    required_exts=[".pdf"],
    # exclude=["en_Sahih_Al-Bukhari.pdf", "en_Sahih_Muslim.pdf"],
    recursive=False,
)
# iter_data() hands back the documents in batches; flatten them into one list.
# (Wrap the iterator in tqdm(...) to get a progress bar.)
documents = []
for batch in reader.iter_data():
    documents += batch
end_time = timer()
print(f"[INFO] Time taken: {end_time-start_time:.5f} seconds.")
[INFO] Time taken: 515.89639 seconds.
4.2 Use sentencizer¶
A better way to chunk documents is to split the text into sentences with an NLP sentencizer and later join them back together to keep the context. This ensures that sentences are not cut in the middle during chunking; whole sentences are always kept together.
from spacy.lang.en import English # see https://spacy.io/usage for install instructions
# Instantiate spaCy's English language class; it is only used here to split
# page text into sentences.
nlp = English()
# Add a sentencizer pipeline, see https://spacy.io/api/sentencizer/
nlp.add_pipe("sentencizer")
<spacy.pipeline.sentencizer.Sentencizer at 0x7fd17deeba50>
4.3 Extract relevant info from loaded documents¶
We will create chunk and extract information from each chunk.
# Build one record per loaded page: cleaned text, its sentences and size stats.
docs = []
# for document in tqdm(documents):  # optional progress bar
for document in documents:
    file_name = document.metadata['file_name']
    page = document.metadata['page_label']
    # Clean-up: drop newlines and apostrophes, shorten "..." to ".".
    # NOTE(review): as written, .replace(' ', ' ') swaps a single space for a
    # single space (a no-op); presumably it was meant to collapse double
    # spaces - confirm against the original notebook.
    content = (
        document.text
        .replace('\n', '')
        .replace(' ', ' ')
        .replace('\'', '')
        .replace('...', '.')
    )
    sentences = [str(sent) for sent in nlp(content).sents]
    page_characters = len(content)
    # truncation=False so the count covers the whole page, even when it is
    # longer than the tokenizer's maximum sequence length (the tokenizer then
    # only emits a warning).
    page_tokens = len(embed_tokenizer.encode(content, truncation=False))
    docs.append(
        {
            "file_name": file_name,
            "page": page,
            "content": content,
            "sentences": sentences,
            # The keys say "chunk_*", but at this stage the values are
            # measured on the whole page.
            "chunk_characters": page_characters,
            "chunk_tokens": page_tokens,
        }
    )
Token indices sequence length is longer than the specified maximum sequence length for this model (532 > 512). Running this sequence through the model will result in indexing errors
import random
random.sample(docs, 1)
[{'file_name': '004Nisa.pdf', 'page': '68', 'content': ' 68 Prohibiting the Daughter-in-Law for Marriage Allah said, . .وَ The wives of your sons who (spring) from your own loins, Therefore, you are prohibited to marry the wives of your own sons, but not the wives of your adopted sons, as adoption was common practice in Jahiliyyah . Allah said, 2782 2802 So when Zayd had accomplished his desire from her (i.e. divorced her), We gave her to you in marriage, so that (in future) there may be no difficulty to the believers in respect of (the marriage of) the wives of their adopted sons when the latter have no desire to keep them (i.e. they had divorced them). (33:37) Ibn Jurayj said, "I asked Ata about Allahs statement, وَ ﺹْ The wives of your sons who (spring) from your own loins), He said, `We were told that when the Prophet married the ex-wife of Zayd (who was the Prophets adopted son before Islam prohibited this practice), the idolators in Makkah criticized him. Allah sent down the Ayat : وَ The wives of your sons who (spring) from your own loins, ', 'sentences': [' 68 Prohibiting the Daughter-in-Law for Marriage Allah said, .', '.وَ The wives of your sons who (spring) from your own loins, Therefore, you are prohibited to marry the wives of your own sons, but not the wives of your adopted sons, as adoption was common practice in Jahiliyyah .', 'Allah said, 2782 2802 So when Zayd had accomplished his desire from her (i.e. divorced her), We gave her to you in marriage, so that (in future) there may be no difficulty to the believers in respect of (the marriage of) the wives of their adopted sons when the latter have no desire to keep them (i.e. they had divorced them). 
(', '33:37) Ibn Jurayj said, "I asked Ata about Allahs statement, وَ ﺹْ The wives of your sons who (spring) from your own loins), He said, `We were told that when the Prophet married the ex-wife of Zayd (who was the Prophets adopted son before Islam prohibited this practice), the idolators in Makkah criticized him.', 'Allah sent down the Ayat : وَ The wives of your sons who (spring) from your own loins,'], 'chunk_characters': 1009, 'chunk_tokens': 289}]
4.4 Create Pandas dataframe¶
import pandas as pd
import numpy as np
df = pd.DataFrame(docs)
df.tail()
file_name | page | content | sentences | chunk_characters | chunk_tokens | |
---|---|---|---|---|---|---|
12050 | en_Sahih_Muslim.pdf | 1796 | SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY (... | [SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY ... | 1954 | 538 |
12051 | en_Sahih_Muslim.pdf | 1797 | SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY (... | [SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY ... | 1637 | 453 |
12052 | en_Sahih_Muslim.pdf | 1798 | SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY (... | [SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY ... | 1762 | 454 |
12053 | en_Sahih_Muslim.pdf | 1799 | SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY (... | [SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY ... | 2221 | 612 |
12054 | en_Sahih_Muslim.pdf | 1800 | SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY (... | [SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY ... | 731 | 220 |
df.describe()
chunk_characters | chunk_tokens | |
---|---|---|
count | 12055.000000 | 12055.000000 |
mean | 1349.954791 | 370.211530 |
std | 663.509350 | 179.260177 |
min | 26.000000 | 12.000000 |
25% | 902.000000 | 250.000000 |
50% | 1096.000000 | 297.000000 |
75% | 1706.000000 | 471.000000 |
max | 4002.000000 | 1109.000000 |
import matplotlib.pyplot as plt
# Token count of every page, with the overall mean as a dashed reference line.
page_token_counts = df["chunk_tokens"]
_, ax = plt.subplots(figsize=(10, 6))
ax.plot(df.index, page_token_counts, color='k', label="Token Count per Chunk")
ax.axhline(page_token_counts.mean(), color='red', linestyle='--', label='Mean Token Count')
ax.set_xlabel("Chunk Index")
ax.set_ylabel("Number of Tokens per Chunk")
ax.legend()
ax.grid(False)
plt.show()
Note¶
We can see that the token counts in the last documents are higher than the model can handle. Therefore, we will process the data using manual sentence chunking.
We created sentences in our dataframe so that we can use manual chunking procedure.
5. Manual Chunking of Documents¶
We created sentences in our dataframe so that we can use manual chunking procedure.
import re
def create_chunk(docs, slice_size=10, tokenizer=None):
    """Group each page's sentences into fixed-size chunks and describe every chunk.

    Args:
        docs: Iterable of page records (dicts) providing the keys
            ``"file_name"``, ``"page"`` and ``"sentences"`` (a list of
            sentence strings).
        slice_size: Maximum number of sentences per chunk; must be >= 1.
        tokenizer: Object exposing ``encode(text, truncation=False)``, used to
            count tokens. Defaults to the module-level ``embed_tokenizer``.

    Returns:
        A flat list with one dict per chunk holding ``document_name``,
        ``page_number``, ``sentence_chunk`` and the character, word and token
        counts of the chunk text.

    Raises:
        ValueError: If ``slice_size`` is smaller than 1.

    Side effects:
        Every record in ``docs`` gains the keys ``"sentence_chunks"`` (the
        ``str()`` of its chunk list) and ``"num_chunks"``, exactly as the
        original implementation did.
    """
    if slice_size < 1:
        raise ValueError(f"slice_size must be a positive integer, got {slice_size!r}")
    if tokenizer is None:
        # Fall back to the notebook-wide embedding tokenizer.
        tokenizer = embed_tokenizer
    # Matches a full stop immediately followed by a capital letter, so the
    # missing space between two glued sentences can be re-inserted.
    glued_sentences = re.compile(r'\.([A-Z])')

    docs_and_chunks = []
    # Wrap ``docs`` in tqdm(docs, desc="Creating Chunks") for a progress bar.
    for item in docs:
        sentences = item["sentences"]
        sentence_chunks = [
            sentences[start:start + slice_size]
            for start in range(0, len(sentences), slice_size)
        ]
        item["sentence_chunks"] = str(sentence_chunks)
        item["num_chunks"] = len(sentence_chunks)
        for sentence_chunk in sentence_chunks:
            chunk_text = glued_sentences.sub(r'. \1', " ".join(sentence_chunk).strip())
            docs_and_chunks.append(
                {
                    "document_name": item["file_name"],
                    "page_number": item["page"],
                    "sentence_chunk": chunk_text,
                    "chunk_char_count": len(chunk_text),
                    "chunk_word_count": len(chunk_text.split()),
                    # truncation=False: count the full chunk, not a clipped one.
                    "chunk_token_count": len(tokenizer.encode(chunk_text, truncation=False)),
                }
            )
    return docs_and_chunks
chunked_docs = create_chunk(docs, slice_size=10)
random.sample(chunked_docs, 1)
[{'document_name': '059Hashr.pdf', 'page_number': '49', 'sentence_chunk': '49 And indeed, there are stones out of which rivers gush forth, and indeed, there are of them (stones) which split asunder so that water flows from them, and indeed, there are of them which fall down for fear of Allah. ( 2:74) Glorifying Allah the Exalted by mentioning His Names and Attributes Allah the Exalted said, هُ afii64060 He is Allah, beside Whom La ilaha illa Huwa , the All-Knower of the unseen and the seen. He is the Most Gracious, the Most Merciful. Allah states that He Alone is worthy of worship, there is no Lord or God for the existence, except Him. All that is being worshipped instead of Allah are false deities. Allah is the All-Knower in the unseen and the seen, He knows all that pertains to the creations that we see, and those we cannot see. Nothing in heaven or on earth ever escapes His knowledge, no matter how great or insignificant, big or small, including ants in darkness. Allahs statement, هُ He is the Most Gracious, the Most Merciful). was duly explained before at the very beginning of this Tafsir, so it is not necessary to repeat it here, and it asserts that Allah is the Owner of the wide encompassing mercy that entails all of His creation. He is Ar-Rahman and Ar-Rahim of this life and the Hereafter.', 'chunk_char_count': 1243, 'chunk_word_count': 226, 'chunk_token_count': 332}]
chunked_df = pd.DataFrame(chunked_docs)
chunked_df.tail()
document_name | page_number | sentence_chunk | chunk_char_count | chunk_word_count | chunk_token_count | |
---|---|---|---|---|---|---|
19142 | en_Sahih_Muslim.pdf | 1796 | 16), there was a gap of four years.1796 / 1800 | 46 | 10 | 16 |
19143 | en_Sahih_Muslim.pdf | 1797 | SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY (... | 1640 | 279 | 452 |
19144 | en_Sahih_Muslim.pdf | 1798 | SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY (... | 1765 | 309 | 455 |
19145 | en_Sahih_Muslim.pdf | 1799 | SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY (... | 2224 | 393 | 610 |
19146 | en_Sahih_Muslim.pdf | 1800 | SAHIH MUSLIM BOOK 43: THE BOOK OF COMMENTARY (... | 732 | 129 | 220 |
import matplotlib.pyplot as plt
# Token count of every sentence chunk, with the mean as a dashed reference line.
chunk_token_counts = chunked_df["chunk_token_count"]
_, ax = plt.subplots(figsize=(10, 6))
ax.plot(chunked_df.index, chunk_token_counts, color='k', label="Token Count per Chunk")
ax.axhline(chunk_token_counts.mean(), color='red', linestyle='--', label='Mean Token Count')
ax.set_xlabel("Chunk Index")
ax.set_ylabel("Number of Tokens per Chunk")
ax.legend()
ax.grid(False)
plt.show()
chunked_df.describe()
chunk_char_count | chunk_word_count | chunk_token_count | |
---|---|---|---|
count | 19147.000000 | 19147.000000 | 19147.000000 |
mean | 849.320468 | 156.005588 | 233.877892 |
std | 477.952229 | 86.271716 | 128.194973 |
min | 0.000000 | 0.000000 | 2.000000 |
25% | 547.000000 | 102.000000 | 156.000000 |
50% | 875.000000 | 161.000000 | 242.000000 |
75% | 1107.000000 | 203.000000 | 300.000000 |
max | 2896.000000 | 527.000000 | 858.000000 |
5.1 Data filtration¶
Some of the chunks have very few tokens. We will remove those from the dataset.
min_token_df = chunked_df[chunked_df["chunk_token_count"] < 7]
print(len(min_token_df))
min_token_df
245
document_name | page_number | sentence_chunk | chunk_char_count | chunk_word_count | chunk_token_count | |
---|---|---|---|---|---|---|
17 | 001Fateh.pdf | 11 | 0 | 0 | 2 | |
74 | 001Fateh.pdf | 51 | When | 4 | 1 | 3 |
139 | 002BaqarahI.pdf | 30 | 0 | 0 | 2 | |
161 | 002BaqarahI.pdf | 44 | Do | 2 | 1 | 3 |
271 | 002BaqarahI.pdf | 125 | 0 | 0 | 2 | |
... | ... | ... | ... | ... | ... | ... |
16724 | en_Sahih_Muslim.pdf | 515 | 515 / 1800 | 10 | 3 | 6 |
17258 | en_Sahih_Muslim.pdf | 802 | 802 / 1800 | 10 | 3 | 6 |
17457 | en_Sahih_Muslim.pdf | 903 | 903 / 1800 | 10 | 3 | 6 |
18367 | en_Sahih_Muslim.pdf | 1368 | 1368 / 1800 | 11 | 3 | 6 |
18825 | en_Sahih_Muslim.pdf | 1626 | 1626 / 1800 | 11 | 3 | 6 |
245 rows × 6 columns
import pandas as pd
def filter_min_tokens(df, min_tokens=7):
    """Keep only the rows whose ``chunk_token_count`` is at least ``min_tokens``.

    The result is a new dataframe whose index is renumbered from 0; the old
    index is discarded.
    """
    has_enough_tokens = df["chunk_token_count"] >= min_tokens
    kept_rows = df.loc[has_enough_tokens]
    return kept_rows.reset_index(drop=True)
filtered_df = filter_min_tokens(chunked_df, min_tokens= 7)
filtered_df.head()
document_name | page_number | sentence_chunk | chunk_char_count | chunk_word_count | chunk_token_count | |
---|---|---|---|---|---|---|
0 | 001Fateh.pdf | 1 | Revealed in Makkah The Meaning of Al-Fatehah a... | 641 | 116 | 188 |
1 | 001Fateh.pdf | 2 | 2 The prayer (i.e., Al-Fatihah ) is divided in... | 1014 | 176 | 288 |
2 | 001Fateh.pdf | 2 | According to the majority of the reciters of A... | 281 | 49 | 72 |
3 | 001Fateh.pdf | 3 | 3 The Number of Words and Letters in Al-Fatih... | 1107 | 197 | 300 |
4 | 001Fateh.pdf | 4 | 4 Also, Abu Jafar, Muhammad bin Jarir At-Tabar... | 941 | 176 | 253 |
import matplotlib.pyplot as plt
# Token count of every chunk that survived filtering, with the mean as a
# dashed reference line.
filtered_token_counts = filtered_df["chunk_token_count"]
_, ax = plt.subplots(figsize=(10, 6))
ax.plot(filtered_df.index, filtered_token_counts, color='k', label="Token Count per Chunk")
ax.axhline(filtered_token_counts.mean(), color='red', linestyle='--', label='Mean Token Count')
ax.set_xlabel("Chunk Index")
ax.set_ylabel("Number of Tokens per Chunk")
ax.legend()
ax.grid(False)
plt.show()
5.2 Saving the chunked dataframe for re-use¶
# Save the DataFrame
filtered_df.to_csv('./pdfs/filtered_df.csv', index=False)
6. Vector Store Index¶
An Index is a data structure that allows us to quickly retrieve relevant context for a user query. For LlamaIndex, it's the core foundation for retrieval-augmented generation (RAG) use-cases. Vector Stores are a key component of retrieval-augmented generation (RAG) and so you will end up using them in nearly every application you make using LlamaIndex, either directly or indirectly. https://docs.llamaindex.ai/en/stable/module_guides/storing/vector_stores/
# Loading saved dataframe
filtered_df = pd.read_csv("./pdfs/filtered_df.csv")
filtered_df.head()
6.1 Creating LlamaIndex Document¶
from llama_index.core.schema import Document
import os
print(os.getcwd())
# Dataframe columns copied as-is into each Document's metadata.
metadata_keys = (
    "document_name",
    "page_number",
    "chunk_char_count",
    "chunk_word_count",
    "chunk_token_count",
)
# One LlamaIndex Document per dataframe row: the chunk text plus its metadata.
filtered_documents = [
    Document(
        text=row["sentence_chunk"],
        metadata={key: row[key] for key in metadata_keys},
    )
    for _, row in filtered_df.iterrows()
]
filtered_documents[:1]
6.2 Store data in faiss Store Index¶
https://docs.llamaindex.ai/en/stable/examples/vector_stores/FaissIndexDemo/
import faiss
# Embedding dimension; must match the output size of the embedding model
# (768 for paraphrase-multilingual-mpnet-base-v2):
# https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2
d = 768
# Faiss flat index comparing vectors by L2 distance.
faiss_index = faiss.IndexFlatL2(d)
from llama_index.core import (
SimpleDirectoryReader,
load_index_from_storage,
VectorStoreIndex,
StorageContext,
)
from llama_index.vector_stores.faiss import FaissVectorStore
from IPython.display import Markdown, display
# Wrap the raw Faiss index so LlamaIndex can use it as its vector store.
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Build the index from the pre-chunked documents; embeddings come from
# Settings.embed_model.
index = VectorStoreIndex.from_documents(
filtered_documents, storage_context=storage_context
)
# save index to disk
index.storage_context.persist(persist_dir="pdfs/faiss_db")
7. Reload Saved Documents¶
import faiss
# Reload the same embedding model that was used to build the persisted index,
# so that queries are embedded consistently with the stored chunks.
model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
Settings.embed_model = HuggingFaceEmbedding(model_name=model_name)
# load index from disk (same directory the index was persisted to)
vector_store = FaissVectorStore.from_persist_dir("pdfs/faiss_db")
storage_context = StorageContext.from_defaults(
vector_store=vector_store, persist_dir="pdfs/faiss_db"
)
index = load_index_from_storage(storage_context=storage_context)
7.1 Set Up Search Function¶
# set number of docs to retrieve
top_k = 5
# configure retriever
retriever = VectorIndexRetriever(
index=index,
similarity_top_k=top_k,
)
# assemble query engine
# NOTE(review): the Faiss index is an IndexFlatL2, so the node scores may be
# L2 distances (lower = closer) rather than similarities; confirm that
# similarity_cutoff=0.4 drops the weak matches and not the best ones.
query_engine = RetrieverQueryEngine(
retriever=retriever,
node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.4)],
)
7.2 Example: Retrieve Relevant Docs¶
manual_questions = [
    "What is the minimum amount of wealth a person need to own to pay Jakat",
    "When did battle of badr occured?",
    "Who will enter the paradise first?",
]
query_list = manual_questions
# query documents
query = query_list[0]
response = query_engine.query(query)
# reformat response
# Cap the number of printed sources with a local value: reassigning the global
# ``top_k`` here would permanently shrink the retrieval setting for later cells
# whenever a query returns fewer nodes.
num_sources = min(top_k, len(response.source_nodes))
sections = ["Context:\n"]
for rank, source in enumerate(response.source_nodes[:num_sources], start=1):
    node = source.node  # Get the actual node object
    sections.append(
        f"--- Source {rank} ---\n"
        f"Text: {node.text}\n"
        f"Metadata: {node.metadata}\n\n"  # metadata is a dictionary
    )
# Join once instead of growing the string piece by piece.
context = "".join(sections)
print(context)
Context: --- Source 1 --- Text: The minimum standard of surplus wealth over which zakat is charged is known as Nisab. It differs with different kinds of property, the most im -portant being 200 dirhams or 521 tolas (nearly 21 oz.) In case of silver, and 20 mithqals or 71 tolas (nearly 3 oz.) in case of gold. Metadata: {'document_name': 'en_Sahih_Muslim.pdf', 'page_number': 541, 'chunk_char_count': 1933, 'chunk_word_count': 350, 'chunk_token_count': 553} --- Source 2 --- Text: SAHIH MUSLIM BOOK 05: THE BOOK OF ZAKAT (KITAB AL-ZAKAT)thus a payment on the accumulated wealth. Leaving aside animals and agricultural yield, zakat is paid at almost a uniform rate of two and a half %. The minimum standard of surplus wealth over which zakat is charged is known as Nisab. Metadata: {'document_name': 'en_Sahih_Muslim.pdf', 'page_number': 541, 'chunk_char_count': 1933, 'chunk_word_count': 350, 'chunk_token_count': 553} --- Source 3 --- Text: The people of Jahiliyyah used to give the males, but not the females, a share in the inheritance. Therefore, Allah commands that both males and females take a share in the inheritance, although the portion of the males is twice as much as that of the females. There is a distinction because men need money to spend on their dependants, Metadata: {'document_name': '004Nisa.pdf', 'page_number': 30, 'chunk_char_count': 335, 'chunk_word_count': 60, 'chunk_token_count': 86} --- Source 4 --- Text: See how much I am in debt to others." When the debt was checked, it amounted to approximately eighty-six thousand. Umar said, "If the property of Umars family covers the debt, then pay the debt thereof; otherwise request it from Bani Adi bin Kab, and if that too is not sufficient, ask for it from Quraish tribe, and do not ask for it from any one else, and pay this debt on my behalf." 
Metadata: {'document_name': 'en_Sahih_Al-Bukhari.pdf', 'page_number': 867, 'chunk_char_count': 1045, 'chunk_word_count': 193, 'chunk_token_count': 275} --- Source 5 --- Text: one Jadha is to be paid; and if the number is between seventy-six to ninety (camels), two Bint Labuns are to be paid; and if they are from ninety-one to one-hundred-and twenty (camels), two Hiqqas are to be paid; and if they are over one-hundred and-twenty (camels), Metadata: {'document_name': 'en_Sahih_Al-Bukhari.pdf', 'page_number': 335, 'chunk_char_count': 2827, 'chunk_word_count': 512, 'chunk_token_count': 858}
8. Import LLM for generating answer¶
Now we will use a Large Language Model to generate an answer to the user query, with and without the context retrieved from our Vector Store.
# Run the following to empty the VRAM:
import gc
import torch
# del model, tokenizer, pipe
# Flush memory
gc.collect()
torch.cuda.empty_cache()
# load fine-tuned model from hub
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
# Alternative model:
# model_name = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
# NOTE: from here on ``model_name`` refers to the generator LLM, no longer to
# the embedding model configured earlier.
model_name = "Qwen/Qwen2.5-3B-Instruct"
# NOTE(review): no dtype is requested here; the sizes reported below
# (~12.3e9 bytes for ~3.09e9 parameters) suggest 4 bytes per parameter.
# Consider loading in half precision if VRAM is tight - confirm.
model = AutoModelForCausalLM.from_pretrained(model_name,
device_map="auto",
trust_remote_code=False,
revision="main")
# load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]
8.1 Check the model size and memory requirements¶
def get_model_num_params(model: torch.nn.Module):
    """Return the total number of scalar elements across all parameters of ``model``."""
    total = 0
    for parameter in model.parameters():
        total += parameter.numel()
    return total
get_model_num_params(model)
3085938688
def get_model_mem_size(model: torch.nn.Module):
    """
    Report the memory taken by a PyTorch model's parameters and buffers.

    Returns a dict with the total size in bytes (``model_mem_bytes``) and the
    same figure in megabytes and gigabytes, each rounded to two decimals.

    See: https://discuss.pytorch.org/t/gpu-memory-that-model-uses/56822
    """
    def _total_bytes(tensors):
        # element count times bytes-per-element, summed over all tensors
        return sum(tensor.nelement() * tensor.element_size() for tensor in tensors)

    total_bytes = _total_bytes(model.parameters()) + _total_bytes(model.buffers())
    return {
        "model_mem_bytes": total_bytes,
        "model_mem_mb": round(total_bytes / (1024**2), 2),
        "model_mem_gb": round(total_bytes / (1024**3), 2),
    }
get_model_mem_size(model)
{'model_mem_bytes': 12343755008, 'model_mem_mb': 11771.92, 'model_mem_gb': 11.5}
9. Text Generation using LLM¶
# List of queries
# Hand-written questions (indices 0-2 of query_list).
manual_questions = ["Who are the people bound to pay Jakat?",
"When did battle of badr occured?",
"Name the person who will enter the paradise first?",
]
# Questions about women's rights in Islam (indices 3-12 of query_list);
# presumably generated with ChatGPT, judging by the variable name - confirm.
chat_gpt = [
"What rights does Islam grant to women regarding property and inheritance?",
"Are women allowed to seek education in Islam?",
"Can women work and earn a living in Islam?",
"Do women have the right to choose their spouse in Islam?",
"What is the Islamic stance on domestic violence?",
"Are men and women equal in spiritual matters in Islam?",
"Do women have the right to participate in public and political life in Islam?",
"What does Islam say about women's dress code?",
"Can women lead prayers in Islam?",
"How does Islam address the issue of gender equality?"
]
# Manual questions first, then the chat_gpt ones.
query_list = manual_questions + chat_gpt
9.1 Text generation without context¶
model.eval()
# Choose query index 4 (the fifth entry) from the query list
query = query_list[4]
# Prompt template: instructions followed by the raw query (no retrieved context)
base_prompt = f"""
Provide a concise answer based on the Quran, tafsir, and hadith.
Do not include the reasoning, just the answer. Make sure the answer is factually correct based on islamic
faith.
\n{query}
"""
# Single-turn conversation for the instruction-tuned model
dialogue_template = [
    {"role": "user", "content": base_prompt},
]
# Apply the chat template; keep it as text so it can be tokenized below
prompt = tokenizer.apply_chat_template(
    conversation=dialogue_template,
    tokenize=False,
    add_generation_prompt=True,
)
# Send the inputs to the device the model actually lives on. The model is
# loaded with device_map="auto", so hard-coding "cuda" breaks on CPU-only
# machines and may not match where the model was placed.
input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **input_ids,
    do_sample=True,
    temperature=0.7,
    max_new_tokens=256,
)
# Turn the output tokens into text (prompt and special tokens included)
output_text = tokenizer.decode(outputs[0])
print(output_text)
<|im_start|>system You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|> <|im_start|>user Provide a concise answer based on the Quran, tafsir, and hadith. Do not include the reasoning, just the answer. Make sure the answer is factually correct based on islamic faith. Are women allowed to seek education in Islam? <|im_end|> <|im_start|>assistant Yes, women are allowed to seek education in Islam.<|im_end|>
9.2 Text generation with context (RAG)¶
# query documents
query = query_list[4]
response = query_engine.query(query)
# Cap the number of sources with a local value: reassigning the global
# ``top_k`` here would permanently shrink the retrieval setting for later
# cells whenever a query returns fewer nodes.
num_sources = min(top_k, len(response.source_nodes))
# container for all document info
doc_info = []
for i, source in enumerate(response.source_nodes[:num_sources]):
    node = source.node
    metadata = node.metadata
    doc_info.append({
        "score": round(source.score, 4),
        "content": node.text.strip(),
        "page": str(metadata.get('page_number', 'unknown')),
        "file_name": metadata.get('document_name', f'doc_{i}').replace('.pdf', ''),
    })
# sort by descending score (highest score first)
# NOTE(review): the Faiss index is an IndexFlatL2, so these scores may be L2
# distances (lower = closer); if so, this order puts the weakest match first -
# confirm.
doc_info = sorted(doc_info, key=lambda x: x["score"], reverse=True)
# now extract individual fields
scores = [str(item["score"]) for item in doc_info]
contents = [item["content"] for item in doc_info]
pages = [item["page"] for item in doc_info]
file_names = [item["file_name"] for item in doc_info]
# final context dictionary
context = {
    "scores": ', '.join(scores),
    "content": "\n".join(contents),
    "page": ', '.join(pages),
    "file_name": ', '.join(file_names),
}
# print
print(context)
{'scores': '0.6944, 0.6922, 0.6839, 0.6538, 0.6398', 'content': 'Then Allah revealed: وَ And tell the believing women to lower their gaze.). And Allah says: . وَ And tell the believing women to lower their gaze, meaning, from that which Allah has forbidden them to look at, apart from their husbands. ( Some ) scholars said that it is permissible for women to look at non-Mahram men without desire, as it was recorded in the Sahih that the Messenger of Allah was\nis suitable here, after Allah mentioned these prohibitions. وَ 2821 وَا 4:25 And whoever of you have not the means wherewith to wed free believing women, they may wed believing girls from among those whom your right hands possess, and Allah has full knowledge about your faith, you are one from another.\nAllah knows best. Abu Jafar bin Jarir (At-Tabari) said, after mentioning that there is Ijma that marrying women from the People of the Scripture is allowed, "Umar disliked this practice so that the Muslims do not refrain from marrying Muslim women, or for similar reasons.\nThis is prohibited for all women, but more so in the case of the women of Ahl Adh-Dhimmah , because there is nothing to prevent them from doing that, but Muslim women know that it is unlawful and so, would be deterred from doing it. The Messenger of Allah said: 62760 No woman should describe another woman to her husband so that it is as if he is looking at her.\nIt also indicates that it is permissible to divorce a woman before consummating the marriage with her. اﻥ believing women), this refers to what is usually the case, although there is no difference between a believing (Muslim) woman and a woman of the People of the Book in this regard, according to scholarly consensus. Ibn Abbas, may Allah be pleased with him, Sa`id bin Al-Musayyib,', 'page': '69, 75, 138, 73, 110', 'file_name': '024Nur, 004Nisa, 002BaqarahIi, 024Nur, 033Ahzab'}
model.eval()
query = query_list[4]
# Prompt template: retrieved context first, then the instructions and the query
base_prompt = f"""
Please see and following context given below.
\n{context['content']}
Answer the following query based on Quran, tafsir and hadith of Islamic faith.
Don't return the thinking, only return the answer.
Make sure your answers are as concise as possible and factually correct based on islamic faith.
\n{query}
"""
# Single-turn conversation for the instruction-tuned model
dialogue_template = [
    {"role": "user", "content": base_prompt},
]
# Apply the chat template; keep it as text so it can be tokenized below
prompt = tokenizer.apply_chat_template(
    conversation=dialogue_template,
    tokenize=False,
    add_generation_prompt=True,
)
# Send the inputs to the device the model actually lives on. The model is
# loaded with device_map="auto", so hard-coding "cuda" breaks on CPU-only
# machines and may not match where the model was placed.
input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **input_ids,
    do_sample=True,
    temperature=0.7,
    max_new_tokens=256,
)
# Turn the output tokens into text (prompt and special tokens included)
output_text = tokenizer.decode(outputs[0])
print(output_text)
<|im_start|>system You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|> <|im_start|>user Please see and following context given below. Then Allah revealed: وَ And tell the believing women to lower their gaze.). And Allah says: . وَ And tell the believing women to lower their gaze, meaning, from that which Allah has forbidden them to look at, apart from their husbands. ( Some ) scholars said that it is permissible for women to look at non-Mahram men without desire, as it was recorded in the Sahih that the Messenger of Allah was is suitable here, after Allah mentioned these prohibitions. وَ 2821 وَا 4:25 And whoever of you have not the means wherewith to wed free believing women, they may wed believing girls from among those whom your right hands possess, and Allah has full knowledge about your faith, you are one from another. Allah knows best. Abu Jafar bin Jarir (At-Tabari) said, after mentioning that there is Ijma that marrying women from the People of the Scripture is allowed, "Umar disliked this practice so that the Muslims do not refrain from marrying Muslim women, or for similar reasons. This is prohibited for all women, but more so in the case of the women of Ahl Adh-Dhimmah , because there is nothing to prevent them from doing that, but Muslim women know that it is unlawful and so, would be deterred from doing it. The Messenger of Allah said: 62760 No woman should describe another woman to her husband so that it is as if he is looking at her. It also indicates that it is permissible to divorce a woman before consummating the marriage with her. اﻥ believing women), this refers to what is usually the case, although there is no difference between a believing (Muslim) woman and a woman of the People of the Book in this regard, according to scholarly consensus. Ibn Abbas, may Allah be pleased with him, Sa`id bin Al-Musayyib, Answer the following query based on Quran, tafsir and hadith of Islamic faith. 
Don't return the thinking, only return the answer. Make sure your answers are as concise as possible and factually correct based on islamic faith. Are women allowed to seek education in Islam? <|im_end|> <|im_start|>assistant Yes, women are allowed to seek education in Islam.<|im_end|>
Part 2: Evaluation of RAG¶
Our LLM and retrieval system are now working. The next step is to evaluate the results.
See this blog post about LLM-as-a-Judge (LaaJ): https://arize.com/blog-course/llm-as-a-judge/¶
How can LLM-as-a-Judge be used for RAG Applications?
Contextual relevance and faithfulness are two of the most widely used metrics for assessing the accuracy and relevance of the documents retrieved in an LLM RAG pipeline.
1. Evaluation of LLM results without RAG¶
# Evaluation queries for the run without retrieval.
# `manual_questions` are hand-written; `chat_gpt` holds the remaining
# questions, all about women's rights and gender in Islam.
manual_questions = [
    "Who are the people bound to pay Jakat?",
    "When did battle of badr occured?",
    "Name the person who will enter the paradise first?",
]

chat_gpt = [
    "What rights does Islam grant to women regarding property and inheritance?",
    "Are women allowed to seek education in Islam?",
    "Can women work and earn a living in Islam?",
    "Do women have the right to choose their spouse in Islam?",
    "What is the Islamic stance on domestic violence?",
    "Are men and women equal in spiritual matters in Islam?",
    "Do women have the right to participate in public and political life in Islam?",
    "What does Islam say about women's dress code?",
    "Can women lead prayers in Islam?",
    "How does Islam address the issue of gender equality?",
]

# Full evaluation set: manual questions first, then the rest.
query_list = [*manual_questions, *chat_gpt]
## Generate a dataframe for all the queries from query_list
# Baseline run: every query is sent to the model WITHOUT retrieved context.
# The resulting dataframe stores, per query, the answer and the generation
# hyperparameters that produced it.

# Imported here because `pd.DataFrame` is used at the end of this cell, while
# the only pandas import visible nearby sits in a later cell.
import pandas as pd

# Hyperparameters
temperature = 0.6
max_new_tokens = 256

# Instruction wrapped around every query. Built once, filled in per query.
NO_RAG_PROMPT_TEMPLATE = """
Provide a concise answer based on the Quran, tafsir, and hadith.
Do not include the reasoning, just the answer. Make sure the answer is factually correct based on islamic
faith.
\n{query}
"""

# One dict per query; converted to a DataFrame after the loop.
eval_rows = []

for query in query_list:
    base_prompt = NO_RAG_PROMPT_TEMPLATE.format(query=query)

    # Create prompt template for instruction-tuned model
    dialogue_template = [
        {"role": "user",
         "content": base_prompt}
    ]

    # Apply the chat template
    prompt = tokenizer.apply_chat_template(conversation=dialogue_template,
                                           tokenize=False,
                                           add_generation_prompt=True)

    # Tokenize the prompt
    input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
    prompt_len = input_ids['input_ids'].shape[-1]  # Length of the prompt

    outputs = model.generate(
        **input_ids,
        do_sample=True,
        temperature=temperature,
        max_new_tokens=max_new_tokens)

    # Extract only the generated tokens (ignoring the prompt part)
    generated_tokens = outputs[0][prompt_len:]

    # Decode the generated response
    output_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    eval_rows.append({
        "query": query,
        # Remove any chat markers that are still present as plain text.
        "answer": output_text.replace('<|im_end|>', '').replace('<|im_start|>', ''),
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
    })

eval_df = pd.DataFrame(eval_rows)
eval_df
query | answer | temperature | max_new_tokens | |
---|---|---|---|---|
0 | Who are the people bound to pay Jakat? | The people bound to pay Zakat are those who po... | 0.6 | 256 |
1 | When did battle of badr occured? | The Battle of Badr occurred in March 624 CE. | 0.6 | 256 |
2 | Name the person who will enter the paradise fi... | The person who will enter Paradise first is Ma... | 0.6 | 256 |
3 | What rights does Islam grant to women regardin... | Islam grants women rights in property and inhe... | 0.6 | 256 |
4 | Are women allowed to seek education in Islam? | Yes, women are allowed to seek education in Is... | 0.6 | 256 |
5 | Can women work and earn a living in Islam? | Yes, women can work and earn a living in Islam. | 0.6 | 256 |
6 | Do women have the right to choose their spouse... | Yes, women have the right to choose their spou... | 0.6 | 256 |
7 | What is the Islamic stance on domestic violence? | The Islamic stance is that domestic violence i... | 0.6 | 256 |
8 | Are men and women equal in spiritual matters i... | In Islam, men and women are equal spiritually,... | 0.6 | 256 |
9 | Do women have the right to participate in publ... | Yes, according to Islamic teachings as interpr... | 0.6 | 256 |
10 | What does Islam say about women's dress code? | Islam encourages modesty in dress, but specifi... | 0.6 | 256 |
11 | Can women lead prayers in Islam? | No, women are not permitted to lead prayers in... | 0.6 | 256 |
12 | How does Islam address the issue of gender equ... | Islam addresses the issue of gender equality t... | 0.6 | 256 |
import pandas as pd

# Display options that are lifted while inspecting the results:
# row count and per-column text width.
_display_options = ('display.max_rows', 'display.max_colwidth')

# Show all rows and the full width of each column
for _option_name in _display_options:
    pd.set_option(_option_name, None)

# Now print the selected columns
# print(eval_df[['query', 'answer']])

# Put both options back to their pandas defaults
for _option_name in _display_options:
    pd.reset_option(_option_name)
### Evaluation using Deepseek
2. Evaluation of LLM results with RAG¶
# Evaluation queries for the run with retrieval (RAG).
# `manual_questions` are hand-written; `chat_gpt` holds the remaining
# questions, all about women's rights and gender in Islam.
manual_questions = [
    "Which sahaba did Prophet Muhammad (ﷺ) help to become free from being a slave by planting 300+ date palm trees?",
    "When did battle of badr occured?",
    "Name the person who will enter the paradise first?",
]

chat_gpt = [
    "What rights does Islam grant to women regarding property and inheritance?",
    "Are women allowed to seek education in Islam?",
    "Can women work and earn a living in Islam?",
    "Do women have the right to choose their spouse in Islam?",
    "What is the Islamic stance on domestic violence?",
    "Are men and women equal in spiritual matters in Islam?",
    "Do women have the right to participate in public and political life in Islam?",
    "What does Islam say about women's dress code?",
    "Can women lead prayers in Islam?",
    "How does Islam address the issue of gender equality?",
]

# Full evaluation set: manual questions first, then the rest.
query_list = [*manual_questions, *chat_gpt]
## Generate a dataframe for all the queries from query_list
# RAG run: for every query the query engine is asked first, the text of its
# top source nodes is placed in the prompt as context, and the model answers.
# The resulting dataframe stores, per query, the answer, the generation
# hyperparameters, and the retrieved context with its scores and metadata.

# Imported here because `pd.DataFrame` is used at the end of this cell, while
# the only pandas import visible nearby sits in a later cell.
import pandas as pd

# Hyperparameters
temperature = 0.6
max_new_tokens = 256
top_k = 5  # maximum number of source nodes used as context per query

# Prompt wrapped around the retrieved context and the query. Built once,
# filled in per query.
RAG_PROMPT_TEMPLATE = """
You are an expert in Islamic knowledge, specializing in the Quran, authentic Hadith, classical Tafsir,
and Islamic history. Answer the question supported from the context below. Be concise, accurate, and factual.
Avoid speculation, reasoning, or unsupported generalities.
Focus on clarity, authenticity, and relevance and be concise in answering.
Context:
{context}
Question:
{query}
Answer:
"""


def _build_context(source_nodes, limit):
    """Summarise the first `limit` source nodes of a query response.

    Args:
        source_nodes: sequence of scored nodes; each item exposes `.score`
            and `.node`, and the node exposes `.text` and `.metadata`.
        limit: maximum number of nodes to use. Fewer are used when the
            response contains fewer.

    Returns:
        dict with string values, ordered from highest to lowest score:
        "scores", "page" and "file_name" are comma-separated, "content" is
        the node texts joined by newlines.
    """
    doc_info = []
    # Slicing caps the count per call without touching the caller's setting.
    for i, scored_node in enumerate(source_nodes[:limit]):
        node = scored_node.node
        metadata = node.metadata
        # NOTE(review): 'document_name' / 'page_number' must be present in the
        # node metadata for real values to appear here; otherwise the
        # fallbacks below are used. Confirm against the ingestion step.
        file_name = metadata.get('document_name', f'doc_{i}').replace('.pdf', '')
        page = str(metadata.get('page_number', 'unknown'))
        doc_info.append({
            "score": round(scored_node.score, 4),
            "content": node.text.strip(),
            "page": page,
            "file_name": file_name,
        })

    # Sort by descending similarity score (best match first).
    doc_info.sort(key=lambda item: item["score"], reverse=True)

    return {
        "scores": ', '.join(str(item["score"]) for item in doc_info),
        "content": "\n".join(item["content"] for item in doc_info),
        "page": ', '.join(item["page"] for item in doc_info),
        "file_name": ', '.join(item["file_name"] for item in doc_info),
    }


# One dict per query; converted to a DataFrame after the loop.
eval_rows = []

for query in query_list:
    response = query_engine.query(query)

    # `top_k` is passed as a limit and never reassigned, so one query with
    # few source nodes cannot reduce the context size of later queries.
    context = _build_context(response.source_nodes, top_k)

    base_prompt = RAG_PROMPT_TEMPLATE.format(context=context['content'],
                                             query=query)

    # Create prompt template for instruction-tuned model
    dialogue_template = [
        {"role": "user",
         "content": base_prompt}
    ]

    # Apply the chat template
    prompt = tokenizer.apply_chat_template(conversation=dialogue_template,
                                           tokenize=False,
                                           add_generation_prompt=True)

    # Tokenize the prompt
    input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
    prompt_len = input_ids['input_ids'].shape[-1]  # Length of the prompt

    outputs = model.generate(
        **input_ids,
        do_sample=True,
        temperature=temperature,
        max_new_tokens=max_new_tokens)

    # Extract only the generated tokens (ignoring the context and prompt part)
    generated_tokens = outputs[0][prompt_len:]

    # Decode the generated response
    output_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    eval_rows.append({
        "query": query,
        # Remove any chat markers that are still present as plain text.
        "answer": output_text.replace('<|im_end|>', '').replace('<|im_start|>', ''),
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "context_items": context['content'],
        "scores": context['scores'],
        "document": context['file_name'],
        "page_number": context['page'],
    })

eval_df = pd.DataFrame(eval_rows)
# eval_df
import pandas as pd

# Display options that are lifted while inspecting the results:
# row count and per-column text width.
_display_options = ('display.max_rows', 'display.max_colwidth')

# Show all rows and the full width of each column
for _option_name in _display_options:
    pd.set_option(_option_name, None)

# Now print the selected columns
# print(eval_df[['query', 'answer']])

# Put both options back to their pandas defaults
for _option_name in _display_options:
    pd.reset_option(_option_name)
Evaluation results with chatgpt3.5¶
Comments
Post a Comment