In [1]:
Copied!
# Markdown blurb passed as the `description` of the demo built with `show(...)`.
# Defined once (the cell body was previously repeated verbatim) and with the
# "embedding" spelling corrected in the user-facing text.
desc = """
### Question Answering with Retrieval
Chain that answers questions with embedding based retrieval. [[Code](https://github.com/srush/MiniChain/blob/main/examples/qa.py)]
(Adapted from [OpenAI Notebook](https://github.com/openai/openai-cookbook/blob/main/examples/Question_answering_using_embeddings.ipynb).)
"""
$
In [2]:
Copied!
import datasets
import numpy as np
from minichain import prompt, show, OpenAIEmbed, OpenAI
from manifest import Manifest
import datasets
import numpy as np
from minichain import prompt, show, OpenAIEmbed, OpenAI
from manifest import Manifest
We use a Hugging Face dataset as the document store and attach a FAISS index to its `embeddings` column.
In [3]:
Copied!
# Load the pre-embedded dataset from disk once (the load/index pair was
# previously repeated, discarding the first result).
olympics = datasets.load_from_disk("olympics.data")
# Attach a FAISS index to the "embeddings" column; kept as the cell's last
# expression so the indexed dataset is displayed as the cell output.
olympics.add_faiss_index("embeddings")
Out[3]:
Dataset({ features: ['title', 'heading', 'content', 'tokens', 'embeddings'], num_rows: 3964 })
Fast KNN retrieval prompt
In [4]:
Copied!
@prompt(OpenAIEmbed())
def get_neighbors(model, inp, k):
    """Return the "content" field of the `k` dataset rows nearest to `inp`.

    Args:
        model: callable supplied by the `@prompt(OpenAIEmbed())` decorator;
            called on `inp` to obtain its embedding.
        inp: the query to embed.
        k: number of nearest examples to retrieve.

    Returns:
        The "content" entries of the retrieved examples.
    """
    query_embedding = model(inp)
    # Search the FAISS index attached to the "embeddings" column of `olympics`.
    nearest = olympics.get_nearest_examples(
        "embeddings", np.array(query_embedding), k
    )
    return nearest.examples["content"]
In [5]:
Copied!
@prompt(OpenAI(),
        template_file="qa.pmpt.tpl")
def get_result(model, query, neighbors):
    """Ask the model to answer `query` given the retrieved `neighbors`.

    Args:
        model: callable supplied by the `@prompt(OpenAI(), ...)` decorator.
        query: the question, passed to the template as `question`.
        neighbors: the retrieved passages, passed to the template as `docs`.

    Returns:
        Whatever `model` returns for the filled-in "qa.pmpt.tpl" inputs.
    """
    template_inputs = dict(question=query, docs=neighbors)
    return model(template_inputs)
In [6]:
Copied!
def qa(query):
    """Answer `query` by retrieving 3 neighbors and passing them to `get_result`.

    Args:
        query: the question to answer.

    Returns:
        The result of `get_result(query, neighbors)`.
    """
    neighbors = get_neighbors(query, 3)
    return get_result(query, neighbors)
$
In [7]:
Copied!
# Example questions offered in the demo UI (passed as `examples=` to `show`).
# Defined once; the assignment was previously repeated verbatim.
questions = [
    "Who won the 2020 Summer Olympics men's high jump?",
    "Why was the 2020 Summer Olympics originally postponed?",
    "In the 2020 Summer Olympics, how many gold medals did the country which won the most medals win?",
    "What is the total number of medals won by France?",
    "What is the tallest mountain in the world?",
]
In [8]:
Copied!
# Read the example's source inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
with open("qa.py", "r") as source_file:
    source_text = source_file.read()

# Keep only the text between the first and second "$" markers, then trim
# surrounding whitespace and "#" characters before displaying it.
displayed_code = source_text.split("$")[1].strip().strip("#").strip()

# Build the demo once (the cell body was previously repeated, which would
# construct and launch the interface twice).
gradio = show(qa,
              examples=questions,
              subprompts=[get_neighbors, get_result],
              description=desc,
              code=displayed_code,
              )

if __name__ == "__main__":
    gradio.launch()
Running on local URL: http://127.0.0.1:7861 To create a public link, set `share=True` in `launch()`.
In [ ]:
Copied!
# NOTE(review): `show_log` is not among the names imported in the visible cells
# (only prompt, show, OpenAIEmbed and OpenAI are imported from minichain), and
# this cell has no execution count. Confirm the helper exists and import it
# before running, otherwise this raises NameError on a fresh kernel.
show_log("qa.log")