llama gpt knowledgebot
1.0.0
This project implements a question-answering system that combines pre-trained sentence embeddings, FAISS semantic similarity search, and a locally hosted Llama 2 model (via GPT4All) to answer questions grounded in a document corpus.
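The snippets below assume the following imports and a small in-memory corpus; the corpus contents here are placeholders for illustration, not the project's actual data.
import threading
import faiss
import gpt4all
from flask import Flask, request, jsonify
from sentence_transformers import SentenceTransformer
corpus = [
    "FAISS is a library for efficient similarity search over dense vectors.",  # Illustrative document 1.
    "Sentence Transformers produce fixed-size embeddings for sentences and paragraphs.",  # Illustrative document 2.
]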
app = Flask(__name__) # Creates a Flask application instance.
To generate text embeddings for both the user input and the corpus, we use a pre-trained Sentence Transformer model:
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(corpus) # Generates embeddings for each document in the corpus
d = len(embeddings[0]) # Dimensionality of the embedding vectors.
nlist = 10 # Number of clusters an IVF index would use; the flat index below does not need it.
newindex = faiss.IndexFlatL2(d) # Creates a flat FAISS index that performs exact L2 (Euclidean) similarity search.
newindex.train(embeddings) # A no-op for a flat index; training only matters for IVF/quantized indexes.
newindex.add(embeddings) # Adds the corpus embeddings to the index.
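For larger corpora it can be worth persisting the built index so it is not rebuilt on every start. A minimal sketch using FAISS's write_index/read_index helpers; the file name is illustrative:
faiss.write_index(newindex, "corpus.index") # Saves the built index to disk (file name is an example).
newindex = faiss.read_index("corpus.index") # Reloads it later without re-encoding the corpus.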
gptj = gpt4all.GPT4All("llama-2-7b-chat.ggmlv3.q4_0.bin") # Loads the quantized Llama 2 7B chat model through the GPT4All bindings.
xq = model.encode([user_input]) # Generates an embedding for the user's input.
k = 1 # Specifies the number of nearest neighbors to retrieve.
D, I = newindex.search(xq, k) # Searches the FAISS index for the nearest neighbor (most similar document) to the user input.
most_similar_document = corpus[I[0][0]] # Extracts the most relevant document from the corpus based on the search results.
context = most_similar_document # Uses the retrieved document directly as the context; it is already a single string, so no joining is needed.
question = user_input # Stores the user's input as the question.
input_text = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:" # Builds the prompt for the Llama model by combining the retrieved context and the question.
max_tokens = 100 # Maximum number of tokens the model may generate for the answer (adjustable).
answer = gptj.generate(input_text, max_tokens=max_tokens) # Generates the answer with the local Llama 2 model, conditioned on the retrieved context.
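Putting the retrieval and generation steps together, a generate_response helper might look like the sketch below. The function name matches the one referenced by the threading wrapper that follows, but this body is an illustrative composition of the steps above, not necessarily the project's exact implementation.
def generate_response(user_input):
    # Embed the query, retrieve the closest document, and generate an answer (sketch of the steps above).
    xq = model.encode([user_input])
    D, I = newindex.search(xq, 1)
    context = corpus[I[0][0]]
    prompt = f"Context: {context}\n\nQuestion: {user_input}\n\nAnswer:"
    return gptj.generate(prompt, max_tokens=100)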
def generate_response_threaded(user_input):
    # Runs generate_response on a worker thread; join() blocks until it finishes,
    # so the call stays synchronous while the generation work is isolated in its own thread.
    response_thread = threading.Thread(target=generate_response, args=(user_input,))
    response_thread.start()
    response_thread.join()
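The bot is served through Flask. A minimal route is sketched below; the /ask endpoint name and JSON payload shape are assumptions for illustration, not necessarily the project's actual API.
@app.route("/ask", methods=["POST"]) # Hypothetical endpoint; route name and JSON shape are illustrative.
def ask():
    user_input = request.json.get("question", "")
    return jsonify({"answer": generate_response(user_input)})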
if __name__ == "__main__":
app.run(debug=True) # Starts the Flask application in debug mode, allowing for automatic code reloading during development.