Hi all,
I am new to Pinecone and learning through out the way. I am creating a PDF reader application with LangChain and Pinecone. In one section of my code where I want to split the PDFs user upload into chunks and store them into Pinecone. But I only want to create a new embedding where user upload a new PDF. And I keep getting this error: AttributeError: ‘Index’ object has no attribute ‘exists’.
Can someone please help me. Thank you!
Here is the section of my code where handles this:
read PDF
if pdf is not None:
pdf_reader = PdfReader(pdf)
# split document into chunks
# also can use text split: good for PDFs that do not contains charts and visuals
sections = []
for page in pdf_reader.pages:
# Split the page text by paragraphs (assuming two newlines indicate a new paragraph)
page_sections = page.extract_text().split('\n\n')
sections.extend(page_sections)
chunks = sections
# st.write(chunks)
## embeddings
# Set up Pinecone
pinecone.init(api_key=pinecone_api_key, environment='gcp-starter')
index_name = 'langchainresearch'
if index_name not in pinecone.list_indexes():
pinecone.create_index(index_name, dimension=1536, metric="cosine") # Adjust the dimension as per your embeddings
index = pinecone.Index(index_name)
file_name = pdf.name[:-4]
# Check if embeddings are already stored in Pinecone
file_id = hash(file_name)
if index.exists(id=file_id):
# Fetch embeddings from Pinecone
VectorStore = index.fetch(ids=[file_id])[file_id]
st.write('Embeddings Loaded from Pinecone')
else:
# Compute embeddings
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
# Store embeddings in Pinecone
vectors = VectorStore.get_all_vectors()
index.upsert(vectors={(file_id, vectors)})
st.write('Embeddings Computation Completed and Stored in Pinecone')
# Create chat history
# Pinecone Setup for Chat History
chat_history_index_name = 'chat_history'
if chat_history_index_name not in pinecone.list_indexes():
pinecone.create_index(chat_history_index_name, dimension=1) # Dimension is 1 as we're not storing vectors here
chat_history_index = pinecone.Index(chat_history_index_name)
# Create or Load Chat History from Pinecone
if pdf:
# Check if chat history exists in Pinecone
if chat_history_index.exists(id=pdf.name):
# Fetch chat history from Pinecone
chat_history = chat_history_index.fetch(ids=[pdf.name])[pdf.name]
st.write('Chat History Loaded from Pinecone')
else:
# Initialize empty chat history
chat_history = []
10 posts - 3 participants