So I need help pushing vectors into the db, currently something is wrong with the await function here, I am getting an error.
Pastebin link for my file that uploads to pinecode: import { Index, Pinecone, PineconeRecord, RecordMetadata,} from "@ - Pastebin.com
This is the error I’m getting right now:
Object literal may only specify known properties, and ‘vectors’ does not exist in type ‘PineconeRecord’.
Code if pastebin doesn’t work:
import {
Index,
Pinecone,
PineconeRecord,
RecordMetadata,
} from “@pinecone-database/pinecone”;
import { downloadFromS3 } from “./s3.server”;
import { PDFLoader } from “@langchain/community/document_loaders/fs/pdf”;
import {
Document,
RecursiveCharacterTextSplitter,
} from “@pinecone-database/doc-splitter”;
import { getEmbeddings } from “./embeddings”;
import { Vector } from “@pinecone-database/pinecone/dist/pinecone-generated-ts-fetch”;
import md5 from “md5”;
import { convertToAscii } from “./utils”;
let pinecone: Pinecone | null = null;
const api = process.env.PINECONE_API_KEY || “”;
export const getPineconeClient = () => {
if (!pinecone) {
pinecone = new Pinecone({
apiKey: api,
});
}
return pinecone;
};
type PDFPage = {
pageContent: string;
metadata: {
loc: { pageNumber: number };
};
};
export async function loadS3IntoPinecone(fileKey: string) {
try {
// 1. Obtain the PDF
console.log(“Downloading PDF from S3…”);
const file_name = await downloadFromS3(fileKey);
if (!file_name) {
throw new Error("File not found");
}
// 2. Download and read PDF
console.log("Reading PDF...");
const loader = new PDFLoader(file_name as string);
const pages = (await loader.load()) as PDFPage[];
// 3. Split and segment the PDF
console.log("Splitting PDF...");
const documents = await Promise.all(pages.map(prepareDocument));
// 4. Vectorize and embed individual docs
console.log("Embedding documents...");
const vectors = await Promise.all(documents.flat().map(embedDocument));
// 5. Upload to Pinecone
const client = await getPineconeClient();
const pineconeIndex = client.Index("teachtalk");
console.log("inserting vectors into pinecone");
const namespace = convertToAscii(fileKey);
// Push vectors to Pinecone index
await pineconeIndex.upsert({
vectors: vectors as PineconeRecord<RecordMetadata>[],
namespace: namespace,
});
console.log("Upload complete");
} catch (error) {
console.error(“Error in loadS3IntoPinecone”, error);
}
}
async function embedDocument(doc: Document): Promise {
try {
const embeddings = await getEmbeddings(doc.pageContent);
const hash = md5(doc.pageContent);
return {
id: hash,
values: embeddings,
metadata: {
text: doc.metadata.text,
pageNumber: doc.metadata.pageNumber,
},
} as PineconeRecord;
} catch (error) {
console.error(“Error in embedding document”, error);
throw error; // Ensure errors are propagated
}
}
// Converts to bytes then to a string
export const truncateStringByBytes = (str: string, bytes: number) => {
const enc = new TextEncoder();
return new TextDecoder(“utf-8”).decode(enc.encode(str).slice(0, bytes));
};
async function prepareDocument(page: PDFPage): Promise<Document> {
let { pageContent, metadata } = page;
// Replace empty line with space
pageContent = pageContent.replace(/\n/g, “”);
// Split the docs
const splitter = new RecursiveCharacterTextSplitter();
const docs = await splitter.splitDocuments([
new Document({
pageContent,
metadata: {
pageNumber: metadata.loc.pageNumber,
text: truncateStringByBytes(pageContent, 36000),
},
}),
]);
return docs;
}
1 post - 1 participant