diff --git a/packages/components/nodes/agents/CSVAgent/CSVAgent.ts b/packages/components/nodes/agents/CSVAgent/CSVAgent.ts index e9f2c726..80581682 100644 --- a/packages/components/nodes/agents/CSVAgent/CSVAgent.ts +++ b/packages/components/nodes/agents/CSVAgent/CSVAgent.ts @@ -7,6 +7,9 @@ import { getBaseClasses } from '../../../src/utils' import { LoadPyodide, finalSystemPrompt, systemPrompt } from './core' import { checkInputs, Moderation } from '../../moderation/Moderation' import { formatResponse } from '../../outputparsers/OutputParserHelpers' +import path from 'path' +import { getStoragePath } from '../../../src' +import fs from 'fs' class CSV_Agents implements INode { label: string @@ -88,19 +91,34 @@ class CSV_Agents implements INode { const callbacks = await additionalCallbacks(nodeData, options) let files: string[] = [] - - if (csvFileBase64.startsWith('[') && csvFileBase64.endsWith(']')) { - files = JSON.parse(csvFileBase64) - } else { - files = [csvFileBase64] - } - let base64String = '' - for (const file of files) { - const splitDataURI = file.split(',') - splitDataURI.pop() - base64String += splitDataURI.pop() ?? '' + if (csvFileBase64.startsWith('FILE-STORAGE::')) { + const fileName = csvFileBase64.replace('FILE-STORAGE::', '') + if (fileName.startsWith('[') && fileName.endsWith(']')) { + files = JSON.parse(fileName) + } else { + files = [fileName] + } + const chatflowid = options.chatflowid + + for (const file of files) { + const fileInStorage = path.join(getStoragePath(), chatflowid, file) + const fileData = fs.readFileSync(fileInStorage) + base64String += fileData.toString('base64') + } + } else { + if (csvFileBase64.startsWith('[') && csvFileBase64.endsWith(']')) { + files = JSON.parse(csvFileBase64) + } else { + files = [csvFileBase64] + } + + for (const file of files) { + const splitDataURI = file.split(',') + splitDataURI.pop() + base64String += splitDataURI.pop() ?? '' + } } const pyodide = await LoadPyodide() diff --git a/packages/components/nodes/chains/ApiChain/OpenAPIChain.ts b/packages/components/nodes/chains/ApiChain/OpenAPIChain.ts index e5c11eb3..3d211333 100644 --- a/packages/components/nodes/chains/ApiChain/OpenAPIChain.ts +++ b/packages/components/nodes/chains/ApiChain/OpenAPIChain.ts @@ -5,6 +5,9 @@ import { getBaseClasses } from '../../../src/utils' import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler' import { checkInputs, Moderation, streamResponse } from '../../moderation/Moderation' import { formatResponse } from '../../outputparsers/OutputParserHelpers' +import { getStoragePath } from '../../../src' +import fs from 'fs' +import path from 'path' class OpenApiChain_Chains implements INode { label: string @@ -64,12 +67,12 @@ class OpenApiChain_Chains implements INode { ] } - async init(nodeData: INodeData): Promise { - return await initChain(nodeData) + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { + return await initChain(nodeData, options) } async run(nodeData: INodeData, input: string, options: ICommonObject): Promise { - const chain = await initChain(nodeData) + const chain = await initChain(nodeData, options) const loggerHandler = new ConsoleCallbackHandler(options.logger) const callbacks = await additionalCallbacks(nodeData, options) const moderations = nodeData.inputs?.inputModeration as Moderation[] @@ -94,7 +97,7 @@ class OpenApiChain_Chains implements INode { } } -const initChain = async (nodeData: INodeData) => { +const initChain = async (nodeData: INodeData, options: ICommonObject) => { const model = nodeData.inputs?.model as ChatOpenAI const headers = nodeData.inputs?.headers as string const yamlLink = nodeData.inputs?.yamlLink as string @@ -105,10 +108,18 @@ const initChain = async (nodeData: INodeData) => { if (yamlLink) { yamlString = yamlLink } else { - const splitDataURI = yamlFileBase64.split(',') - splitDataURI.pop() - const bf = Buffer.from(splitDataURI.pop() || '', 'base64') - yamlString = bf.toString('utf-8') + if (yamlFileBase64.startsWith('FILE-STORAGE::')) { + const file = yamlFileBase64.replace('FILE-STORAGE::', '') + const chatflowid = options.chatflowid + const fileInStorage = path.join(getStoragePath(), chatflowid, file) + const fileData = fs.readFileSync(fileInStorage) + yamlString = fileData.toString() + } else { + const splitDataURI = yamlFileBase64.split(',') + splitDataURI.pop() + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + yamlString = bf.toString('utf-8') + } } return await createOpenAPIChain(yamlString, { diff --git a/packages/components/nodes/documentloaders/Csv/Csv.ts b/packages/components/nodes/documentloaders/Csv/Csv.ts index a6170b2d..8a1f6d0a 100644 --- a/packages/components/nodes/documentloaders/Csv/Csv.ts +++ b/packages/components/nodes/documentloaders/Csv/Csv.ts @@ -1,6 +1,9 @@ -import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' import { CSVLoader } from 'langchain/document_loaders/fs/csv' +import path from 'path' +import { getStoragePath } from '../../../src' +import fs from 'fs' class Csv_DocumentLoaders implements INode { label: string @@ -53,7 +56,7 @@ class Csv_DocumentLoaders implements INode { ] } - async init(nodeData: INodeData): Promise { + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const csvFileBase64 = nodeData.inputs?.csvFile as string const columnName = nodeData.inputs?.columnName as string @@ -62,25 +65,50 @@ class Csv_DocumentLoaders implements INode { let alldocs = [] let files: string[] = [] - if (csvFileBase64.startsWith('[') && csvFileBase64.endsWith(']')) { - files = JSON.parse(csvFileBase64) - } else { - files = [csvFileBase64] - } - - for (const file of files) { - const splitDataURI = file.split(',') - splitDataURI.pop() - const bf = Buffer.from(splitDataURI.pop() || '', 'base64') - const blob = new Blob([bf]) - const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim()) - - if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - alldocs.push(...docs) + if (csvFileBase64.startsWith('FILE-STORAGE::')) { + const fileName = csvFileBase64.replace('FILE-STORAGE::', '') + if (fileName.startsWith('[') && fileName.endsWith(']')) { + files = JSON.parse(fileName) } else { - const docs = await loader.load() - alldocs.push(...docs) + files = [fileName] + } + const chatflowid = options.chatflowid + + for (const file of files) { + const fileInStorage = path.join(getStoragePath(), chatflowid, file) + const fileData = fs.readFileSync(fileInStorage) + const blob = new Blob([fileData]) + const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim()) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } + } + } else { + if (csvFileBase64.startsWith('[') && csvFileBase64.endsWith(']')) { + files = JSON.parse(csvFileBase64) + } else { + files = [csvFileBase64] + } + + for (const file of files) { + const splitDataURI = file.split(',') + splitDataURI.pop() + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + const blob = new Blob([bf]) + const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim()) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } } } diff --git a/packages/components/nodes/documentloaders/Docx/Docx.ts b/packages/components/nodes/documentloaders/Docx/Docx.ts index 26883ada..c8c75e95 100644 --- a/packages/components/nodes/documentloaders/Docx/Docx.ts +++ b/packages/components/nodes/documentloaders/Docx/Docx.ts @@ -1,6 +1,9 @@ -import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' import { DocxLoader } from 'langchain/document_loaders/fs/docx' +import path from 'path' +import { getStoragePath } from '../../../src' +import fs from 'fs' class Docx_DocumentLoaders implements INode { label: string @@ -45,7 +48,7 @@ class Docx_DocumentLoaders implements INode { ] } - async init(nodeData: INodeData): Promise { + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const docxFileBase64 = nodeData.inputs?.docxFile as string const metadata = nodeData.inputs?.metadata @@ -53,25 +56,50 @@ class Docx_DocumentLoaders implements INode { let alldocs = [] let files: string[] = [] - if (docxFileBase64.startsWith('[') && docxFileBase64.endsWith(']')) { - files = JSON.parse(docxFileBase64) - } else { - files = [docxFileBase64] - } - - for (const file of files) { - const splitDataURI = file.split(',') - splitDataURI.pop() - const bf = Buffer.from(splitDataURI.pop() || '', 'base64') - const blob = new Blob([bf]) - const loader = new DocxLoader(blob) - - if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - alldocs.push(...docs) + if (docxFileBase64.startsWith('FILE-STORAGE::')) { + const fileName = docxFileBase64.replace('FILE-STORAGE::', '') + if (fileName.startsWith('[') && fileName.endsWith(']')) { + files = JSON.parse(fileName) } else { - const docs = await loader.load() - alldocs.push(...docs) + files = [fileName] + } + const chatflowid = options.chatflowid + + for (const file of files) { + const fileInStorage = path.join(getStoragePath(), chatflowid, file) + const fileData = fs.readFileSync(fileInStorage) + const blob = new Blob([fileData]) + const loader = new DocxLoader(blob) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } + } + } else { + if (docxFileBase64.startsWith('[') && docxFileBase64.endsWith(']')) { + files = JSON.parse(docxFileBase64) + } else { + files = [docxFileBase64] + } + + for (const file of files) { + const splitDataURI = file.split(',') + splitDataURI.pop() + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + const blob = new Blob([bf]) + const loader = new DocxLoader(blob) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } } } diff --git a/packages/components/nodes/documentloaders/Json/Json.ts b/packages/components/nodes/documentloaders/Json/Json.ts index 43051251..b204c594 100644 --- a/packages/components/nodes/documentloaders/Json/Json.ts +++ b/packages/components/nodes/documentloaders/Json/Json.ts @@ -1,6 +1,9 @@ -import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' import { JSONLoader } from 'langchain/document_loaders/fs/json' +import { getStoragePath } from '../../../src' +import fs from 'fs' +import path from 'path' class Json_DocumentLoaders implements INode { label: string @@ -53,7 +56,7 @@ class Json_DocumentLoaders implements INode { ] } - async init(nodeData: INodeData): Promise { + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const jsonFileBase64 = nodeData.inputs?.jsonFile as string const pointersName = nodeData.inputs?.pointersName as string @@ -68,25 +71,51 @@ class Json_DocumentLoaders implements INode { let alldocs = [] let files: string[] = [] - if (jsonFileBase64.startsWith('[') && jsonFileBase64.endsWith(']')) { - files = JSON.parse(jsonFileBase64) - } else { - files = [jsonFileBase64] - } - - for (const file of files) { - const splitDataURI = file.split(',') - splitDataURI.pop() - const bf = Buffer.from(splitDataURI.pop() || '', 'base64') - const blob = new Blob([bf]) - const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined) - - if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - alldocs.push(...docs) + //FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"] + if (jsonFileBase64.startsWith('FILE-STORAGE::')) { + const fileName = jsonFileBase64.replace('FILE-STORAGE::', '') + if (fileName.startsWith('[') && fileName.endsWith(']')) { + files = JSON.parse(fileName) } else { - const docs = await loader.load() - alldocs.push(...docs) + files = [fileName] + } + const chatflowid = options.chatflowid + + for (const file of files) { + const fileInStorage = path.join(getStoragePath(), chatflowid, file) + const fileData = fs.readFileSync(fileInStorage) + const blob = new Blob([fileData]) + const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } + } + } else { + if (jsonFileBase64.startsWith('[') && jsonFileBase64.endsWith(']')) { + files = JSON.parse(jsonFileBase64) + } else { + files = [jsonFileBase64] + } + + for (const file of files) { + const splitDataURI = file.split(',') + splitDataURI.pop() + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + const blob = new Blob([bf]) + const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } } } diff --git a/packages/components/nodes/documentloaders/Jsonlines/Jsonlines.ts b/packages/components/nodes/documentloaders/Jsonlines/Jsonlines.ts index fcc2fae9..14517dbd 100644 --- a/packages/components/nodes/documentloaders/Jsonlines/Jsonlines.ts +++ b/packages/components/nodes/documentloaders/Jsonlines/Jsonlines.ts @@ -1,6 +1,9 @@ -import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' import { JSONLinesLoader } from 'langchain/document_loaders/fs/json' +import { getStoragePath } from '../../../src' +import fs from 'fs' +import path from 'path' class Jsonlines_DocumentLoaders implements INode { label: string @@ -52,7 +55,7 @@ class Jsonlines_DocumentLoaders implements INode { ] } - async init(nodeData: INodeData): Promise { + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const jsonLinesFileBase64 = nodeData.inputs?.jsonlinesFile as string const pointerName = nodeData.inputs?.pointerName as string @@ -62,26 +65,51 @@ class Jsonlines_DocumentLoaders implements INode { let files: string[] = [] let pointer = '/' + pointerName.trim() - - if (jsonLinesFileBase64.startsWith('[') && jsonLinesFileBase64.endsWith(']')) { - files = JSON.parse(jsonLinesFileBase64) - } else { - files = [jsonLinesFileBase64] - } - - for (const file of files) { - const splitDataURI = file.split(',') - splitDataURI.pop() - const bf = Buffer.from(splitDataURI.pop() || '', 'base64') - const blob = new Blob([bf]) - const loader = new JSONLinesLoader(blob, pointer) - - if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - alldocs.push(...docs) + //FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"] + if (jsonLinesFileBase64.startsWith('FILE-STORAGE::')) { + const fileName = jsonLinesFileBase64.replace('FILE-STORAGE::', '') + if (fileName.startsWith('[') && fileName.endsWith(']')) { + files = JSON.parse(fileName) } else { - const docs = await loader.load() - alldocs.push(...docs) + files = [fileName] + } + const chatflowid = options.chatflowid + + for (const file of files) { + const fileInStorage = path.join(getStoragePath(), chatflowid, file) + const fileData = fs.readFileSync(fileInStorage) + const blob = new Blob([fileData]) + const loader = new JSONLinesLoader(blob, pointer) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } + } + } else { + if (jsonLinesFileBase64.startsWith('[') && jsonLinesFileBase64.endsWith(']')) { + files = JSON.parse(jsonLinesFileBase64) + } else { + files = [jsonLinesFileBase64] + } + + for (const file of files) { + const splitDataURI = file.split(',') + splitDataURI.pop() + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + const blob = new Blob([bf]) + const loader = new JSONLinesLoader(blob, pointer) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } } } diff --git a/packages/components/nodes/documentloaders/Pdf/Pdf.ts b/packages/components/nodes/documentloaders/Pdf/Pdf.ts index a9f6ab23..d21587a9 100644 --- a/packages/components/nodes/documentloaders/Pdf/Pdf.ts +++ b/packages/components/nodes/documentloaders/Pdf/Pdf.ts @@ -1,6 +1,9 @@ -import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' import { PDFLoader } from 'langchain/document_loaders/fs/pdf' +import { getStoragePath } from '../../../src' +import fs from 'fs' +import path from 'path' class Pdf_DocumentLoaders implements INode { label: string @@ -68,53 +71,44 @@ class Pdf_DocumentLoaders implements INode { ] } - async init(nodeData: INodeData): Promise { + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const pdfFileBase64 = nodeData.inputs?.pdfFile as string const usage = nodeData.inputs?.usage as string const metadata = nodeData.inputs?.metadata const legacyBuild = nodeData.inputs?.legacyBuild as boolean - let alldocs = [] + let alldocs: any[] = [] let files: string[] = [] - if (pdfFileBase64.startsWith('[') && pdfFileBase64.endsWith(']')) { - files = JSON.parse(pdfFileBase64) - } else { - files = [pdfFileBase64] - } - - for (const file of files) { - const splitDataURI = file.split(',') - splitDataURI.pop() - const bf = Buffer.from(splitDataURI.pop() || '', 'base64') - if (usage === 'perFile') { - const loader = new PDFLoader(new Blob([bf]), { - splitPages: false, - pdfjs: () => - // @ts-ignore - legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') - }) - if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - alldocs.push(...docs) - } else { - const docs = await loader.load() - alldocs.push(...docs) - } + //FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"] + if (pdfFileBase64.startsWith('FILE-STORAGE::')) { + const fileName = pdfFileBase64.replace('FILE-STORAGE::', '') + if (fileName.startsWith('[') && fileName.endsWith(']')) { + files = JSON.parse(fileName) } else { - const loader = new PDFLoader(new Blob([bf]), { - pdfjs: () => - // @ts-ignore - legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') - }) - if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - alldocs.push(...docs) - } else { - const docs = await loader.load() - alldocs.push(...docs) - } + files = [fileName] + } + const chatflowid = options.chatflowid + + for (const file of files) { + const fileInStorage = path.join(getStoragePath(), chatflowid, file) + const fileData = fs.readFileSync(fileInStorage) + const bf = Buffer.from(fileData) + await this.extractDocs(usage, bf, legacyBuild, textSplitter, alldocs) + } + } else { + if (pdfFileBase64.startsWith('[') && pdfFileBase64.endsWith(']')) { + files = JSON.parse(pdfFileBase64) + } else { + files = [pdfFileBase64] + } + + for (const file of files) { + const splitDataURI = file.split(',') + splitDataURI.pop() + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + await this.extractDocs(usage, bf, legacyBuild, textSplitter, alldocs) } } @@ -136,6 +130,37 @@ class Pdf_DocumentLoaders implements INode { return alldocs } + + private async extractDocs(usage: string, bf: Buffer, legacyBuild: boolean, textSplitter: TextSplitter, alldocs: any[]) { + if (usage === 'perFile') { + const loader = new PDFLoader(new Blob([bf]), { + splitPages: false, + pdfjs: () => + // @ts-ignore + legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') + }) + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } + } else { + const loader = new PDFLoader(new Blob([bf]), { + pdfjs: () => + // @ts-ignore + legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') + }) + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } + } + } } module.exports = { nodeClass: Pdf_DocumentLoaders } diff --git a/packages/components/nodes/documentloaders/Text/Text.ts b/packages/components/nodes/documentloaders/Text/Text.ts index c43f913c..fe63bcaf 100644 --- a/packages/components/nodes/documentloaders/Text/Text.ts +++ b/packages/components/nodes/documentloaders/Text/Text.ts @@ -1,8 +1,10 @@ -import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface' +import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' import { TextLoader } from 'langchain/document_loaders/fs/text' import { Document } from '@langchain/core/documents' -import { handleEscapeCharacters } from '../../../src' +import { getStoragePath, handleEscapeCharacters } from '../../../src' +import fs from 'fs' +import path from 'path' class Text_DocumentLoaders implements INode { label: string @@ -63,7 +65,7 @@ class Text_DocumentLoaders implements INode { ] } - async init(nodeData: INodeData): Promise { + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const txtFileBase64 = nodeData.inputs?.txtFile as string const metadata = nodeData.inputs?.metadata @@ -72,25 +74,51 @@ class Text_DocumentLoaders implements INode { let alldocs = [] let files: string[] = [] - if (txtFileBase64.startsWith('[') && txtFileBase64.endsWith(']')) { - files = JSON.parse(txtFileBase64) - } else { - files = [txtFileBase64] - } - - for (const file of files) { - const splitDataURI = file.split(',') - splitDataURI.pop() - const bf = Buffer.from(splitDataURI.pop() || '', 'base64') - const blob = new Blob([bf]) - const loader = new TextLoader(blob) - - if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - alldocs.push(...docs) + //FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"] + if (txtFileBase64.startsWith('FILE-STORAGE::')) { + const fileName = txtFileBase64.replace('FILE-STORAGE::', '') + if (fileName.startsWith('[') && fileName.endsWith(']')) { + files = JSON.parse(fileName) } else { - const docs = await loader.load() - alldocs.push(...docs) + files = [fileName] + } + const chatflowid = options.chatflowid + + for (const file of files) { + const fileInStorage = path.join(getStoragePath(), chatflowid, file) + const fileData = fs.readFileSync(fileInStorage) + const blob = new Blob([fileData]) + const loader = new TextLoader(blob) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } + } + } else { + if (txtFileBase64.startsWith('[') && txtFileBase64.endsWith(']')) { + files = JSON.parse(txtFileBase64) + } else { + files = [txtFileBase64] + } + + for (const file of files) { + const splitDataURI = file.split(',') + splitDataURI.pop() + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + const blob = new Blob([bf]) + const loader = new TextLoader(blob) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + alldocs.push(...docs) + } else { + const docs = await loader.load() + alldocs.push(...docs) + } } } diff --git a/packages/components/nodes/tools/OpenAPIToolkit/OpenAPIToolkit.ts b/packages/components/nodes/tools/OpenAPIToolkit/OpenAPIToolkit.ts index 0537ae67..a1526c0b 100644 --- a/packages/components/nodes/tools/OpenAPIToolkit/OpenAPIToolkit.ts +++ b/packages/components/nodes/tools/OpenAPIToolkit/OpenAPIToolkit.ts @@ -4,6 +4,9 @@ import { OpenApiToolkit } from 'langchain/agents' import { JsonSpec, JsonObject } from './core' import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { getCredentialData, getCredentialParam } from '../../../src' +import { getStoragePath } from '../../../src' +import fs from 'fs' +import path from 'path' class OpenAPIToolkit_Tools implements INode { label: string @@ -56,11 +59,21 @@ class OpenAPIToolkit_Tools implements INode { const credentialData = await getCredentialData(nodeData.credential ?? '', options) const openAPIToken = getCredentialParam('openAPIToken', credentialData, nodeData) - const splitDataURI = yamlFileBase64.split(',') - splitDataURI.pop() - const bf = Buffer.from(splitDataURI.pop() || '', 'base64') - const utf8String = bf.toString('utf-8') - const data = load(utf8String) as JsonObject + let data: JsonObject + if (yamlFileBase64.startsWith('FILE-STORAGE::')) { + const file = yamlFileBase64.replace('FILE-STORAGE::', '') + const chatflowid = options.chatflowid + const fileInStorage = path.join(getStoragePath(), chatflowid, file) + const fileData = fs.readFileSync(fileInStorage) + const utf8String = fileData.toString('utf-8') + data = load(utf8String) as JsonObject + } else { + const splitDataURI = yamlFileBase64.split(',') + splitDataURI.pop() + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + const utf8String = bf.toString('utf-8') + data = load(utf8String) as JsonObject + } if (!data) { throw new Error('Failed to load OpenAPI spec') } diff --git a/packages/components/nodes/vectorstores/Vectara/Vectara.ts b/packages/components/nodes/vectorstores/Vectara/Vectara.ts index db62e85c..8baa6ed9 100644 --- a/packages/components/nodes/vectorstores/Vectara/Vectara.ts +++ b/packages/components/nodes/vectorstores/Vectara/Vectara.ts @@ -11,6 +11,9 @@ import { Document } from '@langchain/core/documents' import { Embeddings } from '@langchain/core/embeddings' import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface' import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils' +import { getStoragePath } from '../../../src' +import fs from 'fs' +import path from 'path' class Vectara_VectorStores implements INode { label: string @@ -182,20 +185,37 @@ class Vectara_VectorStores implements INode { } } - let files: string[] = [] - if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) { - files = JSON.parse(fileBase64) - } else { - files = [fileBase64] - } - const vectaraFiles: VectaraFile[] = [] - for (const file of files) { - const splitDataURI = file.split(',') - splitDataURI.pop() - const bf = Buffer.from(splitDataURI.pop() || '', 'base64') - const blob = new Blob([bf]) - vectaraFiles.push({ blob: blob, fileName: getFileName(file) }) + let files: string[] = [] + if (fileBase64.startsWith('FILE-STORAGE::')) { + const fileName = fileBase64.replace('FILE-STORAGE::', '') + if (fileName.startsWith('[') && fileName.endsWith(']')) { + files = JSON.parse(fileName) + } else { + files = [fileName] + } + const chatflowid = options.chatflowid + + for (const file of files) { + const fileInStorage = path.join(getStoragePath(), chatflowid, file) + const fileData = fs.readFileSync(fileInStorage) + const blob = new Blob([fileData]) + vectaraFiles.push({ blob: blob, fileName: getFileName(file) }) + } + } else { + if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) { + files = JSON.parse(fileBase64) + } else { + files = [fileBase64] + } + + for (const file of files) { + const splitDataURI = file.split(',') + splitDataURI.pop() + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + const blob = new Blob([bf]) + vectaraFiles.push({ blob: blob, fileName: getFileName(file) }) + } } try { diff --git a/packages/components/nodes/vectorstores/Vectara/Vectara_Upload.ts b/packages/components/nodes/vectorstores/Vectara/Vectara_Upload.ts index 614fc1ca..378c8962 100644 --- a/packages/components/nodes/vectorstores/Vectara/Vectara_Upload.ts +++ b/packages/components/nodes/vectorstores/Vectara/Vectara_Upload.ts @@ -1,6 +1,9 @@ import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig, VectaraFile } from '@langchain/community/vectorstores/vectara' import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface' import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils' +import path from 'path' +import { getStoragePath } from '../../../src' +import fs from 'fs' class VectaraUpload_VectorStores implements INode { label: string @@ -129,20 +132,37 @@ class VectaraUpload_VectorStores implements INode { vectaraFilter.contextConfig = vectaraContextConfig let files: string[] = [] - - if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) { - files = JSON.parse(fileBase64) - } else { - files = [fileBase64] - } - const vectaraFiles: VectaraFile[] = [] - for (const file of files) { - const splitDataURI = file.split(',') - splitDataURI.pop() - const bf = Buffer.from(splitDataURI.pop() || '', 'base64') - const blob = new Blob([bf]) - vectaraFiles.push({ blob: blob, fileName: getFileName(file) }) + + if (fileBase64.startsWith('FILE-STORAGE::')) { + const fileName = fileBase64.replace('FILE-STORAGE::', '') + if (fileName.startsWith('[') && fileName.endsWith(']')) { + files = JSON.parse(fileName) + } else { + files = [fileName] + } + const chatflowid = options.chatflowid + + for (const file of files) { + const fileInStorage = path.join(getStoragePath(), chatflowid, file) + const fileData = fs.readFileSync(fileInStorage) + const blob = new Blob([fileData]) + vectaraFiles.push({ blob: blob, fileName: getFileName(file) }) + } + } else { + if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) { + files = JSON.parse(fileBase64) + } else { + files = [fileBase64] + } + + for (const file of files) { + const splitDataURI = file.split(',') + splitDataURI.pop() + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + const blob = new Blob([bf]) + vectaraFiles.push({ blob: blob, fileName: getFileName(file) }) + } } const vectorStore = new VectaraStore(vectaraArgs) diff --git a/packages/server/.env.example b/packages/server/.env.example index 5f22cafd..7b188f58 100644 --- a/packages/server/.env.example +++ b/packages/server/.env.example @@ -5,7 +5,7 @@ PORT=3000 # APIKEY_PATH=/your_api_key_path/.flowise # SECRETKEY_PATH=/your_api_key_path/.flowise # LOG_PATH=/your_log_path/.flowise/logs -# BLOB_STORAGE_PATH=/your_database_path/.flowise/storage +# BLOB_STORAGE_PATH=/your_storage_path/.flowise/storage # NUMBER_OF_PROXIES= 1 diff --git a/packages/server/src/services/chatflows/index.ts b/packages/server/src/services/chatflows/index.ts index 78c5c695..78483ff2 100644 --- a/packages/server/src/services/chatflows/index.ts +++ b/packages/server/src/services/chatflows/index.ts @@ -17,6 +17,7 @@ import { utilGetUploadsConfig } from '../../utils/getUploadsConfig' import { ChatMessage } from '../../database/entities/ChatMessage' import { ChatMessageFeedback } from '../../database/entities/ChatMessageFeedback' import { UpsertHistory } from '../../database/entities/UpsertHistory' +import { containsBase64File, updateFlowDataWithFilePaths } from '../../utils/fileRepository' // Check if chatflow valid for streaming const checkIfChatflowIsValidForStreaming = async (chatflowId: string): Promise => { @@ -184,8 +185,24 @@ const getChatflowById = async (chatflowId: string): Promise => { const saveChatflow = async (newChatFlow: ChatFlow): Promise => { try { const appServer = getRunningExpressApp() - const newDbChatflow = await appServer.AppDataSource.getRepository(ChatFlow).create(newChatFlow) - const dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(newDbChatflow) + let dbResponse: ChatFlow + if (containsBase64File(newChatFlow)) { + // we need a 2-step process, as we need to save the chatflow first and then update the file paths + // this is because we need the chatflow id to create the file paths + + // step 1 - save with empty flowData + const incomingFlowData = newChatFlow.flowData + newChatFlow.flowData = JSON.stringify({}) + const chatflow = appServer.AppDataSource.getRepository(ChatFlow).create(newChatFlow) + const step1Results = await appServer.AppDataSource.getRepository(ChatFlow).save(chatflow) + + // step 2 - convert base64 to file paths and update the chatflow + step1Results.flowData = updateFlowDataWithFilePaths(step1Results.id, incomingFlowData) + dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(step1Results) + } else { + const chatflow = appServer.AppDataSource.getRepository(ChatFlow).create(newChatFlow) + dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(chatflow) + } await appServer.telemetry.sendTelemetry('chatflow_created', { version: await getAppVersion(), chatflowId: dbResponse.id, @@ -200,6 +217,9 @@ const saveChatflow = async (newChatFlow: ChatFlow): Promise => { const updateChatflow = async (chatflow: ChatFlow, updateChatFlow: ChatFlow): Promise => { try { const appServer = getRunningExpressApp() + if (containsBase64File(updateChatFlow)) { + updateChatFlow.flowData = updateFlowDataWithFilePaths(chatflow.id, updateChatFlow.flowData) + } const newDbChatflow = await appServer.AppDataSource.getRepository(ChatFlow).merge(chatflow, updateChatFlow) const dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(newDbChatflow) // chatFlowPool is initialized only when a flow is opened diff --git a/packages/server/src/utils/fileRepository.ts b/packages/server/src/utils/fileRepository.ts new file mode 100644 index 00000000..ab40ab76 --- /dev/null +++ b/packages/server/src/utils/fileRepository.ts @@ -0,0 +1,113 @@ +import { ChatFlow } from '../database/entities/ChatFlow' +import path from 'path' +import { getStoragePath } from 'flowise-components' +import fs from 'fs' +import { IReactFlowObject } from '../Interface' + +export const containsBase64File = (chatflow: ChatFlow) => { + const parsedFlowData: IReactFlowObject = JSON.parse(chatflow.flowData) + const re = new RegExp('^data.*;base64', 'i') + let found = false + const nodes = parsedFlowData.nodes + for (const node of nodes) { + if (node.data.category !== 'Document Loaders') { + continue + } + const inputs = node.data.inputs + if (inputs) { + const keys = Object.getOwnPropertyNames(inputs) + for (let i = 0; i < keys.length; i++) { + const input = inputs[keys[i]] + if (!input) { + continue + } + if (typeof input !== 'string') { + continue + } + if (input.startsWith('[')) { + try { + const files = JSON.parse(input) + for (let j = 0; j < files.length; j++) { + const file = files[j] + if (re.test(file)) { + found = true + break + } + } + } catch (e) { + continue + } + } + if (re.test(input)) { + found = true + break + } + } + } + } + return found +} + +function addFileToStorage(file: string, chatflowid: string, fileNames: string[]) { + const dir = path.join(getStoragePath(), chatflowid) + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }) + } + + const splitDataURI = file.split(',') + const filename = splitDataURI.pop()?.split(':')[1] ?? '' + const bf = Buffer.from(splitDataURI.pop() || '', 'base64') + + const filePath = path.join(dir, filename) + fs.writeFileSync(filePath, bf) + fileNames.push(filename) + return 'FILE-STORAGE::' + JSON.stringify(fileNames) +} + +export const updateFlowDataWithFilePaths = (chatflowid: string, flowData: string) => { + try { + const parsedFlowData: IReactFlowObject = JSON.parse(flowData) + const re = new RegExp('^data.*;base64', 'i') + const nodes = parsedFlowData.nodes + for (let j = 0; j < nodes.length; j++) { + const node = nodes[j] + if (node.data.category !== 'Document Loaders') { + continue + } + if (node.data.inputs) { + const inputs = node.data.inputs + const keys = Object.getOwnPropertyNames(inputs) + for (let i = 0; i < keys.length; i++) { + const fileNames: string[] = [] + const key = keys[i] + const input = inputs?.[key] + if (!input) { + continue + } + if (typeof input !== 'string') { + continue + } + if (input.startsWith('[')) { + try { + const files = JSON.parse(input) + for (let j = 0; j < files.length; j++) { + const file = files[j] + if (re.test(file)) { + node.data.inputs[key] = addFileToStorage(file, chatflowid, fileNames) + } + } + } catch (e) { + continue + } + } else if (re.test(input)) { + node.data.inputs[key] = addFileToStorage(input, chatflowid, fileNames) + } + } + } + } + + return JSON.stringify(parsedFlowData) + } catch (e) { + return '' + } +} diff --git a/packages/server/src/utils/index.ts b/packages/server/src/utils/index.ts index cfbb2535..15aa626a 100644 --- a/packages/server/src/utils/index.ts +++ b/packages/server/src/utils/index.ts @@ -243,6 +243,15 @@ export const getEndingNodes = (nodeDependencies: INodeDependencies, graph: INode */ export const getFileName = (fileBase64: string): string => { let fileNames = [] + if (fileBase64.startsWith('FILE-STORAGE::')) { + const names = fileBase64.substring(14) + if (names.includes('[') && names.includes(']')) { + const files = JSON.parse(names) + return files.join(', ') + } else { + return fileBase64.substring(14) + } + } if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) { const files = JSON.parse(fileBase64) for (const file of files) { diff --git a/packages/ui/src/utils/genericHelper.js b/packages/ui/src/utils/genericHelper.js index ed445acd..bc6ebeb0 100644 --- a/packages/ui/src/utils/genericHelper.js +++ b/packages/ui/src/utils/genericHelper.js @@ -234,6 +234,15 @@ export const convertDateStringToDateObject = (dateString) => { export const getFileName = (fileBase64) => { let fileNames = [] + if (fileBase64.startsWith('FILE-STORAGE::')) { + const names = fileBase64.substring(14) + if (names.includes('[') && names.includes(']')) { + const files = JSON.parse(names) + return files.join(', ') + } else { + return fileBase64.substring(14) + } + } if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) { const files = JSON.parse(fileBase64) for (const file of files) {