Feature/externalize files from chatflow - do not save as base64 (#1976)

* initial commit. Externalizing the file base64 string from flowData

* csv - docloader - Externalizing the file base64 string from flowData

* csv - docloader - Externalizing the file base64 string from flowData

* DocX - docloader - Externalizing the file base64 string from flowData

* Json - docloader - Externalizing the file base64 string from flowData

* Jsonlines - docloader - Externalizing the file base64 string from flowData

* PDF - docloader - Externalizing the file base64 string from flowData

* Vectara - vector store - Externalizing the file base64 string from flowData

* OpenAPIToolkit - tools - Externalizing the file base64 string from flowData

* OpenAPIChain - chain - Externalizing the file base64 string from flowData

* lint fixes

* datasource enabled - initial commit

* CSVAgent - agents - Externalizing the file base64 string from flowData

* Externalizing the file base64 string from flowData

* Externalizing the file base64 string from flowData

* add pnpm-lock.yaml

* update filerepository to add try catch

* Rename FileRepository.ts to fileRepository.ts

---------

Co-authored-by: Henry <hzj94@hotmail.com>
Co-authored-by: Henry Heng <henryheng@flowiseai.com>
feature/Anthropic-Agent
Vinod Kiran 2024-04-04 21:41:06 +05:30 committed by GitHub
parent eed7de6df5
commit 658fa3984e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 593 additions and 194 deletions

View File

@ -7,6 +7,9 @@ import { getBaseClasses } from '../../../src/utils'
import { LoadPyodide, finalSystemPrompt, systemPrompt } from './core'
import { checkInputs, Moderation } from '../../moderation/Moderation'
import { formatResponse } from '../../outputparsers/OutputParserHelpers'
import path from 'path'
import { getStoragePath } from '../../../src'
import fs from 'fs'
class CSV_Agents implements INode {
label: string
@ -88,19 +91,34 @@ class CSV_Agents implements INode {
const callbacks = await additionalCallbacks(nodeData, options)
let files: string[] = []
if (csvFileBase64.startsWith('[') && csvFileBase64.endsWith(']')) {
files = JSON.parse(csvFileBase64)
} else {
files = [csvFileBase64]
}
let base64String = ''
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
base64String += splitDataURI.pop() ?? ''
if (csvFileBase64.startsWith('FILE-STORAGE::')) {
const fileName = csvFileBase64.replace('FILE-STORAGE::', '')
if (fileName.startsWith('[') && fileName.endsWith(']')) {
files = JSON.parse(fileName)
} else {
files = [fileName]
}
const chatflowid = options.chatflowid
for (const file of files) {
const fileInStorage = path.join(getStoragePath(), chatflowid, file)
const fileData = fs.readFileSync(fileInStorage)
base64String += fileData.toString('base64')
}
} else {
if (csvFileBase64.startsWith('[') && csvFileBase64.endsWith(']')) {
files = JSON.parse(csvFileBase64)
} else {
files = [csvFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
base64String += splitDataURI.pop() ?? ''
}
}
const pyodide = await LoadPyodide()

View File

@ -5,6 +5,9 @@ import { getBaseClasses } from '../../../src/utils'
import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
import { checkInputs, Moderation, streamResponse } from '../../moderation/Moderation'
import { formatResponse } from '../../outputparsers/OutputParserHelpers'
import { getStoragePath } from '../../../src'
import fs from 'fs'
import path from 'path'
class OpenApiChain_Chains implements INode {
label: string
@ -64,12 +67,12 @@ class OpenApiChain_Chains implements INode {
]
}
async init(nodeData: INodeData): Promise<any> {
return await initChain(nodeData)
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
return await initChain(nodeData, options)
}
async run(nodeData: INodeData, input: string, options: ICommonObject): Promise<string | object> {
const chain = await initChain(nodeData)
const chain = await initChain(nodeData, options)
const loggerHandler = new ConsoleCallbackHandler(options.logger)
const callbacks = await additionalCallbacks(nodeData, options)
const moderations = nodeData.inputs?.inputModeration as Moderation[]
@ -94,7 +97,7 @@ class OpenApiChain_Chains implements INode {
}
}
const initChain = async (nodeData: INodeData) => {
const initChain = async (nodeData: INodeData, options: ICommonObject) => {
const model = nodeData.inputs?.model as ChatOpenAI
const headers = nodeData.inputs?.headers as string
const yamlLink = nodeData.inputs?.yamlLink as string
@ -105,10 +108,18 @@ const initChain = async (nodeData: INodeData) => {
if (yamlLink) {
yamlString = yamlLink
} else {
const splitDataURI = yamlFileBase64.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
yamlString = bf.toString('utf-8')
if (yamlFileBase64.startsWith('FILE-STORAGE::')) {
const file = yamlFileBase64.replace('FILE-STORAGE::', '')
const chatflowid = options.chatflowid
const fileInStorage = path.join(getStoragePath(), chatflowid, file)
const fileData = fs.readFileSync(fileInStorage)
yamlString = fileData.toString()
} else {
const splitDataURI = yamlFileBase64.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
yamlString = bf.toString('utf-8')
}
}
return await createOpenAPIChain(yamlString, {

View File

@ -1,6 +1,9 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { CSVLoader } from 'langchain/document_loaders/fs/csv'
import path from 'path'
import { getStoragePath } from '../../../src'
import fs from 'fs'
class Csv_DocumentLoaders implements INode {
label: string
@ -53,7 +56,7 @@ class Csv_DocumentLoaders implements INode {
]
}
async init(nodeData: INodeData): Promise<any> {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const csvFileBase64 = nodeData.inputs?.csvFile as string
const columnName = nodeData.inputs?.columnName as string
@ -62,25 +65,50 @@ class Csv_DocumentLoaders implements INode {
let alldocs = []
let files: string[] = []
if (csvFileBase64.startsWith('[') && csvFileBase64.endsWith(']')) {
files = JSON.parse(csvFileBase64)
} else {
files = [csvFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim())
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
if (csvFileBase64.startsWith('FILE-STORAGE::')) {
const fileName = csvFileBase64.replace('FILE-STORAGE::', '')
if (fileName.startsWith('[') && fileName.endsWith(']')) {
files = JSON.parse(fileName)
} else {
const docs = await loader.load()
alldocs.push(...docs)
files = [fileName]
}
const chatflowid = options.chatflowid
for (const file of files) {
const fileInStorage = path.join(getStoragePath(), chatflowid, file)
const fileData = fs.readFileSync(fileInStorage)
const blob = new Blob([fileData])
const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim())
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
}
} else {
if (csvFileBase64.startsWith('[') && csvFileBase64.endsWith(']')) {
files = JSON.parse(csvFileBase64)
} else {
files = [csvFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim())
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
}
}

View File

@ -1,6 +1,9 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { DocxLoader } from 'langchain/document_loaders/fs/docx'
import path from 'path'
import { getStoragePath } from '../../../src'
import fs from 'fs'
class Docx_DocumentLoaders implements INode {
label: string
@ -45,7 +48,7 @@ class Docx_DocumentLoaders implements INode {
]
}
async init(nodeData: INodeData): Promise<any> {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const docxFileBase64 = nodeData.inputs?.docxFile as string
const metadata = nodeData.inputs?.metadata
@ -53,25 +56,50 @@ class Docx_DocumentLoaders implements INode {
let alldocs = []
let files: string[] = []
if (docxFileBase64.startsWith('[') && docxFileBase64.endsWith(']')) {
files = JSON.parse(docxFileBase64)
} else {
files = [docxFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
const loader = new DocxLoader(blob)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
if (docxFileBase64.startsWith('FILE-STORAGE::')) {
const fileName = docxFileBase64.replace('FILE-STORAGE::', '')
if (fileName.startsWith('[') && fileName.endsWith(']')) {
files = JSON.parse(fileName)
} else {
const docs = await loader.load()
alldocs.push(...docs)
files = [fileName]
}
const chatflowid = options.chatflowid
for (const file of files) {
const fileInStorage = path.join(getStoragePath(), chatflowid, file)
const fileData = fs.readFileSync(fileInStorage)
const blob = new Blob([fileData])
const loader = new DocxLoader(blob)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
}
} else {
if (docxFileBase64.startsWith('[') && docxFileBase64.endsWith(']')) {
files = JSON.parse(docxFileBase64)
} else {
files = [docxFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
const loader = new DocxLoader(blob)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
}
}

View File

@ -1,6 +1,9 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { JSONLoader } from 'langchain/document_loaders/fs/json'
import { getStoragePath } from '../../../src'
import fs from 'fs'
import path from 'path'
class Json_DocumentLoaders implements INode {
label: string
@ -53,7 +56,7 @@ class Json_DocumentLoaders implements INode {
]
}
async init(nodeData: INodeData): Promise<any> {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const jsonFileBase64 = nodeData.inputs?.jsonFile as string
const pointersName = nodeData.inputs?.pointersName as string
@ -68,25 +71,51 @@ class Json_DocumentLoaders implements INode {
let alldocs = []
let files: string[] = []
if (jsonFileBase64.startsWith('[') && jsonFileBase64.endsWith(']')) {
files = JSON.parse(jsonFileBase64)
} else {
files = [jsonFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
//FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"]
if (jsonFileBase64.startsWith('FILE-STORAGE::')) {
const fileName = jsonFileBase64.replace('FILE-STORAGE::', '')
if (fileName.startsWith('[') && fileName.endsWith(']')) {
files = JSON.parse(fileName)
} else {
const docs = await loader.load()
alldocs.push(...docs)
files = [fileName]
}
const chatflowid = options.chatflowid
for (const file of files) {
const fileInStorage = path.join(getStoragePath(), chatflowid, file)
const fileData = fs.readFileSync(fileInStorage)
const blob = new Blob([fileData])
const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
}
} else {
if (jsonFileBase64.startsWith('[') && jsonFileBase64.endsWith(']')) {
files = JSON.parse(jsonFileBase64)
} else {
files = [jsonFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
}
}

View File

@ -1,6 +1,9 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { JSONLinesLoader } from 'langchain/document_loaders/fs/json'
import { getStoragePath } from '../../../src'
import fs from 'fs'
import path from 'path'
class Jsonlines_DocumentLoaders implements INode {
label: string
@ -52,7 +55,7 @@ class Jsonlines_DocumentLoaders implements INode {
]
}
async init(nodeData: INodeData): Promise<any> {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const jsonLinesFileBase64 = nodeData.inputs?.jsonlinesFile as string
const pointerName = nodeData.inputs?.pointerName as string
@ -62,26 +65,51 @@ class Jsonlines_DocumentLoaders implements INode {
let files: string[] = []
let pointer = '/' + pointerName.trim()
if (jsonLinesFileBase64.startsWith('[') && jsonLinesFileBase64.endsWith(']')) {
files = JSON.parse(jsonLinesFileBase64)
} else {
files = [jsonLinesFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
const loader = new JSONLinesLoader(blob, pointer)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
//FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"]
if (jsonLinesFileBase64.startsWith('FILE-STORAGE::')) {
const fileName = jsonLinesFileBase64.replace('FILE-STORAGE::', '')
if (fileName.startsWith('[') && fileName.endsWith(']')) {
files = JSON.parse(fileName)
} else {
const docs = await loader.load()
alldocs.push(...docs)
files = [fileName]
}
const chatflowid = options.chatflowid
for (const file of files) {
const fileInStorage = path.join(getStoragePath(), chatflowid, file)
const fileData = fs.readFileSync(fileInStorage)
const blob = new Blob([fileData])
const loader = new JSONLinesLoader(blob, pointer)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
}
} else {
if (jsonLinesFileBase64.startsWith('[') && jsonLinesFileBase64.endsWith(']')) {
files = JSON.parse(jsonLinesFileBase64)
} else {
files = [jsonLinesFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
const loader = new JSONLinesLoader(blob, pointer)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
}
}

View File

@ -1,6 +1,9 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { PDFLoader } from 'langchain/document_loaders/fs/pdf'
import { getStoragePath } from '../../../src'
import fs from 'fs'
import path from 'path'
class Pdf_DocumentLoaders implements INode {
label: string
@ -68,53 +71,44 @@ class Pdf_DocumentLoaders implements INode {
]
}
async init(nodeData: INodeData): Promise<any> {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const pdfFileBase64 = nodeData.inputs?.pdfFile as string
const usage = nodeData.inputs?.usage as string
const metadata = nodeData.inputs?.metadata
const legacyBuild = nodeData.inputs?.legacyBuild as boolean
let alldocs = []
let alldocs: any[] = []
let files: string[] = []
if (pdfFileBase64.startsWith('[') && pdfFileBase64.endsWith(']')) {
files = JSON.parse(pdfFileBase64)
} else {
files = [pdfFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
if (usage === 'perFile') {
const loader = new PDFLoader(new Blob([bf]), {
splitPages: false,
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
})
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
//FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"]
if (pdfFileBase64.startsWith('FILE-STORAGE::')) {
const fileName = pdfFileBase64.replace('FILE-STORAGE::', '')
if (fileName.startsWith('[') && fileName.endsWith(']')) {
files = JSON.parse(fileName)
} else {
const loader = new PDFLoader(new Blob([bf]), {
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
})
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
files = [fileName]
}
const chatflowid = options.chatflowid
for (const file of files) {
const fileInStorage = path.join(getStoragePath(), chatflowid, file)
const fileData = fs.readFileSync(fileInStorage)
const bf = Buffer.from(fileData)
await this.extractDocs(usage, bf, legacyBuild, textSplitter, alldocs)
}
} else {
if (pdfFileBase64.startsWith('[') && pdfFileBase64.endsWith(']')) {
files = JSON.parse(pdfFileBase64)
} else {
files = [pdfFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
await this.extractDocs(usage, bf, legacyBuild, textSplitter, alldocs)
}
}
@ -136,6 +130,37 @@ class Pdf_DocumentLoaders implements INode {
return alldocs
}
/**
 * Loads a single PDF buffer and appends the resulting documents to `alldocs`.
 * @param usage 'perFile' emits one document per file (splitPages: false); any other value emits one document per page
 * @param bf raw PDF bytes
 * @param legacyBuild when true, use the legacy pdfjs-dist build instead of the pdf-parse bundled build
 * @param textSplitter optional splitter; when provided, documents are split before being appended
 * @param alldocs accumulator mutated in place
 */
private async extractDocs(usage: string, bf: Buffer, legacyBuild: boolean, textSplitter: TextSplitter, alldocs: any[]) {
    // The two usage modes differ only in the splitPages option; build it conditionally
    // instead of duplicating the whole loader + split logic per branch.
    const loader = new PDFLoader(new Blob([bf]), {
        ...(usage === 'perFile' ? { splitPages: false } : {}),
        pdfjs: () =>
            // @ts-ignore
            legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
    })
    if (textSplitter) {
        const docs = await loader.loadAndSplit(textSplitter)
        alldocs.push(...docs)
    } else {
        const docs = await loader.load()
        alldocs.push(...docs)
    }
}
}
module.exports = { nodeClass: Pdf_DocumentLoaders }

View File

@ -1,8 +1,10 @@
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { TextLoader } from 'langchain/document_loaders/fs/text'
import { Document } from '@langchain/core/documents'
import { handleEscapeCharacters } from '../../../src'
import { getStoragePath, handleEscapeCharacters } from '../../../src'
import fs from 'fs'
import path from 'path'
class Text_DocumentLoaders implements INode {
label: string
@ -63,7 +65,7 @@ class Text_DocumentLoaders implements INode {
]
}
async init(nodeData: INodeData): Promise<any> {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const txtFileBase64 = nodeData.inputs?.txtFile as string
const metadata = nodeData.inputs?.metadata
@ -72,25 +74,51 @@ class Text_DocumentLoaders implements INode {
let alldocs = []
let files: string[] = []
if (txtFileBase64.startsWith('[') && txtFileBase64.endsWith(']')) {
files = JSON.parse(txtFileBase64)
} else {
files = [txtFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
const loader = new TextLoader(blob)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
//FILE-STORAGE::["CONTRIBUTING.md","LICENSE.md","README.md"]
if (txtFileBase64.startsWith('FILE-STORAGE::')) {
const fileName = txtFileBase64.replace('FILE-STORAGE::', '')
if (fileName.startsWith('[') && fileName.endsWith(']')) {
files = JSON.parse(fileName)
} else {
const docs = await loader.load()
alldocs.push(...docs)
files = [fileName]
}
const chatflowid = options.chatflowid
for (const file of files) {
const fileInStorage = path.join(getStoragePath(), chatflowid, file)
const fileData = fs.readFileSync(fileInStorage)
const blob = new Blob([fileData])
const loader = new TextLoader(blob)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
}
} else {
if (txtFileBase64.startsWith('[') && txtFileBase64.endsWith(']')) {
files = JSON.parse(txtFileBase64)
} else {
files = [txtFileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
const loader = new TextLoader(blob)
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
} else {
const docs = await loader.load()
alldocs.push(...docs)
}
}
}

View File

@ -4,6 +4,9 @@ import { OpenApiToolkit } from 'langchain/agents'
import { JsonSpec, JsonObject } from './core'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { getCredentialData, getCredentialParam } from '../../../src'
import { getStoragePath } from '../../../src'
import fs from 'fs'
import path from 'path'
class OpenAPIToolkit_Tools implements INode {
label: string
@ -56,11 +59,21 @@ class OpenAPIToolkit_Tools implements INode {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const openAPIToken = getCredentialParam('openAPIToken', credentialData, nodeData)
const splitDataURI = yamlFileBase64.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const utf8String = bf.toString('utf-8')
const data = load(utf8String) as JsonObject
let data: JsonObject
if (yamlFileBase64.startsWith('FILE-STORAGE::')) {
const file = yamlFileBase64.replace('FILE-STORAGE::', '')
const chatflowid = options.chatflowid
const fileInStorage = path.join(getStoragePath(), chatflowid, file)
const fileData = fs.readFileSync(fileInStorage)
const utf8String = fileData.toString('utf-8')
data = load(utf8String) as JsonObject
} else {
const splitDataURI = yamlFileBase64.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const utf8String = bf.toString('utf-8')
data = load(utf8String) as JsonObject
}
if (!data) {
throw new Error('Failed to load OpenAPI spec')
}

View File

@ -11,6 +11,9 @@ import { Document } from '@langchain/core/documents'
import { Embeddings } from '@langchain/core/embeddings'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { getStoragePath } from '../../../src'
import fs from 'fs'
import path from 'path'
class Vectara_VectorStores implements INode {
label: string
@ -182,20 +185,37 @@ class Vectara_VectorStores implements INode {
}
}
let files: string[] = []
if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
files = JSON.parse(fileBase64)
} else {
files = [fileBase64]
}
const vectaraFiles: VectaraFile[] = []
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
vectaraFiles.push({ blob: blob, fileName: getFileName(file) })
let files: string[] = []
if (fileBase64.startsWith('FILE-STORAGE::')) {
const fileName = fileBase64.replace('FILE-STORAGE::', '')
if (fileName.startsWith('[') && fileName.endsWith(']')) {
files = JSON.parse(fileName)
} else {
files = [fileName]
}
const chatflowid = options.chatflowid
for (const file of files) {
const fileInStorage = path.join(getStoragePath(), chatflowid, file)
const fileData = fs.readFileSync(fileInStorage)
const blob = new Blob([fileData])
vectaraFiles.push({ blob: blob, fileName: getFileName(file) })
}
} else {
if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
files = JSON.parse(fileBase64)
} else {
files = [fileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
vectaraFiles.push({ blob: blob, fileName: getFileName(file) })
}
}
try {

View File

@ -1,6 +1,9 @@
import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig, VectaraFile } from '@langchain/community/vectorstores/vectara'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import path from 'path'
import { getStoragePath } from '../../../src'
import fs from 'fs'
class VectaraUpload_VectorStores implements INode {
label: string
@ -129,20 +132,37 @@ class VectaraUpload_VectorStores implements INode {
vectaraFilter.contextConfig = vectaraContextConfig
let files: string[] = []
if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
files = JSON.parse(fileBase64)
} else {
files = [fileBase64]
}
const vectaraFiles: VectaraFile[] = []
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
vectaraFiles.push({ blob: blob, fileName: getFileName(file) })
if (fileBase64.startsWith('FILE-STORAGE::')) {
const fileName = fileBase64.replace('FILE-STORAGE::', '')
if (fileName.startsWith('[') && fileName.endsWith(']')) {
files = JSON.parse(fileName)
} else {
files = [fileName]
}
const chatflowid = options.chatflowid
for (const file of files) {
const fileInStorage = path.join(getStoragePath(), chatflowid, file)
const fileData = fs.readFileSync(fileInStorage)
const blob = new Blob([fileData])
vectaraFiles.push({ blob: blob, fileName: getFileName(file) })
}
} else {
if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
files = JSON.parse(fileBase64)
} else {
files = [fileBase64]
}
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
const blob = new Blob([bf])
vectaraFiles.push({ blob: blob, fileName: getFileName(file) })
}
}
const vectorStore = new VectaraStore(vectaraArgs)

View File

@ -5,7 +5,7 @@ PORT=3000
# APIKEY_PATH=/your_api_key_path/.flowise
# SECRETKEY_PATH=/your_api_key_path/.flowise
# LOG_PATH=/your_log_path/.flowise/logs
# BLOB_STORAGE_PATH=/your_database_path/.flowise/storage
# BLOB_STORAGE_PATH=/your_storage_path/.flowise/storage
# NUMBER_OF_PROXIES= 1

View File

@ -17,6 +17,7 @@ import { utilGetUploadsConfig } from '../../utils/getUploadsConfig'
import { ChatMessage } from '../../database/entities/ChatMessage'
import { ChatMessageFeedback } from '../../database/entities/ChatMessageFeedback'
import { UpsertHistory } from '../../database/entities/UpsertHistory'
import { containsBase64File, updateFlowDataWithFilePaths } from '../../utils/fileRepository'
// Check if chatflow valid for streaming
const checkIfChatflowIsValidForStreaming = async (chatflowId: string): Promise<any> => {
@ -184,8 +185,24 @@ const getChatflowById = async (chatflowId: string): Promise<any> => {
const saveChatflow = async (newChatFlow: ChatFlow): Promise<any> => {
try {
const appServer = getRunningExpressApp()
const newDbChatflow = await appServer.AppDataSource.getRepository(ChatFlow).create(newChatFlow)
const dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(newDbChatflow)
let dbResponse: ChatFlow
if (containsBase64File(newChatFlow)) {
// we need a 2-step process, as we need to save the chatflow first and then update the file paths
// this is because we need the chatflow id to create the file paths
// step 1 - save with empty flowData
const incomingFlowData = newChatFlow.flowData
newChatFlow.flowData = JSON.stringify({})
const chatflow = appServer.AppDataSource.getRepository(ChatFlow).create(newChatFlow)
const step1Results = await appServer.AppDataSource.getRepository(ChatFlow).save(chatflow)
// step 2 - convert base64 to file paths and update the chatflow
step1Results.flowData = updateFlowDataWithFilePaths(step1Results.id, incomingFlowData)
dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(step1Results)
} else {
const chatflow = appServer.AppDataSource.getRepository(ChatFlow).create(newChatFlow)
dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(chatflow)
}
await appServer.telemetry.sendTelemetry('chatflow_created', {
version: await getAppVersion(),
chatflowId: dbResponse.id,
@ -200,6 +217,9 @@ const saveChatflow = async (newChatFlow: ChatFlow): Promise<any> => {
const updateChatflow = async (chatflow: ChatFlow, updateChatFlow: ChatFlow): Promise<any> => {
try {
const appServer = getRunningExpressApp()
if (containsBase64File(updateChatFlow)) {
updateChatFlow.flowData = updateFlowDataWithFilePaths(chatflow.id, updateChatFlow.flowData)
}
const newDbChatflow = await appServer.AppDataSource.getRepository(ChatFlow).merge(chatflow, updateChatFlow)
const dbResponse = await appServer.AppDataSource.getRepository(ChatFlow).save(newDbChatflow)
// chatFlowPool is initialized only when a flow is opened

View File

@ -0,0 +1,113 @@
import { ChatFlow } from '../database/entities/ChatFlow'
import path from 'path'
import { getStoragePath } from 'flowise-components'
import fs from 'fs'
import { IReactFlowObject } from '../Interface'
/**
 * Returns true when any Document Loader node input in the chatflow's flowData
 * holds an embedded base64 data URI (either directly or inside a JSON array of files).
 * Used to decide whether uploaded files must be externalized to disk storage.
 * @param chatflow the chatflow whose flowData (a serialized react-flow graph) is inspected
 */
export const containsBase64File = (chatflow: ChatFlow) => {
    const parsedFlowData: IReactFlowObject = JSON.parse(chatflow.flowData)
    // Matches the data-URI prefix, e.g. "data:text/csv;base64,..."
    const re = new RegExp('^data.*;base64', 'i')
    for (const node of parsedFlowData.nodes) {
        // Only Document Loader nodes carry file inputs
        if (node.data.category !== 'Document Loaders') {
            continue
        }
        const inputs = node.data.inputs
        if (!inputs) {
            continue
        }
        for (const key of Object.getOwnPropertyNames(inputs)) {
            const input = inputs[key]
            if (!input || typeof input !== 'string') {
                continue
            }
            // Multi-file uploads are stored as a JSON array of data URIs
            if (input.startsWith('[')) {
                try {
                    const files = JSON.parse(input)
                    if (Array.isArray(files) && files.some((file: unknown) => typeof file === 'string' && re.test(file))) {
                        return true
                    }
                } catch (e) {
                    // Not valid JSON after all; skip this input (mirrors previous behavior)
                    continue
                }
            }
            if (re.test(input)) {
                return true
            }
        }
    }
    return false
}
/**
 * Writes one base64 data-URI file to <storagePath>/<chatflowid>/<filename> and
 * records the filename in `fileNames`.
 * Expected data-URI shape: "data:<mime>;base64,<payload>,filename:<name>"
 * (the filename is appended as the last comma-separated segment).
 * @returns the FILE-STORAGE marker string containing all filenames collected so far
 */
function addFileToStorage(file: string, chatflowid: string, fileNames: string[]) {
    const dir = path.join(getStoragePath(), chatflowid)
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true })
    }
    const splitDataURI = file.split(',')
    // basename guards against path traversal via a crafted filename (e.g. "../../evil")
    // since the name originates from untrusted upload data
    const filename = path.basename(splitDataURI.pop()?.split(':')[1] ?? '')
    const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
    const filePath = path.join(dir, filename)
    fs.writeFileSync(filePath, bf)
    fileNames.push(filename)
    return 'FILE-STORAGE::' + JSON.stringify(fileNames)
}
/**
 * Replaces embedded base64 data URIs in Document Loader node inputs with
 * FILE-STORAGE:: references, writing each file to disk under the chatflow's
 * storage directory.
 * @param chatflowid id used to build the storage directory for the files
 * @param flowData serialized react-flow graph (JSON string)
 * @returns the updated flowData JSON string, or '' when flowData cannot be processed
 */
export const updateFlowDataWithFilePaths = (chatflowid: string, flowData: string) => {
    try {
        const parsedFlowData: IReactFlowObject = JSON.parse(flowData)
        const re = new RegExp('^data.*;base64', 'i')
        for (const node of parsedFlowData.nodes) {
            if (node.data.category !== 'Document Loaders') {
                continue
            }
            const inputs = node.data.inputs
            if (!inputs) {
                continue
            }
            for (const key of Object.getOwnPropertyNames(inputs)) {
                // Fresh accumulator per input key; addFileToStorage appends to it
                // and returns the marker containing every name collected so far.
                const fileNames: string[] = []
                const input = inputs[key]
                if (!input || typeof input !== 'string') {
                    continue
                }
                if (input.startsWith('[')) {
                    try {
                        const files = JSON.parse(input)
                        // NOTE(review): array entries that do NOT match the base64 regex are
                        // skipped and therefore absent from the stored FILE-STORAGE list —
                        // presumably intentional (already-externalized entries); confirm.
                        for (const file of files) {
                            if (re.test(file)) {
                                node.data.inputs[key] = addFileToStorage(file, chatflowid, fileNames)
                            }
                        }
                    } catch (e) {
                        // Input looked like JSON but was not; leave it untouched
                        continue
                    }
                } else if (re.test(input)) {
                    node.data.inputs[key] = addFileToStorage(input, chatflowid, fileNames)
                }
            }
        }
        return JSON.stringify(parsedFlowData)
    } catch (e) {
        // Malformed flowData: signal failure with an empty string (callers check for it)
        return ''
    }
}

View File

@ -243,6 +243,15 @@ export const getEndingNodes = (nodeDependencies: INodeDependencies, graph: INode
*/
export const getFileName = (fileBase64: string): string => {
let fileNames = []
if (fileBase64.startsWith('FILE-STORAGE::')) {
const names = fileBase64.substring(14)
if (names.includes('[') && names.includes(']')) {
const files = JSON.parse(names)
return files.join(', ')
} else {
return fileBase64.substring(14)
}
}
if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
const files = JSON.parse(fileBase64)
for (const file of files) {

View File

@ -234,6 +234,15 @@ export const convertDateStringToDateObject = (dateString) => {
export const getFileName = (fileBase64) => {
let fileNames = []
if (fileBase64.startsWith('FILE-STORAGE::')) {
const names = fileBase64.substring(14)
if (names.includes('[') && names.includes(']')) {
const files = JSON.parse(names)
return files.join(', ')
} else {
return fileBase64.substring(14)
}
}
if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
const files = JSON.parse(fileBase64)
for (const file of files) {