update Notion Loader

feature/Notion
Henry 2023-07-21 17:27:39 +01:00
parent 6de237ab5f
commit b5b9b9c8b0
6 changed files with 114 additions and 22 deletions

View File

@ -1,6 +1,6 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { NotionDBLoader, NotionDBLoaderParams } from 'langchain/document_loaders/web/notiondb'
import { NotionAPILoader, NotionAPILoaderOptions } from 'langchain/document_loaders/web/notionapi'
class NotionDB_DocumentLoaders implements INode {
label: string
@ -18,7 +18,7 @@ class NotionDB_DocumentLoaders implements INode {
this.type = 'Document'
this.icon = 'notion.png'
this.category = 'Document Loaders'
this.description = 'Load data from Notion Database ID'
this.description = 'Load data from Notion Database (each row is a separate document with all properties as metadata)'
this.baseClasses = [this.type]
this.inputs = [
{
@ -27,13 +27,6 @@ class NotionDB_DocumentLoaders implements INode {
type: 'TextSplitter',
optional: true
},
{
label: 'Notion Database Id',
name: 'databaseId',
type: 'string',
description:
'If your URL looks like - https://www.notion.so/<long_hash_1>?v=<long_hash_2>, then <long_hash_1> is the database ID'
},
{
label: 'Notion Integration Token',
name: 'notionIntegrationToken',
@ -42,10 +35,10 @@ class NotionDB_DocumentLoaders implements INode {
'You can find integration token <a target="_blank" href="https://developers.notion.com/docs/create-a-notion-integration#step-1-create-an-integration">here</a>'
},
{
label: 'Page Size Limit',
name: 'pageSizeLimit',
type: 'number',
default: 10
label: 'Notion Database Id',
name: 'databaseId',
type: 'string',
description: 'If your URL looks like - https://www.notion.so/abcdefh?v=long_hash_2, then abcdefh is the database ID'
},
{
label: 'Metadata',
@ -60,16 +53,17 @@ class NotionDB_DocumentLoaders implements INode {
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const databaseId = nodeData.inputs?.databaseId as string
const notionIntegrationToken = nodeData.inputs?.notionIntegrationToken as string
const pageSizeLimit = nodeData.inputs?.pageSizeLimit as string
const metadata = nodeData.inputs?.metadata
const notionIntegrationToken = nodeData.inputs?.notionIntegrationToken as string
const obj: NotionDBLoaderParams = {
pageSizeLimit: pageSizeLimit ? parseInt(pageSizeLimit, 10) : 10,
databaseId,
notionIntegrationToken
const obj: NotionAPILoaderOptions = {
clientOptions: {
auth: notionIntegrationToken
},
id: databaseId,
type: 'database'
}
const loader = new NotionDBLoader(obj)
const loader = new NotionAPILoader(obj)
let docs = []
if (textSplitter) {

View File

@ -0,0 +1,96 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { NotionAPILoader, NotionAPILoaderOptions } from 'langchain/document_loaders/web/notionapi'
class NotionPage_DocumentLoaders implements INode {
label: string
name: string
description: string
type: string
icon: string
category: string
baseClasses: string[]
inputs: INodeParams[]
constructor() {
this.label = 'Notion Page'
this.name = 'notionPage'
this.type = 'Document'
this.icon = 'notion.png'
this.category = 'Document Loaders'
this.description = 'Load data from Notion Page (including child pages all as separate documents)'
this.baseClasses = [this.type]
this.inputs = [
{
label: 'Text Splitter',
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Notion Integration Token',
name: 'notionIntegrationToken',
type: 'password',
description:
'You can find integration token <a target="_blank" href="https://developers.notion.com/docs/create-a-notion-integration#step-1-create-an-integration">here</a>'
},
{
label: 'Notion Page Id',
name: 'pageId',
type: 'string',
description:
'The last The 32 char hex in the url path. For example: https://www.notion.so/skarard/LangChain-Notion-API-b34ca03f219c4420a6046fc4bdfdf7b4, b34ca03f219c4420a6046fc4bdfdf7b4 is the Page ID'
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const pageId = nodeData.inputs?.pageId as string
const metadata = nodeData.inputs?.metadata
const notionIntegrationToken = nodeData.inputs?.notionIntegrationToken as string
const obj: NotionAPILoaderOptions = {
clientOptions: {
auth: notionIntegrationToken
},
id: pageId,
type: 'page'
}
const loader = new NotionAPILoader(obj)
let docs = []
if (textSplitter) {
docs = await loader.loadAndSplit(textSplitter)
} else {
docs = await loader.load()
}
if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}
module.exports = { nodeClass: NotionPage_DocumentLoaders }

View File

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

View File

@ -20,6 +20,7 @@
"@dqbd/tiktoken": "^1.0.7",
"@getzep/zep-js": "^0.4.1",
"@huggingface/inference": "^2.6.1",
"@notionhq/client": "^2.2.8",
"@opensearch-project/opensearch": "^1.2.0",
"@pinecone-database/pinecone": "^0.0.12",
"@qdrant/js-client-rest": "^1.2.2",
@ -41,8 +42,10 @@
"linkifyjs": "^4.1.1",
"mammoth": "^1.5.1",
"moment": "^2.29.3",
"mysql2": "^3.5.1",
"node-fetch": "^2.6.11",
"node-html-markdown": "^1.3.0",
"notion-to-md": "^3.1.1",
"pdf-parse": "^1.1.1",
"pdfjs-dist": "^3.7.107",
"playwright": "^1.35.0",
@ -52,8 +55,7 @@
"srt-parser-2": "^1.2.3",
"vm2": "^3.9.19",
"weaviate-ts-client": "^1.1.0",
"ws": "^8.9.0",
"mysql2": "^3.5.1"
"ws": "^8.9.0"
},
"devDependencies": {
"@types/gulp": "4.0.9",